Hello team Icepyx,
I am running into a few problems with reader.load().
For the sake of reproducibility, my download query is:
import icepyx as ipx

poly = [(-50.5, 64), (-50.5, 63.5), (-49, 63.5), (-49, 64), (-50.5, 64)]
region_a = ipx.Query(product='ATL06',
                     spatial_extent=poly,
                     date_range=['2019-01-01', '2020-01-01'],
                     start_time='00:00:00', end_time='23:59:59')
region_a.earthdata_login('<myusername>', '<myemailaddress>')
region_a.order_vars.append(var_list=["h_li", "latitude", "longitude"])
# print(region_a.order_vars.wanted)
region_a.download_granules('../icesat-2_data/')
Trying to load the data using:
directory = '../icesat-2_data/'
pattern = "processed_ATL{product:2}_{datetime:%Y%m%d%H%M%S}_{rgt:4}{cycle:2}{orbitsegment:2}_{version:3}_{revision:2}.h5"
reader = ipx.Read(data_source=directory, product="ATL06", filename_pattern=pattern)
reader.vars.append(var_list=["h_li", "latitude", "longitude"])
# print(reader.vars.wanted)
reader.load()
This returns the following error trace:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\netCDF4_.py:182, in _nc4_require_group(ds, group, mode, create_group)
181 try:
--> 182 ds = ds.groups[key]
183 except KeyError as e:
File ~\Miniconda3\envs\icepyx\lib\site-packages\h5netcdf\utils.py:14, in Frozen.__getitem__(self, key)
13 def __getitem__(self, key):
---> 14 return self._mapping[key]
File ~\Miniconda3\envs\icepyx\lib\site-packages\h5netcdf\core.py:397, in _LazyObjectLookup.__getitem__(self, key)
396 key = "_nc4_non_coord_" + key
--> 397 if self._objects[key] is not None:
398 return self._objects[key]
KeyError: 'land_ice_segments'
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
Cell In [7], line 3
1 reader.vars.append(var_list = ["h_li","latitude","longitude"])
2 # reader.vars.wanted
----> 3 reader.load()
File ~\Miniconda3\envs\icepyx\lib\site-packages\icepyx\core\read.py:542, in Read.load(self)
535 # DevNote: I'd originally hoped to rely on intake-xarray in order to not have to iterate through the files myself,
536 # by providing a generalized url/source in building the catalog.
537 # However, this led to errors when I tried to combine two identical datasets because the single dimension was equal.
538 # In these situations, xarray recommends manually controlling the merge/concat process yourself.
539 # While unlikely to be a broad issue, I've heard of multiple matching timestamps causing issues for combining multiple IS2 datasets.
540 for file in self._filelist:
541 all_dss.append(
--> 542 self._build_single_file_dataset(file, groups_list)
543 ) # wanted_groups, vgrp.keys()))
545 if len(all_dss) == 1:
546 return all_dss[0]
File ~\Miniconda3\envs\icepyx\lib\site-packages\icepyx\core\read.py:682, in Read._build_single_file_dataset(self, file, groups_list)
680 grp_path = wanted_groups_list[0]
681 wanted_groups_list = wanted_groups_list[1:]
--> 682 ds = self._read_single_grp(file, grp_path)
683 is2ds, ds = Read._add_vars_to_ds(
684 is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict
685 )
687 # if there are any deeper nested variables, get those so they have actual coordinates and add them
File ~\Miniconda3\envs\icepyx\lib\site-packages\icepyx\core\read.py:602, in Read._read_single_grp(self, file, grp_path)
598 try:
599 grpcat = is2cat.build_catalog(
600 file, self._pattern, self._source_type, grp_paths=grp_path
601 )
--> 602 ds = grpcat[self._source_type].read()
604 # NOTE: could also do this with h5py, but then would have to read in each variable in the group separately
605 except ValueError:
File ~\Miniconda3\envs\icepyx\lib\site-packages\intake_xarray\base.py:39, in DataSourceMixin.read(self)
37 def read(self):
38 """Return a version of the xarray with all the data in memory"""
---> 39 self._load_metadata()
40 return self._ds.load()
File ~\Miniconda3\envs\icepyx\lib\site-packages\intake\source\base.py:285, in DataSourceBase._load_metadata(self)
283 """load metadata only if needed"""
284 if self._schema is None:
--> 285 self._schema = self._get_schema()
286 self.dtype = self._schema.dtype
287 self.shape = self._schema.shape
File ~\Miniconda3\envs\icepyx\lib\site-packages\intake_xarray\base.py:18, in DataSourceMixin._get_schema(self)
15 self.urlpath = self._get_cache(self.urlpath)[0]
17 if self._ds is None:
---> 18 self._open_dataset()
20 metadata = {
21 'dims': dict(self._ds.dims),
22 'data_vars': {k: list(self._ds[k].coords)
23 for k in self._ds.data_vars.keys()},
24 'coords': tuple(self._ds.coords.keys()),
25 }
26 if getattr(self, 'on_server', False):
File ~\Miniconda3\envs\icepyx\lib\site-packages\intake_xarray\netcdf.py:92, in NetCDFSource._open_dataset(self)
88 else:
89 # https://github.com/intake/filesystem_spec/issues/476#issuecomment-732372918
90 url = fsspec.open(self.urlpath, **self.storage_options).open()
---> 92 self._ds = _open_dataset(url, chunks=self.chunks, **kwargs)
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\api.py:531, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs)
519 decoders = _resolve_decoders_kwargs(
520 decode_cf,
521 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
527 decode_coords=decode_coords,
528 )
530 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 531 backend_ds = backend.open_dataset(
532 filename_or_obj,
533 drop_variables=drop_variables,
534 **decoders,
535 **kwargs,
536 )
537 ds = _dataset_from_backend_dataset(
538 backend_ds,
539 filename_or_obj,
(...)
547 **kwargs,
548 )
549 return ds
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\h5netcdf_.py:389, in H5netcdfBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, format, group, lock, invalid_netcdf, phony_dims, decode_vlen_strings)
369 def open_dataset(
370 self,
371 filename_or_obj,
(...)
385 decode_vlen_strings=True,
386 ):
388 filename_or_obj = _normalize_path(filename_or_obj)
--> 389 store = H5NetCDFStore.open(
390 filename_or_obj,
391 format=format,
392 group=group,
393 lock=lock,
394 invalid_netcdf=invalid_netcdf,
395 phony_dims=phony_dims,
396 decode_vlen_strings=decode_vlen_strings,
397 )
399 store_entrypoint = StoreBackendEntrypoint()
401 ds = store_entrypoint.open_dataset(
402 store,
403 mask_and_scale=mask_and_scale,
(...)
409 decode_timedelta=decode_timedelta,
410 )
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\h5netcdf_.py:181, in H5NetCDFStore.open(cls, filename, mode, format, group, lock, autoclose, invalid_netcdf, phony_dims, decode_vlen_strings)
178 lock = combine_locks([HDF5_LOCK, get_write_lock(filename)])
180 manager = CachingFileManager(h5netcdf.File, filename, mode=mode, kwargs=kwargs)
--> 181 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\h5netcdf_.py:132, in H5NetCDFStore.__init__(self, manager, group, mode, lock, autoclose)
129 self.format = None
130 # todo: utilizing find_root_and_group seems a bit clunky
131 # making filename available on h5netcdf.Group seems better
--> 132 self._filename = find_root_and_group(self.ds)[0].filename
133 self.is_remote = is_remote_uri(self._filename)
134 self.lock = ensure_lock(lock)
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\h5netcdf_.py:192, in H5NetCDFStore.ds(self)
190 @property
191 def ds(self):
--> 192 return self._acquire()
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\h5netcdf_.py:185, in H5NetCDFStore._acquire(self, needs_lock)
183 def _acquire(self, needs_lock=True):
184 with self._manager.acquire_context(needs_lock) as root:
--> 185 ds = _nc4_require_group(
186 root, self._group, self._mode, create_group=_h5netcdf_create_group
187 )
188 return ds
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\netCDF4_.py:188, in _nc4_require_group(ds, group, mode, create_group)
185 ds = create_group(ds, key)
186 else:
187 # wrap error to provide slightly more helpful message
--> 188 raise OSError(f"group not found: {key}", e)
189 return ds
OSError: [Errno group not found: land_ice_segments] 'land_ice_segments'
I have attempted to remedy this by using h5py.File(f) and an allkeys()-style helper to iteratively open all the files in the directory, list their keys (and sub-keys), and remove the files that do not contain a land_ice_segments group. Having done that (removing 2 of the 47 files originally downloaded), the same error message persists.
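For reference, this is roughly the filtering step I used (allkeys() is my own recursive helper, not an icepyx or h5py function, and the glob pattern assumes the default processed_ATL06_*.h5 filenames):

import glob
import h5py

def allkeys(obj, keys=None):
    # Recursively walk an h5py File/Group and collect the full path of every member.
    keys = [] if keys is None else keys
    for item in obj.values():
        keys.append(item.name)
        if isinstance(item, h5py.Group):
            allkeys(item, keys)
    return keys

files = glob.glob('../icesat-2_data/processed_ATL06_*.h5')
keep = []
for f in files:
    with h5py.File(f, 'r') as F:
        # Keep only files that contain a land_ice_segments group somewhere.
        if any(k.endswith('land_ice_segments') for k in allkeys(F)):
            keep.append(f)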
The documentation warns of duplicate gran_idx values, and I don't think that is happening here:
import h5py

rgt_cyc = []
for f in files:  # files = the list of downloaded granule paths
    with h5py.File(f, 'r') as F:
        rgt_cyc.append((F['orbit_info/rgt'][()][0], F['orbit_info/cycle_number'][()][0]))
len(rgt_cyc) == len(list(set(rgt_cyc)))
>>> True
While trying to unpick this, I found reader.load() had no issues when working with these six files:
['../icesat-2_data\\processed_ATL06_20190503212504_05440305_005_01.h5',
'../icesat-2_data\\processed_ATL06_20190507211645_06050305_005_01.h5',
'../icesat-2_data\\processed_ATL06_20190527075008_09020303_005_01.h5',
'../icesat-2_data\\processed_ATL06_20190531074148_09630303_005_01.h5',
'../icesat-2_data\\processed_ATL06_20190605195242_10470305_005_01.h5',
'../icesat-2_data\\processed_ATL06_20190609194422_11080305_005_01.h5']
Adding in another file (processed_ATL06_20190326104632_13440203_005_01.h5), the error message returns. I think this stems from the files not all having the same list of ground tracks (gt's); a quick check of each file's beam groups is sketched below.
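For what it's worth, this is the sort of check I mean (my own snippet, assuming files is the list of granule paths from above):

import h5py

beams = ['gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r']
for f in files:
    with h5py.File(f, 'r') as F:
        # Report which beam groups are actually present in each granule.
        print(f, [b for b in beams if b in F])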
I have tried selectively downloading a single beam (using order_vars.append(keyword_list=['gt3l'])) and run into the same error message.
I think my questions are: (a) what am I doing wrong? and (b) is it possible to combine all of these files without having to selectively throw away some tracks (and ideally extend the initial download query to include the full time series)?
Thank you in advance.