Hello team Icepyx,
I am running into a few problems with reader.load().
For the sake of reproducibility, my download query is:
import icepyx as ipx

poly = [(-50.5, 64), (-50.5, 63.5), (-49, 63.5), (-49, 64), (-50.5, 64)]
region_a = ipx.Query(product='ATL06',
                     spatial_extent=poly,
                     date_range=['2019-01-01', '2020-01-01'],
                     start_time='00:00:00', end_time='23:59:59')
region_a.earthdata_login('<myusername>', '<myemailaddress>')
region_a.order_vars.append(var_list=["h_li", "latitude", "longitude"])
# print(region_a.order_vars.wanted)
region_a.download_granules('../icesat-2_data/')
Trying to load the data using:
directory = '../icesat-2_data/'
pattern = "processed_ATL{product:2}_{datetime:%Y%m%d%H%M%S}_{rgt:4}{cycle:2}{orbitsegment:2}_{version:3}_{revision:2}.h5"
reader = ipx.Read(data_source=directory, product="ATL06", filename_pattern=pattern)
reader.vars.append(var_list=["h_li", "latitude", "longitude"])
# print(reader.vars.wanted)
reader.load()
This returns the following error trace:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\netCDF4_.py:182, in _nc4_require_group(ds, group, mode, create_group)
181 try:
--> 182 ds = ds.groups[key]
183 except KeyError as e:
File ~\Miniconda3\envs\icepyx\lib\site-packages\h5netcdf\utils.py:14, in Frozen.__getitem__(self, key)
13 def __getitem__(self, key):
---> 14 return self._mapping[key]
File ~\Miniconda3\envs\icepyx\lib\site-packages\h5netcdf\core.py:397, in _LazyObjectLookup.__getitem__(self, key)
396 key = "_nc4_non_coord_" + key
--> 397 if self._objects[key] is not None:
398 return self._objects[key]
KeyError: 'land_ice_segments'
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
Cell In [7], line 3
1 reader.vars.append(var_list = ["h_li","latitude","longitude"])
2 # reader.vars.wanted
----> 3 reader.load()
File ~\Miniconda3\envs\icepyx\lib\site-packages\icepyx\core\read.py:542, in Read.load(self)
535 # DevNote: I'd originally hoped to rely on intake-xarray in order to not have to iterate through the files myself,
536 # by providing a generalized url/source in building the catalog.
537 # However, this led to errors when I tried to combine two identical datasets because the single dimension was equal.
538 # In these situations, xarray recommends manually controlling the merge/concat process yourself.
539 # While unlikely to be a broad issue, I've heard of multiple matching timestamps causing issues for combining multiple IS2 datasets.
540 for file in self._filelist:
541 all_dss.append(
--> 542 self._build_single_file_dataset(file, groups_list)
543 ) # wanted_groups, vgrp.keys()))
545 if len(all_dss) == 1:
546 return all_dss[0]
File ~\Miniconda3\envs\icepyx\lib\site-packages\icepyx\core\read.py:682, in Read._build_single_file_dataset(self, file, groups_list)
680 grp_path = wanted_groups_list[0]
681 wanted_groups_list = wanted_groups_list[1:]
--> 682 ds = self._read_single_grp(file, grp_path)
683 is2ds, ds = Read._add_vars_to_ds(
684 is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict
685 )
687 # if there are any deeper nested variables, get those so they have actual coordinates and add them
File ~\Miniconda3\envs\icepyx\lib\site-packages\icepyx\core\read.py:602, in Read._read_single_grp(self, file, grp_path)
598 try:
599 grpcat = is2cat.build_catalog(
600 file, self._pattern, self._source_type, grp_paths=grp_path
601 )
--> 602 ds = grpcat[self._source_type].read()
604 # NOTE: could also do this with h5py, but then would have to read in each variable in the group separately
605 except ValueError:
File ~\Miniconda3\envs\icepyx\lib\site-packages\intake_xarray\base.py:39, in DataSourceMixin.read(self)
37 def read(self):
38 """Return a version of the xarray with all the data in memory"""
---> 39 self._load_metadata()
40 return self._ds.load()
File ~\Miniconda3\envs\icepyx\lib\site-packages\intake\source\base.py:285, in DataSourceBase._load_metadata(self)
283 """load metadata only if needed"""
284 if self._schema is None:
--> 285 self._schema = self._get_schema()
286 self.dtype = self._schema.dtype
287 self.shape = self._schema.shape
File ~\Miniconda3\envs\icepyx\lib\site-packages\intake_xarray\base.py:18, in DataSourceMixin._get_schema(self)
15 self.urlpath = self._get_cache(self.urlpath)[0]
17 if self._ds is None:
---> 18 self._open_dataset()
20 metadata = {
21 'dims': dict(self._ds.dims),
22 'data_vars': {k: list(self._ds[k].coords)
23 for k in self._ds.data_vars.keys()},
24 'coords': tuple(self._ds.coords.keys()),
25 }
26 if getattr(self, 'on_server', False):
File ~\Miniconda3\envs\icepyx\lib\site-packages\intake_xarray\netcdf.py:92, in NetCDFSource._open_dataset(self)
88 else:
89 # https://github.com/intake/filesystem_spec/issues/476#issuecomment-732372918
90 url = fsspec.open(self.urlpath, **self.storage_options).open()
---> 92 self._ds = _open_dataset(url, chunks=self.chunks, **kwargs)
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\api.py:531, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs)
519 decoders = _resolve_decoders_kwargs(
520 decode_cf,
521 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
527 decode_coords=decode_coords,
528 )
530 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 531 backend_ds = backend.open_dataset(
532 filename_or_obj,
533 drop_variables=drop_variables,
534 **decoders,
535 **kwargs,
536 )
537 ds = _dataset_from_backend_dataset(
538 backend_ds,
539 filename_or_obj,
(...)
547 **kwargs,
548 )
549 return ds
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\h5netcdf_.py:389, in H5netcdfBackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, format, group, lock, invalid_netcdf, phony_dims, decode_vlen_strings)
369 def open_dataset(
370 self,
371 filename_or_obj,
(...)
385 decode_vlen_strings=True,
386 ):
388 filename_or_obj = _normalize_path(filename_or_obj)
--> 389 store = H5NetCDFStore.open(
390 filename_or_obj,
391 format=format,
392 group=group,
393 lock=lock,
394 invalid_netcdf=invalid_netcdf,
395 phony_dims=phony_dims,
396 decode_vlen_strings=decode_vlen_strings,
397 )
399 store_entrypoint = StoreBackendEntrypoint()
401 ds = store_entrypoint.open_dataset(
402 store,
403 mask_and_scale=mask_and_scale,
(...)
409 decode_timedelta=decode_timedelta,
410 )
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\h5netcdf_.py:181, in H5NetCDFStore.open(cls, filename, mode, format, group, lock, autoclose, invalid_netcdf, phony_dims, decode_vlen_strings)
178 lock = combine_locks([HDF5_LOCK, get_write_lock(filename)])
180 manager = CachingFileManager(h5netcdf.File, filename, mode=mode, kwargs=kwargs)
--> 181 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\h5netcdf_.py:132, in H5NetCDFStore.__init__(self, manager, group, mode, lock, autoclose)
129 self.format = None
130 # todo: utilizing find_root_and_group seems a bit clunky
131 # making filename available on h5netcdf.Group seems better
--> 132 self._filename = find_root_and_group(self.ds)[0].filename
133 self.is_remote = is_remote_uri(self._filename)
134 self.lock = ensure_lock(lock)
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\h5netcdf_.py:192, in H5NetCDFStore.ds(self)
190 @property
191 def ds(self):
--> 192 return self._acquire()
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\h5netcdf_.py:185, in H5NetCDFStore._acquire(self, needs_lock)
183 def _acquire(self, needs_lock=True):
184 with self._manager.acquire_context(needs_lock) as root:
--> 185 ds = _nc4_require_group(
186 root, self._group, self._mode, create_group=_h5netcdf_create_group
187 )
188 return ds
File ~\Miniconda3\envs\icepyx\lib\site-packages\xarray\backends\netCDF4_.py:188, in _nc4_require_group(ds, group, mode, create_group)
185 ds = create_group(ds, key)
186 else:
187 # wrap error to provide slightly more helpful message
--> 188 raise OSError(f"group not found: {key}", e)
189 return ds
OSError: [Errno group not found: land_ice_segments] 'land_ice_segments'
I have attempted to remedy this by using h5py.File(f) and the allkeys() function (linked here) to iteratively open all the files in the directory, list their keys (and sub-keys), and remove the files that do not contain land_ice_segments, roughly as in the sketch below.
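For concreteness, the check looked roughly like this (a minimal sketch; allkeys here is my reconstruction of the linked helper, not part of icepyx or h5py, so details may differ):

import glob
import h5py

def allkeys(obj):
    # recursively collect the full HDF5 paths of all groups and datasets
    keys = (obj.name,)
    if isinstance(obj, h5py.Group):
        for value in obj.values():
            keys += allkeys(value)
    return keys

files = glob.glob('../icesat-2_data/processed_ATL06_*.h5')
for f in files:
    with h5py.File(f, 'r') as F:
        if not any('land_ice_segments' in k for k in allkeys(F)):
            print('no land_ice_segments in', f)  # flagged for removal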
Having done that (removing 2 of the 47 files originally downloaded), the same error message persists.
The documentation warns of duplicate gran_idx values, and I don't think that is happening here:
rgt_cyc = []
for f in files:
    with h5py.File(f, 'r') as F:
        rgt_cyc.append((F['orbit_info/rgt'][()][0], F['orbit_info/cycle_number'][()][0]))
len(rgt_cyc) == len(list(set(rgt_cyc)))
>>> True
Whilst trying to unpick this, I found reader.load() had no issues when working with these six files:
['../icesat-2_data\\processed_ATL06_20190503212504_05440305_005_01.h5',
'../icesat-2_data\\processed_ATL06_20190507211645_06050305_005_01.h5',
'../icesat-2_data\\processed_ATL06_20190527075008_09020303_005_01.h5',
'../icesat-2_data\\processed_ATL06_20190531074148_09630303_005_01.h5',
'../icesat-2_data\\processed_ATL06_20190605195242_10470305_005_01.h5',
'../icesat-2_data\\processed_ATL06_20190609194422_11080305_005_01.h5']
Adding in another file (processed_ATL06_20190326104632_13440203_005_01.h5) brings the error message back. I think this stems from these files not having the same list of gt's (beam groups).
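For reference, a quick way to see which beam groups each file contains (a sketch; the gt names are the standard six ATL06 beam groups, and testing for land_ice_segments inside each one is my assumption about where the data live):

beams = ['gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r']
for f in files:
    with h5py.File(f, 'r') as F:
        # keep only beam groups that exist and carry a land_ice_segments group
        present = [b for b in beams if b in F and 'land_ice_segments' in F[b]]
    print(f, present)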
I have also tried selectively downloading a single beam (using order_vars.append(keyword_list=['gt3l'])) and run into the same error message.
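For context, the sort of manual fallback I am hoping to avoid looks roughly like this (an untested sketch, reusing files and beams from above; the h5netcdf engine, phony_dims handling, and concat dimension are all my assumptions, not icepyx behaviour):

import xarray as xr

per_beam = {}
for f in files:
    with h5py.File(f, 'r') as F:
        present = [b for b in beams if b in F and 'land_ice_segments' in F[b]]
    for b in present:
        ds = xr.open_dataset(f, group=f'{b}/land_ice_segments',
                             engine='h5netcdf', phony_dims='sorted')
        per_beam.setdefault(b, []).append(ds[['h_li', 'latitude', 'longitude']])

combined = {}
for b, ds_list in per_beam.items():
    # concatenate granules along the segment dimension; its name may be
    # 'delta_time' or a phony dim, depending on how the file is decoded
    dim = list(ds_list[0].dims)[0]
    combined[b] = xr.concat(ds_list, dim=dim)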
I think my question is: (a) what am I doing wrong? and (b) is it possible to combine all of these files without having to selectively throw away some tracks (and ideally extend the initial download query to include the full time series)?
Thank you in advance.