Hi @rabernat , thank you for the answer.
I’ve been testing virtualizarr
and ran into an issue when trying to concatenate multiple .nc
files virtually without loading the actual data. Here’s a brief outline of what I did:
import warnings
import fsspec
from virtualizarr.parsers import HDFParser
from virtualizarr import open_virtual_mfdataset
from obstore.store import HTTPStore
warnings.filterwarnings("ignore", category=UserWarning)
fs = fsspec.filesystem('http')
nc_files = sorted(fs.glob("https://sense4fire.eu/database/v02/S4F.CCILC_FireCCI51/Europe-reg_333m/bm_herb/*.nc"))
store = HTTPStore("https://sense4fire.eu")
vds = open_virtual_mfdataset(
nc_files[:30], # trying with a subset before scaling to ~1091 files
parser=HDFParser(),
object_store=store,
combine_attrs="override",
data_vars="minimal",
coords="minimal"
)
However, I got the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[14], line 1
----> 1 vds = open_virtual_mfdataset(
2 nc_files[:30],
3 parser=HDFParser(),
4 object_store=store,
5 combine_attrs="override",
6 data_vars="minimal",
7 coords="minimal"
8 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/virtualizarr/xarray.py:227, in open_virtual_mfdataset(paths, object_store, parser, concat_dim, compat, preprocess, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)
214 combined_vds = _nested_combine(
215 virtual_datasets,
216 concat_dims=concat_dim,
(...)
222 combine_attrs=combine_attrs,
223 )
224 elif combine == "by_coords":
225 # Redo ordering from coordinates, ignoring how they were ordered
226 # previously
--> 227 combined_vds = combine_by_coords(
228 virtual_datasets,
229 compat=compat,
230 data_vars=data_vars,
231 coords=coords,
232 join=join,
233 combine_attrs=combine_attrs,
234 )
235 else:
236 raise ValueError(
237 f"{combine} is an invalid option for the keyword argument ``combine``"
238 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:983, in combine_by_coords(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs)
979 grouped_by_vars = groupby_defaultdict(data_objects, key=vars_as_keys)
981 # Perform the multidimensional combine on each group of data variables
982 # before merging back together
--> 983 concatenated_grouped_by_data_vars = tuple(
984 _combine_single_variable_hypercube(
985 tuple(datasets_with_same_vars),
986 fill_value=fill_value,
987 data_vars=data_vars,
988 coords=coords,
989 compat=compat,
990 join=join,
991 combine_attrs=combine_attrs,
992 )
993 for vars, datasets_with_same_vars in grouped_by_vars
994 )
996 return merge(
997 concatenated_grouped_by_data_vars,
998 compat=compat,
(...)
1001 combine_attrs=combine_attrs,
1002 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:984, in <genexpr>(.0)
979 grouped_by_vars = groupby_defaultdict(data_objects, key=vars_as_keys)
981 # Perform the multidimensional combine on each group of data variables
982 # before merging back together
983 concatenated_grouped_by_data_vars = tuple(
--> 984 _combine_single_variable_hypercube(
985 tuple(datasets_with_same_vars),
986 fill_value=fill_value,
987 data_vars=data_vars,
988 coords=coords,
989 compat=compat,
990 join=join,
991 combine_attrs=combine_attrs,
992 )
993 for vars, datasets_with_same_vars in grouped_by_vars
994 )
996 return merge(
997 concatenated_grouped_by_data_vars,
998 compat=compat,
(...)
1001 combine_attrs=combine_attrs,
1002 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:656, in _combine_single_variable_hypercube(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)
653 _check_dimension_depth_tile_ids(combined_ids)
655 # Concatenate along all of concat_dims one by one to create single ds
--> 656 concatenated = _combine_nd(
657 combined_ids,
658 concat_dims=concat_dims,
659 data_vars=data_vars,
660 coords=coords,
661 compat=compat,
662 fill_value=fill_value,
663 join=join,
664 combine_attrs=combine_attrs,
665 )
667 # Check the overall coordinates are monotonically increasing
668 for dim in concat_dims:
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:246, in _combine_nd(combined_ids, concat_dims, data_vars, coords, compat, fill_value, join, combine_attrs)
242 # Each iteration of this loop reduces the length of the tile_ids tuples
243 # by one. It always combines along the first dimension, removing the first
244 # element of the tuple
245 for concat_dim in concat_dims:
--> 246 combined_ids = _combine_all_along_first_dim(
247 combined_ids,
248 dim=concat_dim,
249 data_vars=data_vars,
250 coords=coords,
251 compat=compat,
252 fill_value=fill_value,
253 join=join,
254 combine_attrs=combine_attrs,
255 )
256 (combined_ds,) = combined_ids.values()
257 return combined_ds
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:278, in _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat, fill_value, join, combine_attrs)
276 combined_ids = dict(sorted(group))
277 datasets = combined_ids.values()
--> 278 new_combined_ids[new_id] = _combine_1d(
279 datasets, dim, compat, data_vars, coords, fill_value, join, combine_attrs
280 )
281 return new_combined_ids
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:301, in _combine_1d(datasets, concat_dim, compat, data_vars, coords, fill_value, join, combine_attrs)
299 if concat_dim is not None:
300 try:
--> 301 combined = concat(
302 datasets,
303 dim=concat_dim,
304 data_vars=data_vars,
305 coords=coords,
306 compat=compat,
307 fill_value=fill_value,
308 join=join,
309 combine_attrs=combine_attrs,
310 )
311 except ValueError as err:
312 if "encountered unexpected variable" in str(err):
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/concat.py:277, in concat(objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs, create_index_for_new_dim)
264 return _dataarray_concat(
265 objs,
266 dim=dim,
(...)
274 create_index_for_new_dim=create_index_for_new_dim,
275 )
276 elif isinstance(first_obj, Dataset):
--> 277 return _dataset_concat(
278 objs,
279 dim=dim,
280 data_vars=data_vars,
281 coords=coords,
282 compat=compat,
283 positions=positions,
284 fill_value=fill_value,
285 join=join,
286 combine_attrs=combine_attrs,
287 create_index_for_new_dim=create_index_for_new_dim,
288 )
289 else:
290 raise TypeError(
291 "can only concatenate xarray Dataset and DataArray "
292 f"objects, got {type(first_obj)}"
293 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/concat.py:561, in _dataset_concat(datasets, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs, create_index_for_new_dim)
555 if variables_to_merge:
556 grouped = {
557 k: v
558 for k, v in collect_variables_and_indexes(datasets).items()
559 if k in variables_to_merge
560 }
--> 561 merged_vars, merged_indexes = merge_collected(
562 grouped, compat=compat, equals=equals
563 )
564 result_vars.update(merged_vars)
565 result_indexes.update(merged_indexes)
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/merge.py:295, in merge_collected(grouped, prioritized, compat, combine_attrs, equals)
293 variables = [variable for variable, _ in elements_list]
294 try:
--> 295 merged_vars[name] = unique_variable(
296 name, variables, compat, equals.get(name, None)
297 )
298 except MergeError:
299 if compat != "minimal":
300 # we need more than "minimal" compatibility (for which
301 # we drop conflicting coordinates)
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/merge.py:142, in unique_variable(name, variables, compat, equals)
138 break
140 if equals is None:
141 # now compare values with minimum number of computes
--> 142 out = out.compute()
143 for var in variables[1:]:
144 equals = getattr(out, compat)(var)
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/core/variable.py:978, in Variable.compute(self, **kwargs)
960 """Manually trigger loading of this variable's data from disk or a
961 remote source into memory and return a new variable. The original is
962 left unaltered.
(...)
975 dask.array.compute
976 """
977 new = self.copy(deep=False)
--> 978 return new.load(**kwargs)
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/core/variable.py:956, in Variable.load(self, **kwargs)
939 def load(self, **kwargs):
940 """Manually trigger loading of this variable's data from disk or a
941 remote source into memory and return this variable.
942
(...)
954 dask.array.compute
955 """
--> 956 self._data = to_duck_array(self._data, **kwargs)
957 return self
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/namedarray/pycompat.py:138, in to_duck_array(data, **kwargs)
135 from xarray.namedarray.parallelcompat import get_chunked_array_type
137 if is_chunked_array(data):
--> 138 chunkmanager = get_chunked_array_type(data)
139 loaded_data, *_ = chunkmanager.compute(data, **kwargs) # type: ignore[var-annotated]
140 return loaded_data
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/namedarray/parallelcompat.py:177, in get_chunked_array_type(*args)
171 selected = [
172 chunkmanager
173 for chunkmanager in chunkmanagers.values()
174 if chunkmanager.is_chunked_array(chunked_arr)
175 ]
176 if not selected:
--> 177 raise TypeError(
178 f"Could not find a Chunk Manager which recognises type {type(chunked_arr)}"
179 )
180 elif len(selected) >= 2:
181 raise TypeError(f"Multiple ChunkManagers recognise type {type(chunked_arr)}")
TypeError: Could not find a Chunk Manager which recognises type <class 'virtualizarr.manifests.array.ManifestArray'>
It seems like xarray is trying to compute or load variables when comparing them (possibly during combine_by_coords
), and since ManifestArray isn’t a recognized chunked array type, it fails.
I have also tried the following:
vds = open_virtual_mfdataset(
nc_files[:30],
parser=HDFParser(),
object_store=store,
combine_attrs="override",
)
But I got this error:
ValueError Traceback (most recent call last)
Cell In[15], line 1
----> 1 vds = open_virtual_mfdataset(
2 nc_files[:30],
3 parser=HDFParser(),
4 object_store=store,
5 combine_attrs="override",
6
7 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/virtualizarr/xarray.py:227, in open_virtual_mfdataset(paths, object_store, parser, concat_dim, compat, preprocess, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)
214 combined_vds = _nested_combine(
215 virtual_datasets,
216 concat_dims=concat_dim,
(...)
222 combine_attrs=combine_attrs,
223 )
224 elif combine == "by_coords":
225 # Redo ordering from coordinates, ignoring how they were ordered
226 # previously
--> 227 combined_vds = combine_by_coords(
228 virtual_datasets,
229 compat=compat,
230 data_vars=data_vars,
231 coords=coords,
232 join=join,
233 combine_attrs=combine_attrs,
234 )
235 else:
236 raise ValueError(
237 f"{combine} is an invalid option for the keyword argument ``combine``"
238 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:983, in combine_by_coords(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs)
979 grouped_by_vars = groupby_defaultdict(data_objects, key=vars_as_keys)
981 # Perform the multidimensional combine on each group of data variables
982 # before merging back together
--> 983 concatenated_grouped_by_data_vars = tuple(
984 _combine_single_variable_hypercube(
985 tuple(datasets_with_same_vars),
986 fill_value=fill_value,
987 data_vars=data_vars,
988 coords=coords,
989 compat=compat,
990 join=join,
991 combine_attrs=combine_attrs,
992 )
993 for vars, datasets_with_same_vars in grouped_by_vars
994 )
996 return merge(
997 concatenated_grouped_by_data_vars,
998 compat=compat,
(...)
1001 combine_attrs=combine_attrs,
1002 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:984, in <genexpr>(.0)
979 grouped_by_vars = groupby_defaultdict(data_objects, key=vars_as_keys)
981 # Perform the multidimensional combine on each group of data variables
982 # before merging back together
983 concatenated_grouped_by_data_vars = tuple(
--> 984 _combine_single_variable_hypercube(
985 tuple(datasets_with_same_vars),
986 fill_value=fill_value,
987 data_vars=data_vars,
988 coords=coords,
989 compat=compat,
990 join=join,
991 combine_attrs=combine_attrs,
992 )
993 for vars, datasets_with_same_vars in grouped_by_vars
994 )
996 return merge(
997 concatenated_grouped_by_data_vars,
998 compat=compat,
(...)
1001 combine_attrs=combine_attrs,
1002 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:656, in _combine_single_variable_hypercube(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)
653 _check_dimension_depth_tile_ids(combined_ids)
655 # Concatenate along all of concat_dims one by one to create single ds
--> 656 concatenated = _combine_nd(
657 combined_ids,
658 concat_dims=concat_dims,
659 data_vars=data_vars,
660 coords=coords,
661 compat=compat,
662 fill_value=fill_value,
663 join=join,
664 combine_attrs=combine_attrs,
665 )
667 # Check the overall coordinates are monotonically increasing
668 for dim in concat_dims:
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:246, in _combine_nd(combined_ids, concat_dims, data_vars, coords, compat, fill_value, join, combine_attrs)
242 # Each iteration of this loop reduces the length of the tile_ids tuples
243 # by one. It always combines along the first dimension, removing the first
244 # element of the tuple
245 for concat_dim in concat_dims:
--> 246 combined_ids = _combine_all_along_first_dim(
247 combined_ids,
248 dim=concat_dim,
249 data_vars=data_vars,
250 coords=coords,
251 compat=compat,
252 fill_value=fill_value,
253 join=join,
254 combine_attrs=combine_attrs,
255 )
256 (combined_ds,) = combined_ids.values()
257 return combined_ds
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:278, in _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat, fill_value, join, combine_attrs)
276 combined_ids = dict(sorted(group))
277 datasets = combined_ids.values()
--> 278 new_combined_ids[new_id] = _combine_1d(
279 datasets, dim, compat, data_vars, coords, fill_value, join, combine_attrs
280 )
281 return new_combined_ids
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/combine.py:301, in _combine_1d(datasets, concat_dim, compat, data_vars, coords, fill_value, join, combine_attrs)
299 if concat_dim is not None:
300 try:
--> 301 combined = concat(
302 datasets,
303 dim=concat_dim,
304 data_vars=data_vars,
305 coords=coords,
306 compat=compat,
307 fill_value=fill_value,
308 join=join,
309 combine_attrs=combine_attrs,
310 )
311 except ValueError as err:
312 if "encountered unexpected variable" in str(err):
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/concat.py:277, in concat(objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs, create_index_for_new_dim)
264 return _dataarray_concat(
265 objs,
266 dim=dim,
(...)
274 create_index_for_new_dim=create_index_for_new_dim,
275 )
276 elif isinstance(first_obj, Dataset):
--> 277 return _dataset_concat(
278 objs,
279 dim=dim,
280 data_vars=data_vars,
281 coords=coords,
282 compat=compat,
283 positions=positions,
284 fill_value=fill_value,
285 join=join,
286 combine_attrs=combine_attrs,
287 create_index_for_new_dim=create_index_for_new_dim,
288 )
289 else:
290 raise TypeError(
291 "can only concatenate xarray Dataset and DataArray "
292 f"objects, got {type(first_obj)}"
293 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/concat.py:516, in _dataset_concat(datasets, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs, create_index_for_new_dim)
513 # Make sure we're working on a copy (we'll be loading variables)
514 datasets = [ds.copy() for ds in datasets]
515 datasets = list(
--> 516 align(
517 *datasets, join=join, copy=False, exclude=[dim_name], fill_value=fill_value
518 )
519 )
521 dim_coords, dims_sizes, coord_names, data_names, vars_order = _parse_datasets(
522 datasets
523 )
524 dim_names = set(dim_coords)
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/alignment.py:867, in align(join, copy, indexes, exclude, fill_value, *objects)
671 """
672 Given any number of Dataset and/or DataArray objects, returns new
673 objects with aligned indexes and dimension sizes.
(...)
857
858 """
859 aligner = Aligner(
860 objects,
861 join=join,
(...)
865 fill_value=fill_value,
866 )
--> 867 aligner.align()
868 return aligner.results
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/alignment.py:567, in Aligner.align(self)
565 self.results = self.objects
566 else:
--> 567 self.reindex_all()
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/alignment.py:543, in Aligner.reindex_all(self)
542 def reindex_all(self) -> None:
--> 543 self.results = tuple(
544 self._reindex_one(obj, matching_indexes)
545 for obj, matching_indexes in zip(
546 self.objects, self.objects_matching_indexes, strict=True
547 )
548 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/alignment.py:544, in <genexpr>(.0)
542 def reindex_all(self) -> None:
543 self.results = tuple(
--> 544 self._reindex_one(obj, matching_indexes)
545 for obj, matching_indexes in zip(
546 self.objects, self.objects_matching_indexes, strict=True
547 )
548 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/alignment.py:532, in Aligner._reindex_one(self, obj, matching_indexes)
529 new_indexes, new_variables = self._get_indexes_and_vars(obj, matching_indexes)
530 dim_pos_indexers = self._get_dim_pos_indexers(matching_indexes)
--> 532 return obj._reindex_callback(
533 self,
534 dim_pos_indexers,
535 new_variables,
536 new_indexes,
537 self.fill_value,
538 self.exclude_dims,
539 self.exclude_vars,
540 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/core/dataset.py:3277, in Dataset._reindex_callback(self, aligner, dim_pos_indexers, variables, indexes, fill_value, exclude_dims, exclude_vars)
3271 else:
3272 to_reindex = {
3273 k: v
3274 for k, v in self.variables.items()
3275 if k not in variables and k not in exclude_vars
3276 }
-> 3277 reindexed_vars = alignment.reindex_variables(
3278 to_reindex,
3279 dim_pos_indexers,
3280 copy=aligner.copy,
3281 fill_value=fill_value,
3282 sparse=aligner.sparse,
3283 )
3284 new_variables.update(reindexed_vars)
3285 new_coord_names = self._coord_names | set(new_indexes)
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/structure/alignment.py:83, in reindex_variables(variables, dim_pos_indexers, copy, fill_value, sparse)
80 needs_masking = any(d in masked_dims for d in var.dims)
82 if needs_masking:
---> 83 new_var = var._getitem_with_mask(indxr, fill_value=fill_value_)
84 elif all(is_full_slice(k) for k in indxr):
85 # no reindexing necessary
86 # here we need to manually deal with copying data, since
87 # we neither created a new ndarray nor used fancy indexing
88 new_var = var.copy(deep=copy)
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/core/variable.py:812, in Variable._getitem_with_mask(self, key, fill_value)
809 actual_indexer = indexer
811 indexable = as_indexable(self._data)
--> 812 data = indexing.apply_indexer(indexable, actual_indexer)
814 mask = indexing.create_mask(indexer, self.shape, data)
815 # we need to invert the mask in order to pass data first. This helps
816 # pint to choose the correct unit
817 # TODO: revert after https://github.com/hgrecco/pint/issues/1019 is fixed
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/core/indexing.py:1028, in apply_indexer(indexable, indexer)
1026 return indexable.vindex[indexer]
1027 elif isinstance(indexer, OuterIndexer):
-> 1028 return indexable.oindex[indexer]
1029 else:
1030 return indexable[indexer]
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/core/indexing.py:372, in IndexCallable.__getitem__(self, key)
371 def __getitem__(self, key: Any) -> Any:
--> 372 return self.getter(key)
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/xarray/core/indexing.py:1541, in NumpyIndexingAdapter._oindex_get(self, indexer)
1539 def _oindex_get(self, indexer: OuterIndexer):
1540 key = _outer_to_numpy_indexer(indexer, self.array.shape)
-> 1541 return self.array[key]
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/virtualizarr/manifests/array.py:261, in ManifestArray.__getitem__(self, key)
256 raise TypeError(
257 f"indexer must be of type int, slice, ellipsis, None, or np.ndarray; or a tuple of such types. Got {key}"
258 )
260 # check value is valid
--> 261 indexer = _possibly_expand_trailing_ellipsis(indexer, self.ndim)
262 if len(indexer) != self.ndim:
263 raise ValueError(
264 f"Invalid indexer for array. Indexer length must be less than or equal to the number of dimensions in the array, "
265 f"but indexer={indexer} has length {len(indexer)} and array has {self.ndim} dimensions."
266 f"\nIf concatenating using xarray, ensure all non-coordinate data variables to be concatenated include the concatenation dimension, "
267 f"or consider passing `data_vars='minimal'` and `coords='minimal'` to the xarray combining function."
268 )
File ~/Desktop/masterarbeit-pouria-rezai/.venv/lib/python3.12/site-packages/virtualizarr/manifests/array.py:364, in _possibly_expand_trailing_ellipsis(indexer, ndim)
354 """
355 Allows for passing indexers <= the shape of the array, so long as they end with an ellipsis.
356
(...)
361 where marr.ndim => 3.
362 """
363 final_dim_indexer = indexer[-1]
--> 364 if final_dim_indexer == ...:
365 if len(indexer) > ndim:
366 raise ValueError(
367 f"Invalid indexer for array. Indexer length must be less than or equal to the number of dimensions in the array, "
368 f"but indexer={indexer} has length {len(indexer)} and array has {ndim} dimensions."
369 f"\nIf concatenating using xarray, ensure all non-coordinate data variables to be concatenated include the concatenation dimension, "
370 f"or consider passing `data_vars='minimal'` and `coords='minimal'` to the xarray combining function."
371 )
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
My Understanding:
Virtualizarr is meant to allow lazy access without loading data. However, xarray's combine_by_coords tries to compare variables internally using .compute(), which causes ManifestArray to break because it is not integrated with any recognized chunk manager.
Question:
Is it possible to concatenate these files fully lazily, i.e., without triggering any .compute()
or .load()
on the underlying arrays?
Let me know if I’m missing something, or if you have an idea of how to do this correctly — I would be happy to hear it.