Hi there,
I’m trying to calculate marine heatwaves with the xmhw package for data that is larger than memory, by adopting a proposed fix for large spatial regions in xmhw's dask support. I updated the code to store chunks on disk between calculations, instead of combining them in memory.
However, after storing to disk, while attempting to load the files with xr.open_mfdataset
I get an error claiming that the Resulting object does not have monotonic global indexes along dimension events
. When inspecting the events
coordinate, I see that it is monotonically increasing, but not with constant steps.
I have more files, some of which can be combined and some that cannot. The only difference I can spot is that the events coordinate starts at different values (in the two examples shown below it’s 1 and 2).
But I struggle to see how that could be the issue: for longitude, differing start values are exactly why I combine by coordinates — I have several chunks of longitudes that I want to combine.
If you have, in addition, an idea how to store the data directly into a combined file (like a Zarr store, or chunked netcdf) that would be much appreciated too!
Thanks already and best regards
Ezra
data
first dataset:
second dataset:
code
- create and store files
# Section of the code that creates the per-chunk data files to be combined later.
# Chunk sizes come from the dask chunking of the input time series; the time
# dimension is kept whole (-1), so each spatial chunk carries the full series.
_, lat_chunks, lon_chunks = ts.chunk(
    {
        "time": -1,
        "lat": settings.data.results.block.lat_chunksize,
        "lon": settings.data.results.block.lon_chunksize,
    }
).chunks
steps = {"lat": lat_chunks[0], "lon": lon_chunks[0]}
blocks = {"lat": len(lat_chunks), "lon": len(lon_chunks)}
tempdir = Path(settings.data.results.block.tempdir)
tempdir.mkdir(parents=True, exist_ok=True)
for i in range(blocks["lon"]):
    # none_bound presumably clamps an index past the end of the axis to None
    # (i.e. an open-ended slice) — TODO confirm against its definition.
    lon_from = none_bound(i * steps["lon"], len(ts.lon))
    lon_to = none_bound((i + 1) * steps["lon"], len(ts.lon))
    for j in range(blocks["lat"]):
        lat_from = none_bound(j * steps["lat"], len(ts.lat))
        lat_to = none_bound((j + 1) * steps["lat"], len(ts.lat))
        slices = {"lon": slice(lon_from, lon_to), "lat": slice(lat_from, lat_to)}
        mhw, mhw_inter = detect(
            ts.isel(**slices), th.isel(**slices), se.isel(**slices), **detect_kws
        )
        # Plain loop instead of a list comprehension: the writes are pure side
        # effects, so building a throwaway list of None results is an anti-pattern.
        # NOTE(review): f"...chunk{i}{j}.nc" is ambiguous once i or j exceeds 9
        # ((1, 11) and (11, 1) both produce "chunk111") — a separator such as
        # f"chunk{i}_{j}.nc" would be safer; the format is kept unchanged here to
        # stay compatible with the existing loading code.
        for ds, name in zip(
            [mhw, mhw_inter],
            [
                f"marine_heatwaves_chunk{i}{j}.nc",
                f"marine_heatwaves_inter_chunk{i}{j}.nc",
            ],
        ):
            ds.to_netcdf(settings.path_for(str(tempdir / name)), engine="h5netcdf")
        # Drop references to the (potentially large) results before the next
        # iteration to keep peak memory usage down.
        del mhw, mhw_inter
- load files
import os
from pathlib import Path

import xarray as xr

tempdir = Path(settings.path_for(settings.data.results.block.tempdir))
# Use a set for O(1) membership tests while filtering the directory listing;
# only the explicitly listed chunk files are combined.
files = {"marine_heatwaves_chunk50.nc", "marine_heatwaves_chunk60.nc"}
mhw_filepaths = [tempdir / name for name in os.listdir(tempdir) if name in files]
# NOTE(review): the traceback shows combine_by_coords failing on the "events"
# dimension. Each chunk file appears to carry its own local "events" coordinate
# (restarting near 1), so the concatenated global "events" index cannot be
# monotonic. Sorting per file (the commented variant below) cannot fix
# overlapping event numbers; dropping or re-numbering "events" per file in a
# `preprocess` callback, or using combine="nested" along the spatial dims only,
# would be needed instead — verify against the xmhw output structure.
ds = xr.open_mfdataset(mhw_filepaths, engine="h5netcdf")
# or
# ds = xr.open_mfdataset(
#     mhw_filepaths, preprocess=lambda x: x.sortby("events"), engine="h5netcdf"
# )
error message
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[77], line 13
7 files = ["marine_heatwaves_chunk50.nc", "marine_heatwaves_chunk60.nc"]
8 mhw_filepaths = [
9 tempdir / file
10 for file in os.listdir(tempdir)
11 if file in files
12 ]
---> 13 ds = xr.open_mfdataset(
14 mhw_filepaths, engine="h5netcdf"
15 )
File /usr/local/Caskroom/mambaforge/base/envs/mhwss23/lib/python3.10/site-packages/xarray/backends/api.py:1040, in open_mfdataset(paths, chunks, concat_dim, compat, preprocess, engine, data_vars, coords, combine, parallel, join, attrs_file, combine_attrs, **kwargs)
1027 combined = _nested_combine(
1028 datasets,
1029 concat_dims=concat_dim,
(...)
1035 combine_attrs=combine_attrs,
1036 )
1037 elif combine == "by_coords":
1038 # Redo ordering from coordinates, ignoring how they were ordered
1039 # previously
-> 1040 combined = combine_by_coords(
1041 datasets,
1042 compat=compat,
1043 data_vars=data_vars,
1044 coords=coords,
1045 join=join,
1046 combine_attrs=combine_attrs,
1047 )
1048 else:
1049 raise ValueError(
1050 "{} is an invalid option for the keyword argument"
1051 " ``combine``".format(combine)
1052 )
File /usr/local/Caskroom/mambaforge/base/envs/mhwss23/lib/python3.10/site-packages/xarray/core/combine.py:973, in combine_by_coords(data_objects, compat, data_vars, coords, fill_value, join, combine_attrs, datasets)
969 grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
971 # Perform the multidimensional combine on each group of data variables
972 # before merging back together
--> 973 concatenated_grouped_by_data_vars = tuple(
974 _combine_single_variable_hypercube(
975 tuple(datasets_with_same_vars),
976 fill_value=fill_value,
977 data_vars=data_vars,
978 coords=coords,
979 compat=compat,
980 join=join,
981 combine_attrs=combine_attrs,
982 )
983 for vars, datasets_with_same_vars in grouped_by_vars
984 )
986 return merge(
987 concatenated_grouped_by_data_vars,
988 compat=compat,
(...)
991 combine_attrs=combine_attrs,
992 )
File /usr/local/Caskroom/mambaforge/base/envs/mhwss23/lib/python3.10/site-packages/xarray/core/combine.py:974, in (.0)
969 grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
971 # Perform the multidimensional combine on each group of data variables
972 # before merging back together
973 concatenated_grouped_by_data_vars = tuple(
--> 974 _combine_single_variable_hypercube(
975 tuple(datasets_with_same_vars),
976 fill_value=fill_value,
977 data_vars=data_vars,
978 coords=coords,
979 compat=compat,
980 join=join,
981 combine_attrs=combine_attrs,
982 )
983 for vars, datasets_with_same_vars in grouped_by_vars
984 )
986 return merge(
987 concatenated_grouped_by_data_vars,
988 compat=compat,
(...)
991 combine_attrs=combine_attrs,
992 )
File /usr/local/Caskroom/mambaforge/base/envs/mhwss23/lib/python3.10/site-packages/xarray/core/combine.py:648, in _combine_single_variable_hypercube(datasets, fill_value, data_vars, coords, compat, join, combine_attrs)
646 indexes = concatenated.indexes.get(dim)
647 if not (indexes.is_monotonic_increasing or indexes.is_monotonic_decreasing):
--> 648 raise ValueError(
649 "Resulting object does not have monotonic"
650 " global indexes along dimension {}".format(dim)
651 )
653 return concatenated
ValueError: Resulting object does not have monotonic global indexes along dimension events