Xarray to netcdf (ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all())

Dear All,

Using geemap and google colab I successfully exported ERA5_L data to Netcdf format.
I used the exported NetCDF data for further processing, like computing the seasonal mean, etc., and it works great.
When I tried to save that processed data back to NetCDF, I got the following error:

Xarray to netcdf (ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all())

Even when I tried to export the original data to netcdf, it gives the same error.
I checked the encoding, _FillValue, etc.; everything looks fine. But I am still getting the same error.

Here are the code snapshots:

import xarray as xr
#Reading Data
fn="ISB_ERA5.nc"
ISB=xr.open_dataset(fn,engine="netcdf4")
<xarray.Dataset>
Dimensions:                  (time: 24, lon: 56, lat: 31)
Coordinates:
  * time                     (time) datetime64[ns] 2010-01-01 ... 2011-12-01
  * lon                      (lon) float64 72.78 72.79 72.8 ... 73.32 73.33
  * lat                      (lat) float64 33.5 33.51 33.52 ... 33.78 33.79 33.8
Data variables:
    total_precipitation_sum  (time, lon, lat) float32 ...
Attributes:
    crs:      EPSG:4326

Checking the encoding

ISB.total_precipitation_sum.encoding

{'zlib': False,
 'szip': False,
 'zstd': False,
 'bzip2': False,
 'blosc': False,
 'shuffle': False,
 'complevel': 0,
 'fletcher32': False,
 'contiguous': True,
 'chunksizes': None,
 'source': '/Users/macbookpro/Documents/0_PM10_PM25/DATA_Scripts/ERA5L_DATA/ISB_ERA5.nc',
 'original_shape': (24, 56, 31),
 'dtype': dtype('float32'),
 '_FillValue': nan,
 'scale_factor': 0.01}

Selecting by indexing and saving to a .nc format file:

import netCDF4
single_timestep_data = ISB.isel(time=0)
ISB.to_netcdf("single_timestep_data.nc", mode='w', format='NETCDF4', engine='netcdf4')

Your suggestion/response to resolve this issue would be greatly appreciated.

Kind Regards
Toqeer

Hi @Toqeer ! Does this still happen even if you persist/compute the computation before exporting? The subsetting can be affecting this, and triggering the computation upfront may help on debugging.

Can you share the full traceback please?

1 Like

Dear @rlourenco thank you very much for the response. I face this problem after export.
In google colab it works fine,
Regards

Dear @dcherian thanks for the quick response, here is the traceback error

import netCDF4
single_timestep_data = ISB.isel(time=0)
ISB.to_netcdf("single_timestep_data.nc", mode='w', format='NETCDF4', engine='netcdf4')
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[170], line 3
      1 import netCDF4
      2 single_timestep_data = ISB.isel(time=0)
----> 3 ISB.to_netcdf("single_timestep_data.nc", mode='w', format='NETCDF4', engine='netcdf4')

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/core/dataset.py:2041, in Dataset.to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
   2038     encoding = {}
   2039 from xarray.backends.api import to_netcdf
-> 2041 return to_netcdf(  # type: ignore  # mypy cannot resolve the overloads:(
   2042     self,
   2043     path,
   2044     mode=mode,
   2045     format=format,
   2046     group=group,
   2047     engine=engine,
   2048     encoding=encoding,
   2049     unlimited_dims=unlimited_dims,
   2050     compute=compute,
   2051     multifile=False,
   2052     invalid_netcdf=invalid_netcdf,
   2053 )

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/backends/api.py:1246, in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
   1241 # TODO: figure out how to refactor this logic (here and in save_mfdataset)
   1242 # to avoid this mess of conditionals
   1243 try:
   1244     # TODO: allow this work (setting up the file for writing array data)
   1245     # to be parallelized with dask
-> 1246     dump_to_store(
   1247         dataset, store, writer, encoding=encoding, unlimited_dims=unlimited_dims
   1248     )
   1249     if autoclose:
   1250         store.close()

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/backends/api.py:1293, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
   1290 if encoder:
   1291     variables, attrs = encoder(variables, attrs)
-> 1293 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/backends/common.py:352, in AbstractWritableDataStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
    349 if writer is None:
    350     writer = ArrayWriter()
--> 352 variables, attributes = self.encode(variables, attributes)
    354 self.set_attributes(attributes)
    355 self.set_dimensions(variables, unlimited_dims=unlimited_dims)

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/backends/common.py:441, in WritableCFDataStore.encode(self, variables, attributes)
    438 def encode(self, variables, attributes):
    439     # All NetCDF files get CF encoded by default, without this attempting
    440     # to write times, for example, would fail.
--> 441     variables, attributes = cf_encoder(variables, attributes)
    442     variables = {k: self.encode_variable(v) for k, v in variables.items()}
    443     attributes = {k: self.encode_attribute(v) for k, v in attributes.items()}

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/conventions.py:784, in cf_encoder(variables, attributes)
    782 for var in new_vars.values():
    783     bounds = var.attrs["bounds"] if "bounds" in var.attrs else None
--> 784     if bounds and bounds in new_vars:
    785         # see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries
    786         for attr in [
    787             "units",
    788             "standard_name",
   (...)
    795             "month_lengths",
    796         ]:
    797             if attr in new_vars[bounds].attrs and attr in var.attrs:

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

Thanks — the bounds attribute is an ndarray, so the `if bounds` check is failing. Can you open an issue at GitHub - pydata/xarray: N-D labeled arrays and datasets in Python, please? As a temporary fix, you can set x.attrs["bounds"] = list(x.attrs["bounds"]), where x is a DataArray.

2 Likes

Dear @dcherian, thank you very much for the suggestion — the problem is solved. Just a small correction: instead of list, tuple works, i.e.
x.attrs["bounds"] = tuple(x.attrs["bounds"]), where x is a DataArray.

The use of list , x.attrs["bounds"] = list(x.attrs["bounds"])
results in the following error

TypeError: unhashable type: 'list'
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[189], line 2
      1 import netCDF4
----> 2 ISB.to_netcdf('ISBm.nc',format='NETCDF4', engine='netcdf4')

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/core/dataset.py:2041, in Dataset.to_netcdf(self, path, mode, format, group, engine, encoding, unlimited_dims, compute, invalid_netcdf)
   2038     encoding = {}
   2039 from xarray.backends.api import to_netcdf
-> 2041 return to_netcdf(  # type: ignore  # mypy cannot resolve the overloads:(
   2042     self,
   2043     path,
   2044     mode=mode,
   2045     format=format,
   2046     group=group,
   2047     engine=engine,
   2048     encoding=encoding,
   2049     unlimited_dims=unlimited_dims,
   2050     compute=compute,
   2051     multifile=False,
   2052     invalid_netcdf=invalid_netcdf,
   2053 )

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/backends/api.py:1246, in to_netcdf(dataset, path_or_file, mode, format, group, engine, encoding, unlimited_dims, compute, multifile, invalid_netcdf)
   1241 # TODO: figure out how to refactor this logic (here and in save_mfdataset)
   1242 # to avoid this mess of conditionals
   1243 try:
   1244     # TODO: allow this work (setting up the file for writing array data)
   1245     # to be parallelized with dask
-> 1246     dump_to_store(
   1247         dataset, store, writer, encoding=encoding, unlimited_dims=unlimited_dims
   1248     )
   1249     if autoclose:
   1250         store.close()

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/backends/api.py:1293, in dump_to_store(dataset, store, writer, encoder, encoding, unlimited_dims)
   1290 if encoder:
   1291     variables, attrs = encoder(variables, attrs)
-> 1293 store.store(variables, attrs, check_encoding, writer, unlimited_dims=unlimited_dims)

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/backends/common.py:352, in AbstractWritableDataStore.store(self, variables, attributes, check_encoding_set, writer, unlimited_dims)
    349 if writer is None:
    350     writer = ArrayWriter()
--> 352 variables, attributes = self.encode(variables, attributes)
    354 self.set_attributes(attributes)
    355 self.set_dimensions(variables, unlimited_dims=unlimited_dims)

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/backends/common.py:441, in WritableCFDataStore.encode(self, variables, attributes)
    438 def encode(self, variables, attributes):
    439     # All NetCDF files get CF encoded by default, without this attempting
    440     # to write times, for example, would fail.
--> 441     variables, attributes = cf_encoder(variables, attributes)
    442     variables = {k: self.encode_variable(v) for k, v in variables.items()}
    443     attributes = {k: self.encode_attribute(v) for k, v in attributes.items()}

File ~/Codes/anaconda3/envs/geosp/lib/python3.9/site-packages/xarray/conventions.py:784, in cf_encoder(variables, attributes)
    782 for var in new_vars.values():
    783     bounds = var.attrs["bounds"] if "bounds" in var.attrs else None
--> 784     if bounds and bounds in new_vars:
    785         # see http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries
    786         for attr in [
    787             "units",
    788             "standard_name",
   (...)
    795             "month_lengths",
    796         ]:
    797             if attr in new_vars[bounds].attrs and attr in var.attrs:

TypeError: unhashable type: 'list'

Thank you very much