Skip to content

Commit

Permalink
Allow invalid_netcdf=True in to_netcdf() (pydata#3221)
Browse files Browse the repository at this point in the history
* to_netcdf: invalid_netcdf kwarg seems working OK

* backends: Added test for invalid_netcdf kwarg

* dataset: add docstring for invalid_netcdf kwarg

* Formatting: Applying Black.

* h5netcdf: More explicit kwarg and exception

* to_netcdf: Better test for kwarg invalid_netcdf

* test_complex: More clear arg names

* test invalid_netcdf=True raises with wrong engines

* Doc and What's new: invalid_netcdf kwarg

* Making Black happy.

* docs: Add h5netcdf to environment.
  • Loading branch information
ulijh authored and dcherian committed Aug 22, 2019
1 parent 52a16a6 commit 76d4a67
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies:
- dask=1.1.0
- ipython=7.2.0
- netCDF4=1.4.2
- h5netcdf=0.7.4
- cartopy=0.17.0
- rasterio=1.0.24
- zarr=2.2.0
Expand Down
25 changes: 25 additions & 0 deletions doc/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,31 @@ supported by netCDF4-python: 'standard', 'gregorian', 'proleptic_gregorian' 'nol
By default, xarray uses the 'proleptic_gregorian' calendar and units of the smallest time
difference between values, with a reference time of the first time value.

Invalid netCDF files
~~~~~~~~~~~~~~~~~~~~

The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't
allowed in netCDF4 (see
`h5netcdf documentation <https://github.com/shoyer/h5netcdf#invalid-netcdf-files>`_).
This feature is available through :py:func:`DataArray.to_netcdf` and
:py:func:`Dataset.to_netcdf` when used with ``engine="h5netcdf"``
and currently raises a warning unless ``invalid_netcdf=True`` is set:

.. ipython:: python
# Writing complex valued data
da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j])
da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True)
# Reading it back
xr.open_dataarray("complex.nc", engine="h5netcdf")
.. warning::

Note that this produces a file that is likely not readable by other netCDF
libraries!

.. _io.iris:

Iris
Expand Down
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ Enhancements
- In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if
``append_dim`` is set, as it will automatically be set to ``'a'`` internally.
By `David Brochart <https://github.com/davidbrochart>`_.
- :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used
with ``engine="h5netcdf"``. It is passed to :py:func:`h5netcdf.File`.
By `Ulrich Herter <https://github.com/ulijh>`_.

- :py:meth:`~xarray.Dataset.drop` now supports keyword arguments; dropping index
labels by specifying both ``dim`` and ``labels`` is deprecated (:issue:`2910`).
Expand Down
8 changes: 8 additions & 0 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,7 @@ def to_netcdf(
unlimited_dims: Iterable[Hashable] = None,
compute: bool = True,
multifile: bool = False,
invalid_netcdf: bool = False,
) -> Union[Tuple[ArrayWriter, AbstractDataStore], bytes, "Delayed", None]:
"""This function creates an appropriate datastore for writing a dataset to
disk as a netCDF file
Expand Down Expand Up @@ -1043,6 +1044,13 @@ def to_netcdf(

target = path_or_file if path_or_file is not None else BytesIO()
kwargs = dict(autoclose=True) if autoclose else {}
if invalid_netcdf:
if engine == "h5netcdf":
kwargs["invalid_netcdf"] = invalid_netcdf
else:
raise ValueError(
"unrecognized option 'invalid_netcdf' for engine %s" % engine
)
store = store_open(target, mode, format, group, **kwargs)

if unlimited_dims is None:
Expand Down
15 changes: 13 additions & 2 deletions xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,25 @@ class H5NetCDFStore(WritableCFDataStore):
"""

def __init__(
self, filename, mode="r", format=None, group=None, lock=None, autoclose=False
self,
filename,
mode="r",
format=None,
group=None,
lock=None,
autoclose=False,
invalid_netcdf=None,
):
import h5netcdf

if format not in [None, "NETCDF4"]:
raise ValueError("invalid format for h5netcdf backend")

self._manager = CachingFileManager(h5netcdf.File, filename, mode=mode)
kwargs = {"invalid_netcdf": invalid_netcdf}

self._manager = CachingFileManager(
h5netcdf.File, filename, mode=mode, kwargs=kwargs
)

if lock is None:
if mode == "r":
Expand Down
6 changes: 6 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1443,6 +1443,7 @@ def to_netcdf(
encoding: Mapping = None,
unlimited_dims: Iterable[Hashable] = None,
compute: bool = True,
invalid_netcdf: bool = False,
) -> Union[bytes, "Delayed", None]:
"""Write dataset contents to a netCDF file.
Expand Down Expand Up @@ -1506,6 +1507,10 @@ def to_netcdf(
compute: boolean
If true compute immediately, otherwise return a
``dask.delayed.Delayed`` object that can be computed later.
invalid_netcdf: boolean
Only valid along with engine='h5netcdf'. If True, allow writing
hdf5 files which are invalid netcdf as described in
https://github.com/shoyer/h5netcdf. Default: False.
"""
if encoding is None:
encoding = {}
Expand All @@ -1521,6 +1526,7 @@ def to_netcdf(
encoding=encoding,
unlimited_dims=unlimited_dims,
compute=compute,
invalid_netcdf=invalid_netcdf,
)

def to_zarr(
Expand Down
21 changes: 16 additions & 5 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2172,13 +2172,17 @@ def create_store(self):
yield backends.H5NetCDFStore(tmp_file, "w")

@pytest.mark.filterwarnings("ignore:complex dtypes are supported by h5py")
def test_complex(self):
@pytest.mark.parametrize(
"invalid_netcdf, warns, num_warns",
[(None, FutureWarning, 1), (False, FutureWarning, 1), (True, None, 0)],
)
def test_complex(self, invalid_netcdf, warns, num_warns):
expected = Dataset({"x": ("y", np.ones(5) + 1j * np.ones(5))})
with pytest.warns(FutureWarning):
# TODO: make it possible to write invalid netCDF files from xarray
# without a warning
with self.roundtrip(expected) as actual:
save_kwargs = {"invalid_netcdf": invalid_netcdf}
with pytest.warns(warns) as record:
with self.roundtrip(expected, save_kwargs=save_kwargs) as actual:
assert_equal(expected, actual)
assert len(record) == num_warns

def test_cross_engine_read_write_netcdf4(self):
# Drop dim3, because its labels include strings. These appear to be
Expand Down Expand Up @@ -4398,3 +4402,10 @@ def test_use_cftime_false_nonstandard_calendar(calendar, units_year):
original.to_netcdf(tmp_file)
with pytest.raises((OutOfBoundsDatetime, ValueError)):
open_dataset(tmp_file, use_cftime=False)


@pytest.mark.parametrize("engine", ["netcdf4", "scipy"])
def test_invalid_netcdf_raises(engine):
    """Check that ``invalid_netcdf=True`` raises for the netcdf4 and scipy engines."""
    dataset = create_test_data()
    expected_message = "unrecognized option 'invalid_netcdf'"
    with raises_regex(ValueError, expected_message):
        dataset.to_netcdf("foo.nc", engine=engine, invalid_netcdf=True)

0 comments on commit 76d4a67

Please sign in to comment.