From 1486beaf8f4b4d5e30779d19425843212deea5ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sat, 4 Jan 2025 16:38:35 +0100 Subject: [PATCH] split out CFDatetimeCoder, deprecate use_cftime as kwarg (#9901) * split out CFDatetimeCoder into coders, deprecate use_cftime as keyword argument * add whats-new.rst entry * Apply suggestions from code review Co-authored-by: Deepak Cherian * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix warning * fix docstrings * try fix typing * Apply suggestions from code review Co-authored-by: Spencer Clark * Apply suggestions from code review Co-authored-by: Spencer Clark * Update xarray/conventions.py * Update deprecated directive * fix docstrings/whats-new.rst after merge * fix whats-new.rst * update warnings/errors --------- Co-authored-by: Deepak Cherian Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Spencer Clark --- doc/api.rst | 11 ++++++ doc/whats-new.rst | 9 +++-- xarray/__init__.py | 3 +- xarray/backends/api.py | 56 ++++++++++++++++++++++------- xarray/coders.py | 10 ++++++ xarray/coding/times.py | 15 ++++++-- xarray/conventions.py | 55 ++++++++++++++++++++++------ xarray/convert.py | 3 +- xarray/tests/test_backends.py | 15 +++++--- xarray/tests/test_coding_times.py | 59 ++++++++++++++++++++++--------- xarray/tests/test_conventions.py | 4 ++- xarray/tests/test_dataarray.py | 2 +- 12 files changed, 190 insertions(+), 52 deletions(-) create mode 100644 xarray/coders.py diff --git a/doc/api.rst b/doc/api.rst index 342ae08e1a4..f731ac1c59a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1096,6 +1096,17 @@ DataTree methods .. Missing: .. ``open_mfdatatree`` +Encoding/Decoding +================= + +Coder objects +------------- + +.. 
autosummary:: + :toctree: generated/ + + coders.CFDatetimeCoder + Coordinates objects =================== diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4d52c62a1e2..69a7b6acf97 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,9 @@ v2025.01.1 (unreleased) New Features ~~~~~~~~~~~~ - +- Split out :py:class:`coders.CFDatetimeCoder` as public API in ``xr.coders``, make ``decode_times`` keyword argument + consume :py:class:`coders.CFDatetimeCoder` (:pull:`9901`). + By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -30,7 +32,9 @@ Breaking changes Deprecations ~~~~~~~~~~~~ - +- Time decoding related kwarg ``use_cftime`` is deprecated. Use keyword argument + ``decode_times=CFDatetimeCoder(use_cftime=True)`` in :py:func:`~xarray.open_dataset`, :py:func:`~xarray.open_dataarray`, :py:func:`~xarray.open_datatree`, :py:func:`~xarray.open_groups`, :py:func:`~xarray.open_zarr` and :py:func:`~xarray.decode_cf` instead (:pull:`9901`). + By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. Bug fixes ~~~~~~~~~ @@ -70,6 +74,7 @@ New Features iso8601-parser (:pull:`9885`). By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_. + Breaking changes ~~~~~~~~~~~~~~~~ - Methods including ``dropna``, ``rank``, ``idxmax``, ``idxmin`` require diff --git a/xarray/__init__.py b/xarray/__init__.py index 622c927b468..8af936ed27a 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -1,6 +1,6 @@ from importlib.metadata import version as _version -from xarray import groupers, testing, tutorial, ufuncs +from xarray import coders, groupers, testing, tutorial, ufuncs from xarray.backends.api import ( load_dataarray, load_dataset, @@ -66,6 +66,7 @@ # `mypy --strict` running in projects that import xarray. 
__all__ = ( # noqa: RUF022 # Sub-packages + "coders", "groupers", "testing", "tutorial", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index b53e173769c..2adcc57c6b9 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -33,6 +33,7 @@ _normalize_path, ) from xarray.backends.locks import _get_scheduler +from xarray.coders import CFDatetimeCoder from xarray.core import indexing from xarray.core.combine import ( _infer_concat_order_from_positions, @@ -481,7 +482,10 @@ def open_dataset( cache: bool | None = None, decode_cf: bool | None = None, mask_and_scale: bool | Mapping[str, bool] | None = None, - decode_times: bool | Mapping[str, bool] | None = None, + decode_times: bool + | CFDatetimeCoder + | Mapping[str, bool | CFDatetimeCoder] + | None = None, decode_timedelta: bool | Mapping[str, bool] | None = None, use_cftime: bool | Mapping[str, bool] | None = None, concat_characters: bool | Mapping[str, bool] | None = None, @@ -543,9 +547,10 @@ def open_dataset( be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. - decode_times : bool or dict-like, optional + decode_times : bool, CFDatetimeCoder or dict-like, optional If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, leave them encoded as numbers. + into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them + encoded as numbers. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. @@ -569,6 +574,10 @@ def open_dataset( raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. + + .. 
deprecated:: 2025.01.1 + Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. + concat_characters : bool or dict-like, optional If True, concatenate along the last dimension of character arrays to form string arrays. Dimensions will only be concatenated over (and @@ -698,7 +707,10 @@ def open_dataarray( cache: bool | None = None, decode_cf: bool | None = None, mask_and_scale: bool | None = None, - decode_times: bool | None = None, + decode_times: bool + | CFDatetimeCoder + | Mapping[str, bool | CFDatetimeCoder] + | None = None, decode_timedelta: bool | None = None, use_cftime: bool | None = None, concat_characters: bool | None = None, @@ -761,9 +773,11 @@ def open_dataarray( `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will be replaced by NA. This keyword may not be supported by all the backends. - decode_times : bool, optional + decode_times : bool, CFDatetimeCoder or dict-like, optional If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, leave them encoded as numbers. + into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them encoded as numbers. + Pass a mapping, e.g. ``{"my_variable": False}``, + to toggle this feature per-variable individually. This keyword may not be supported by all the backends. decode_timedelta : bool, optional If True, decode variables and coordinates with time units in @@ -781,6 +795,10 @@ def open_dataarray( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. This keyword may not be supported by all the backends. + + .. deprecated:: 2025.01.1 + Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. 
+ concat_characters : bool, optional If True, concatenate along the last dimension of character arrays to form string arrays. Dimensions will only be concatenated over (and @@ -903,7 +921,10 @@ def open_datatree( cache: bool | None = None, decode_cf: bool | None = None, mask_and_scale: bool | Mapping[str, bool] | None = None, - decode_times: bool | Mapping[str, bool] | None = None, + decode_times: bool + | CFDatetimeCoder + | Mapping[str, bool | CFDatetimeCoder] + | None = None, decode_timedelta: bool | Mapping[str, bool] | None = None, use_cftime: bool | Mapping[str, bool] | None = None, concat_characters: bool | Mapping[str, bool] | None = None, @@ -961,9 +982,9 @@ def open_datatree( be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. - decode_times : bool or dict-like, optional + decode_times : bool, CFDatetimeCoder or dict-like, optional If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, leave them encoded as numbers. + into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them encoded as numbers. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. @@ -987,6 +1008,10 @@ def open_datatree( raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. + + .. deprecated:: 2025.01.1 + Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. + concat_characters : bool or dict-like, optional If True, concatenate along the last dimension of character arrays to form string arrays. 
Dimensions will only be concatenated over (and @@ -1118,7 +1143,10 @@ def open_groups( cache: bool | None = None, decode_cf: bool | None = None, mask_and_scale: bool | Mapping[str, bool] | None = None, - decode_times: bool | Mapping[str, bool] | None = None, + decode_times: bool + | CFDatetimeCoder + | Mapping[str, bool | CFDatetimeCoder] + | None = None, decode_timedelta: bool | Mapping[str, bool] | None = None, use_cftime: bool | Mapping[str, bool] | None = None, concat_characters: bool | Mapping[str, bool] | None = None, @@ -1180,9 +1208,9 @@ def open_groups( be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. - decode_times : bool or dict-like, optional + decode_times : bool, CFDatetimeCoder or dict-like, optional If True, decode times encoded in the standard NetCDF datetime format - into datetime objects. Otherwise, leave them encoded as numbers. + into datetime objects. Otherwise, use :py:class:`coders.CFDatetimeCoder` or leave them encoded as numbers. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. @@ -1206,6 +1234,10 @@ def open_groups( raise an error. Pass a mapping, e.g. ``{"my_variable": False}``, to toggle this feature per-variable individually. This keyword may not be supported by all the backends. + + .. deprecated:: 2025.01.1 + Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. + concat_characters : bool or dict-like, optional If True, concatenate along the last dimension of character arrays to form string arrays. 
Dimensions will only be concatenated over (and diff --git a/xarray/coders.py b/xarray/coders.py new file mode 100644 index 00000000000..238ac714780 --- /dev/null +++ b/xarray/coders.py @@ -0,0 +1,10 @@ +""" +This module provides coder objects that encapsulate the +"encoding/decoding" process. +""" + +from xarray.coding.times import CFDatetimeCoder + +__all__ = [ + "CFDatetimeCoder", +] diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 74ba97af81b..6d758ee8d87 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -286,7 +286,10 @@ def _unpack_time_unit_and_ref_date( def _decode_cf_datetime_dtype( - data, units: str, calendar: str | None, use_cftime: bool | None + data, + units: str, + calendar: str | None, + use_cftime: bool | None, ) -> np.dtype: # Verify that at least the first and last date can be decoded # successfully. Otherwise, tracebacks end up swallowed by @@ -421,7 +424,10 @@ def _decode_datetime_with_pandas( def decode_cf_datetime( - num_dates, units: str, calendar: str | None = None, use_cftime: bool | None = None + num_dates, + units: str, + calendar: str | None = None, + use_cftime: bool | None = None, ) -> np.ndarray: """Given an array of numeric dates in netCDF format, convert it into a numpy array of date time objects. 
@@ -1093,7 +1099,10 @@ def _lazily_encode_cf_timedelta( class CFDatetimeCoder(VariableCoder): - def __init__(self, use_cftime: bool | None = None) -> None: + def __init__( + self, + use_cftime: bool | None = None, + ) -> None: self.use_cftime = use_cftime def encode(self, variable: Variable, name: T_Name = None) -> Variable: diff --git a/xarray/conventions.py b/xarray/conventions.py index 57407a15f51..485c9ac0c71 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -7,6 +7,7 @@ import numpy as np +from xarray.coders import CFDatetimeCoder from xarray.coding import strings, times, variables from xarray.coding.variables import SerializationWarning, pop_to from xarray.core import indexing @@ -88,7 +89,7 @@ def encode_cf_variable( ensure_not_multiindex(var, name=name) for coder in [ - times.CFDatetimeCoder(), + CFDatetimeCoder(), times.CFTimedeltaCoder(), variables.CFScaleOffsetCoder(), variables.CFMaskCoder(), @@ -109,7 +110,7 @@ def decode_cf_variable( var: Variable, concat_characters: bool = True, mask_and_scale: bool = True, - decode_times: bool = True, + decode_times: bool | CFDatetimeCoder = True, decode_endianness: bool = True, stack_char_dim: bool = True, use_cftime: bool | None = None, @@ -136,7 +137,7 @@ def decode_cf_variable( Lazily scale (using scale_factor and add_offset) and mask (using _FillValue). If the _Unsigned attribute is present treat integer arrays as unsigned. - decode_times : bool + decode_times : bool or CFDatetimeCoder Decode cf times ("hours since 2000-01-01") to np.datetime64. decode_endianness : bool Decode arrays from non-native to native endianness. @@ -155,6 +156,9 @@ def decode_cf_variable( decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + .. deprecated:: 2025.01.1 + Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. 
+ Returns ------- out : Variable @@ -167,7 +171,7 @@ def decode_cf_variable( original_dtype = var.dtype if decode_timedelta is None: - decode_timedelta = decode_times + decode_timedelta = True if decode_times else False if concat_characters: if stack_char_dim: @@ -191,7 +195,31 @@ def decode_cf_variable( if decode_timedelta: var = times.CFTimedeltaCoder().decode(var, name=name) if decode_times: - var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name) + # remove checks after end of deprecation cycle + if not isinstance(decode_times, CFDatetimeCoder): + if use_cftime is not None: + emit_user_level_warning( + "Usage of 'use_cftime' as a kwarg is deprecated. " + "Please pass a 'CFDatetimeCoder' instance initialized " + "with 'use_cftime' to the 'decode_times' kwarg instead.\n" + "Example usage:\n" + " time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)\n" + " ds = xr.open_dataset(decode_times=time_coder)\n", + DeprecationWarning, + ) + decode_times = CFDatetimeCoder(use_cftime=use_cftime) + else: + if use_cftime is not None: + raise TypeError( + "Usage of 'use_cftime' as a kwarg is not allowed " + "if a 'CFDatetimeCoder' instance is passed to " + "'decode_times'. Please set 'use_cftime' " + "when initializing 'CFDatetimeCoder' instead.\n" + "Example usage:\n" + " time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)\n" + " ds = xr.open_dataset(decode_times=time_coder)\n", + ) + var = decode_times.decode(var, name=name) if decode_endianness and not var.dtype.isnative: var = variables.EndianCoder().decode(var) @@ -288,9 +316,10 @@ def _update_bounds_encoding(variables: T_Variables) -> None: T = TypeVar("T") +U = TypeVar("U") -def _item_or_default(obj: Mapping[Any, T] | T, key: Hashable, default: T) -> T: +def _item_or_default(obj: Mapping[Any, T | U] | T, key: Hashable, default: T) -> T | U: """ Return item by key if obj is mapping and key is present, else return default value. 
""" @@ -302,7 +331,7 @@ def decode_cf_variables( attributes: T_Attrs, concat_characters: bool | Mapping[str, bool] = True, mask_and_scale: bool | Mapping[str, bool] = True, - decode_times: bool | Mapping[str, bool] = True, + decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] = True, decode_coords: bool | Literal["coordinates", "all"] = True, drop_variables: T_DropVariables = None, use_cftime: bool | Mapping[str, bool] | None = None, @@ -439,7 +468,7 @@ def decode_cf( obj: T_DatasetOrAbstractstore, concat_characters: bool = True, mask_and_scale: bool = True, - decode_times: bool = True, + decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] = True, decode_coords: bool | Literal["coordinates", "all"] = True, drop_variables: T_DropVariables = None, use_cftime: bool | None = None, @@ -458,7 +487,7 @@ def decode_cf( mask_and_scale : bool, optional Lazily scale (using scale_factor and add_offset) and mask (using _FillValue). - decode_times : bool, optional + decode_times : bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder], optional Decode cf times (e.g., integers since "hours since 2000-01-01") to np.datetime64. decode_coords : bool or {"coordinates", "all"}, optional @@ -483,6 +512,10 @@ def decode_cf( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + + .. deprecated:: 2025.01.1 + Please pass a :py:class:`coders.CFDatetimeCoder` instance initialized with ``use_cftime`` to the ``decode_times`` kwarg instead. 
+ decode_timedelta : bool, optional If True, decode variables and coordinates with time units in {"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"} @@ -536,7 +569,7 @@ def cf_decoder( attributes: T_Attrs, concat_characters: bool = True, mask_and_scale: bool = True, - decode_times: bool = True, + decode_times: bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] = True, ) -> tuple[T_Variables, T_Attrs]: """ Decode a set of CF encoded variables and attributes. @@ -553,7 +586,7 @@ def cf_decoder( mask_and_scale : bool Lazily scale (using scale_factor and add_offset) and mask (using _FillValue). - decode_times : bool + decode_times : bool | CFDatetimeCoder | Mapping[str, bool | CFDatetimeCoder] Decode cf times ("hours since 2000-01-01") to np.datetime64. Returns diff --git a/xarray/convert.py b/xarray/convert.py index 14df7cadb9b..29d8f9650e3 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -4,7 +4,8 @@ import numpy as np -from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder +from xarray.coders import CFDatetimeCoder +from xarray.coding.times import CFTimedeltaCoder from xarray.conventions import decode_cf from xarray.core import duck_array_ops from xarray.core.dataarray import DataArray diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 79aa2027ca5..330dd1dac1f 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -49,6 +49,7 @@ from xarray.backends.pydap_ import PydapDataStore from xarray.backends.scipy_ import ScipyBackendEntrypoint from xarray.backends.zarr import ZarrStore +from xarray.coders import CFDatetimeCoder from xarray.coding.cftime_offsets import cftime_range from xarray.coding.strings import check_vlen_dtype, create_vlen_dtype from xarray.coding.variables import SerializationWarning @@ -3223,7 +3224,10 @@ def test_open_zarr_use_cftime(self) -> None: ds.to_zarr(store_target, **self.version_kwargs) ds_a = xr.open_zarr(store_target, 
**self.version_kwargs) assert_identical(ds, ds_a) - ds_b = xr.open_zarr(store_target, use_cftime=True, **self.version_kwargs) + decoder = CFDatetimeCoder(use_cftime=True) + ds_b = xr.open_zarr( + store_target, decode_times=decoder, **self.version_kwargs + ) assert xr.coding.times.contains_cftime_datetimes(ds_b.time.variable) def test_write_read_select_write(self) -> None: @@ -5671,7 +5675,8 @@ def test_use_cftime_true(calendar, units_year) -> None: with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) with warnings.catch_warnings(record=True) as record: - with open_dataset(tmp_file, use_cftime=True) as ds: + decoder = CFDatetimeCoder(use_cftime=True) + with open_dataset(tmp_file, decode_times=decoder) as ds: assert_identical(expected_x, ds.x) assert_identical(expected_time, ds.time) _assert_no_dates_out_of_range_warning(record) @@ -5723,7 +5728,8 @@ def test_use_cftime_false_standard_calendar_out_of_range(calendar, units_year) - with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) with pytest.raises((OutOfBoundsDatetime, ValueError)): - open_dataset(tmp_file, use_cftime=False) + decoder = CFDatetimeCoder(use_cftime=False) + open_dataset(tmp_file, decode_times=decoder) @requires_scipy_or_netCDF4 @@ -5741,7 +5747,8 @@ def test_use_cftime_false_nonstandard_calendar(calendar, units_year) -> None: with create_tmp_file() as tmp_file: original.to_netcdf(tmp_file) with pytest.raises((OutOfBoundsDatetime, ValueError)): - open_dataset(tmp_file, use_cftime=False) + decoder = CFDatetimeCoder(use_cftime=False) + open_dataset(tmp_file, decode_times=decoder) @pytest.mark.parametrize("engine", ["netcdf4", "scipy"]) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 685767b71bb..e05d303e17b 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -19,9 +19,9 @@ date_range, decode_cf, ) +from xarray.coders import CFDatetimeCoder from xarray.coding.times import _STANDARD_CALENDARS as 
_STANDARD_CALENDARS_UNSORTED from xarray.coding.times import ( - CFDatetimeCoder, _encode_datetime_with_cftime, _netcdf_to_numpy_timeunit, _numpy_to_netcdf_timeunit, @@ -123,7 +123,11 @@ def _all_cftime_date_types(): @pytest.mark.filterwarnings("ignore:Ambiguous reference date string") @pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully") @pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS) -def test_cf_datetime(num_dates, units, calendar) -> None: +def test_cf_datetime( + num_dates, + units, + calendar, +) -> None: import cftime expected = cftime.num2date( @@ -277,15 +281,15 @@ def test_decode_dates_outside_timestamp_range(calendar) -> None: @requires_cftime @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) +@pytest.mark.parametrize("num_time", [735368, [735368], [[735368]]]) def test_decode_standard_calendar_single_element_inside_timestamp_range( - calendar, + calendar, num_time ) -> None: units = "days since 0001-01-01" - for num_time in [735368, [735368], [[735368]]]: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "Unable to decode time axis") - actual = decode_cf_datetime(num_time, units, calendar=calendar) - assert actual.dtype == np.dtype("M8[ns]") + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "Unable to decode time axis") + actual = decode_cf_datetime(num_time, units, calendar=calendar) + assert actual.dtype == np.dtype("M8[ns]") @requires_cftime @@ -628,10 +632,10 @@ def test_cf_timedelta_2d() -> None: @pytest.mark.parametrize( ["deltas", "expected"], [ - (pd.to_timedelta(["1 day", "2 days"]), "days"), # type: ignore[arg-type, unused-ignore] - (pd.to_timedelta(["1 day", "2 days"]), "days"), # type: ignore[arg-type, unused-ignore] - (pd.to_timedelta(["1 day", "2 days"]), "days"), # type: ignore[arg-type, unused-ignore] - (pd.to_timedelta(["1 day", "2 days"]), "days"), # type: ignore[arg-type, unused-ignore] + (pd.to_timedelta(["1 day", "2 days"]), 
"days"), + (pd.to_timedelta(["1h", "1 day 1 hour"]), "hours"), + (pd.to_timedelta(["1m", "2m", np.nan]), "minutes"), + (pd.to_timedelta(["1m3s", "1m4s"]), "seconds"), ], ) def test_infer_timedelta_units(deltas, expected) -> None: @@ -675,7 +679,7 @@ def test_decode_cf(calendar) -> None: if calendar not in _STANDARD_CALENDARS: assert ds.test.dtype == np.dtype("O") else: - assert ds.test.dtype == np.dtype("M8[ns]") + assert ds.test.dtype == np.dtype("=M8[ns]") def test_decode_cf_time_bounds() -> None: @@ -700,7 +704,7 @@ def test_decode_cf_time_bounds() -> None: "calendar": "standard", } dsc = decode_cf(ds) - assert dsc.time_bnds.dtype == np.dtype("M8[ns]") + assert dsc.time_bnds.dtype == np.dtype("=M8[ns]") dsc = decode_cf(ds, decode_times=False) assert dsc.time_bnds.dtype == np.dtype("int64") @@ -1072,7 +1076,8 @@ def test_encode_decode_roundtrip_cftime(freq) -> None: ) variable = Variable(["time"], times) encoded = conventions.encode_cf_variable(variable) - decoded = conventions.decode_cf_variable("time", encoded, use_cftime=True) + decoder = CFDatetimeCoder(use_cftime=True) + decoded = conventions.decode_cf_variable("time", encoded, decode_times=decoder) assert_equal(variable, decoded) @@ -1182,7 +1187,7 @@ def test_decode_0size_datetime(use_cftime): if use_cftime and not has_cftime: pytest.skip() - dtype = object if use_cftime else "M8[ns]" + dtype = object if use_cftime else "=M8[ns]" expected = np.array([], dtype=dtype) actual = decode_cf_datetime( np.zeros(shape=0, dtype=np.int64), @@ -1209,6 +1214,28 @@ def test_decode_float_datetime(): np.testing.assert_equal(actual, expected) +def test_decode_float_datetime_with_decimals() -> None: + # test resolution enhancement for floats + values = np.array([0, 0.125, 0.25, 0.375, 0.75, 1.0], dtype="float32") + expected = np.array( + [ + "2000-01-01T00:00:00.000", + "2000-01-01T00:00:00.125", + "2000-01-01T00:00:00.250", + "2000-01-01T00:00:00.375", + "2000-01-01T00:00:00.750", + "2000-01-01T00:00:01.000", + ], + 
dtype="=M8[ns]", + ) + + units = "seconds since 2000-01-01" + calendar = "standard" + actual = decode_cf_datetime(values, units, calendar) + assert actual.dtype == expected.dtype + np.testing.assert_equal(actual, expected) + + @requires_cftime def test_scalar_unit() -> None: # test that a scalar units (often NaN when using to_netcdf) does not raise an error diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 495d760c534..7616f12957f 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -446,7 +446,9 @@ def test_dataset_repr_with_netcdf4_datetimes(self) -> None: assert "(time) object" in repr(ds) attrs = {"units": "days since 1900-01-01"} - ds = decode_cf(Dataset({"time": ("time", [0, 1], attrs)})) + ds = decode_cf( + Dataset({"time": ("time", [0, 1], attrs)}), + ) assert "(time) datetime64[ns]" in repr(ds) @requires_cftime diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index f684fd06b13..7f6673628aa 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -30,7 +30,7 @@ broadcast, set_options, ) -from xarray.coding.times import CFDatetimeCoder +from xarray.coders import CFDatetimeCoder from xarray.core import dtypes from xarray.core.common import full_like from xarray.core.coordinates import Coordinates