Skip to content

Commit

Permalink
Per-variable specification of boolean parameters in open_dataset (pyd…
Browse files Browse the repository at this point in the history
…ata#9218)

* allow per-variable choice of mask_and_scale in open_dataset

* simplify docstring datatype

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* dict -> Mapping in type annotation

Co-authored-by: Michael Niklas  <mick.niklas@gmail.com>

* use typevar for _item_or_default annotation

Otherwise you lose all typing when you use that because it returns Any.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* implement feature for 4 additional parameters

* fix default value inconsistency

* add what's new + None annotation

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* _item_or_default return type T | None

* remove the default value of the `default` argument of _item_or_default

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* docstring dtype naming

---------

Co-authored-by: Mathijs Verhaegh <mathijs@chromodynamics.nl>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Michael Niklas <mick.niklas@gmail.com>
  • Loading branch information
4 people authored Jul 16, 2024
1 parent 076c0c2 commit 7477fd1
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 24 deletions.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ v2024.06.1 (unreleased)

New Features
~~~~~~~~~~~~
- Allow per-variable specification of ``mask_and_scale``, ``decode_times``, ``decode_timedelta``,
``use_cftime`` and ``concat_characters`` params in :py:func:`~xarray.open_dataset` (:pull:`9218`).
By `Mathijs Verhaegh <https://github.com/Ostheer>`_.
- Allow chunking for arrays with duplicated dimension names (:issue:`8759`, :pull:`9099`).
By `Martin Raspaud <https://github.com/mraspaud>`_.
- Extract the source url from fsspec objects (:issue:`9142`, :pull:`8923`).
Expand Down
34 changes: 22 additions & 12 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,11 +398,11 @@ def open_dataset(
chunks: T_Chunks = None,
cache: bool | None = None,
decode_cf: bool | None = None,
mask_and_scale: bool | None = None,
decode_times: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | None = None,
concat_characters: bool | None = None,
mask_and_scale: bool | Mapping[str, bool] | None = None,
decode_times: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
concat_characters: bool | Mapping[str, bool] | None = None,
decode_coords: Literal["coordinates", "all"] | bool | None = None,
drop_variables: str | Iterable[str] | None = None,
inline_array: bool = False,
Expand Down Expand Up @@ -451,25 +451,31 @@ def open_dataset(
decode_cf : bool, optional
Whether to decode these variables, assuming they were saved according
to CF conventions.
mask_and_scale : bool, optional
mask_and_scale : bool or dict-like, optional
If True, replace array values equal to `_FillValue` with NA and scale
values according to the formula `original_values * scale_factor +
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
taken from variable attributes (if they exist). If the `_FillValue` or
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA. This keyword may not be supported by all the backends.
decode_times : bool, optional
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_times : bool or dict-like, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_timedelta : bool, optional
decode_timedelta : bool or dict-like, optional
If True, decode variables and coordinates with time units in
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
into timedelta objects. If False, leave them encoded as numbers.
If None (default), assume the same value of ``decode_times``.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
use_cftime: bool, optional
use_cftime : bool or dict-like, optional
Only relevant if encoded dates come from a standard calendar
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
specified). If None (default), attempt to decode times to
Expand All @@ -478,12 +484,16 @@ def open_dataset(
``cftime.datetime`` objects, regardless of whether or not they can be
represented using ``np.datetime64[ns]`` objects. If False, always
decode times to ``np.datetime64[ns]`` objects; if this is not possible
raise an error. This keyword may not be supported by all the backends.
concat_characters : bool, optional
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
concat_characters : bool or dict-like, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
removed) if they have no corresponding variable and if they are only
used as the last dimension of character arrays.
Pass a mapping, e.g. ``{"my_variable": False}``,
to toggle this feature per-variable individually.
This keyword may not be supported by all the backends.
decode_coords : bool or {"coordinates", "all"}, optional
Controls which variables are set as coordinate variables:
Expand Down
34 changes: 22 additions & 12 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from collections import defaultdict
from collections.abc import Hashable, Iterable, Mapping, MutableMapping
from typing import TYPE_CHECKING, Any, Literal, Union
from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -384,16 +384,26 @@ def _update_bounds_encoding(variables: T_Variables) -> None:
bounds_encoding.setdefault("calendar", encoding["calendar"])


T = TypeVar("T")


def _item_or_default(obj: Mapping[Any, T] | T, key: Hashable, default: T) -> T:
"""
Return item by key if obj is mapping and key is present, else return default value.
"""
return obj.get(key, default) if isinstance(obj, Mapping) else obj


def decode_cf_variables(
variables: T_Variables,
attributes: T_Attrs,
concat_characters: bool = True,
mask_and_scale: bool = True,
decode_times: bool = True,
concat_characters: bool | Mapping[str, bool] = True,
mask_and_scale: bool | Mapping[str, bool] = True,
decode_times: bool | Mapping[str, bool] = True,
decode_coords: bool | Literal["coordinates", "all"] = True,
drop_variables: T_DropVariables = None,
use_cftime: bool | None = None,
decode_timedelta: bool | None = None,
use_cftime: bool | Mapping[str, bool] | None = None,
decode_timedelta: bool | Mapping[str, bool] | None = None,
) -> tuple[T_Variables, T_Attrs, set[Hashable]]:
"""
Decode several CF encoded variables.
Expand Down Expand Up @@ -431,7 +441,7 @@ def stackable(dim: Hashable) -> bool:
if k in drop_variables:
continue
stack_char_dim = (
concat_characters
_item_or_default(concat_characters, k, True)
and v.dtype == "S1"
and v.ndim > 0
and stackable(v.dims[-1])
Expand All @@ -440,12 +450,12 @@ def stackable(dim: Hashable) -> bool:
new_vars[k] = decode_cf_variable(
k,
v,
concat_characters=concat_characters,
mask_and_scale=mask_and_scale,
decode_times=decode_times,
concat_characters=_item_or_default(concat_characters, k, True),
mask_and_scale=_item_or_default(mask_and_scale, k, True),
decode_times=_item_or_default(decode_times, k, True),
stack_char_dim=stack_char_dim,
use_cftime=use_cftime,
decode_timedelta=decode_timedelta,
use_cftime=_item_or_default(use_cftime, k, None),
decode_timedelta=_item_or_default(decode_timedelta, k, None),
)
except Exception as e:
raise type(e)(f"Failed to decode variable {k!r}: {e}") from e
Expand Down

0 comments on commit 7477fd1

Please sign in to comment.