Skip to content

Commit

Permalink
Merge pull request #19 from nsidc/support-ae-si12
Browse files Browse the repository at this point in the history
Refactor AMSR-related fetch code into `amsr` subpackage
  • Loading branch information
trey-stafford authored Jan 10, 2024
2 parents 2acca57 + 8368bbb commit 44e30ce
Show file tree
Hide file tree
Showing 10 changed files with 93 additions and 87 deletions.
Empty file.
11 changes: 3 additions & 8 deletions pm_tb_data/fetch/ae_si.py → pm_tb_data/fetch/amsr/ae_si.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
import xarray as xr

from pm_tb_data._types import Hemisphere
from pm_tb_data.fetch import au_si
from pm_tb_data.fetch.amsr.util import AMSR_RESOLUTIONS, normalize_amsr_tbs


def get_ae_si_tbs_from_disk(
*,
date: dt.date,
hemisphere: Hemisphere,
data_dir: Path,
resolution: au_si.AU_SI_RESOLUTIONS,
resolution: AMSR_RESOLUTIONS,
) -> xr.Dataset:
"""Return TB data from AE_SI12."""
expected_dir = data_dir / date.strftime("%Y.%m.%d")
Expand All @@ -36,12 +36,7 @@ def get_ae_si_tbs_from_disk(
# of the variables (no subgroups)
engine="netcdf4",
) as ds:
# TODO: extract normalize func to amsr util module? Make it more clear
# this is used generically for the AU/SI_* products. Need to be careful
# - not everything from the au_si module can be used for ae_si. E.g.,
# the data are stored differently and require a different invocation of
# `xr.open_dataset`
normalized = au_si._normalize_au_si_tbs(
normalized = normalize_amsr_tbs(
data_fields=ds,
resolution=resolution,
hemisphere=hemisphere,
Expand Down
54 changes: 9 additions & 45 deletions pm_tb_data/fetch/au_si.py → pm_tb_data/fetch/amsr/au_si.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@
import datetime as dt
import re
from pathlib import Path
from typing import Literal

import xarray as xr
from loguru import logger

from pm_tb_data._types import Hemisphere
from pm_tb_data.fetch.amsr.util import AMSR_RESOLUTIONS, normalize_amsr_tbs

# Resolution strings accepted for AU_SI products; they are interpolated into
# data file names and variable names as `{resolution}km`.
AU_SI_RESOLUTIONS = Literal["25", "12"]
# Matches AU_SI 12km data file names, capturing the file type (`P` or `R`),
# the file version, and the 8-digit file date. The dot before the `he5`
# extension is escaped so that it only matches a literal `.` rather than any
# character.
AU_SI_FN_REGEX = re.compile(
    r"AMSR_U2_L3_SeaIce12km_(?P<file_type>P|R)(?P<file_version>.*)_(?P<file_date>\d{8})\.he5"
)
Expand All @@ -22,7 +21,7 @@
def get_au_si_fp_on_disk(
data_dir: Path,
date: dt.date,
resolution: AU_SI_RESOLUTIONS,
resolution: AMSR_RESOLUTIONS,
) -> Path:
"""Get the filepath to a AU_SI data file on disk."""
glob_pattern = f"AMSR_U2_L3_SeaIce{resolution}km_*_{date:%Y%m%d}.he5"
Expand All @@ -40,7 +39,7 @@ def get_au_si_fp_on_disk(
def _get_au_si_data_fields(
*,
hemisphere: Hemisphere,
resolution: AU_SI_RESOLUTIONS,
resolution: AMSR_RESOLUTIONS,
data_filepath: Path,
) -> xr.Dataset:
"""Return the data fields from the given `data_filepath` as an xr ds.
Expand All @@ -61,47 +60,10 @@ def _get_au_si_data_fields(
return ds


def _normalize_au_si_tbs(
    data_fields: xr.Dataset,
    resolution: AU_SI_RESOLUTIONS,
    hemisphere: Hemisphere,
) -> xr.Dataset:
    """Return the daily-average Tbs from `data_fields` under standard names.

    Only variables whose name starts with
    `SI_{resolution}km_{first letter of hemisphere, upper-cased}H_`, followed
    by a two-digit channel, a polarization (`H` or `V`) and `_DAY`, are kept.
    All other variables are dropped.

    Each kept variable is renamed to the lower-cased polarization followed by
    the channel. E.g., `SI_25km_NH_06H_DAY` becomes `h06`.
    """
    var_pattern = re.compile(
        f"SI_{resolution}km_{hemisphere[0].upper()}H_"
        r"(?P<channel>\d{2})(?P<polarization>H|V)_DAY"
    )

    # Pair every variable name with its (possibly absent) regex match so the
    # comprehension below can filter and rename in a single pass.
    candidates = ((var, var_pattern.match(str(var))) for var in data_fields.keys())

    # Variable attrs are carried over; the name and dims are replaced so that
    # the output is consistent regardless of the source product.
    renamed = {
        f"{match.group('polarization').lower()}{match.group('channel')}": xr.DataArray(
            data_fields[var].data,
            dims=("fake_y", "fake_x"),
            attrs=data_fields[var].attrs,
        )
        for var, match in candidates
        if match
    }

    return xr.Dataset(renamed)


def get_au_si_tbs_from_disk(
*,
hemisphere: Hemisphere,
resolution: AU_SI_RESOLUTIONS,
resolution: AMSR_RESOLUTIONS,
data_filepath: Path,
) -> xr.Dataset:
"""Access AU_SI brightness temperatures from data files on local disk."""
Expand All @@ -110,8 +72,10 @@ def get_au_si_tbs_from_disk(
resolution=resolution,
data_filepath=data_filepath,
)
tb_data = _normalize_au_si_tbs(
data_fields, resolution=resolution, hemisphere=hemisphere
tb_data = normalize_amsr_tbs(
data_fields,
resolution=resolution,
hemisphere=hemisphere,
)

return tb_data
Expand All @@ -121,7 +85,7 @@ def get_au_si_tbs(
*,
date: dt.date,
hemisphere: Hemisphere,
resolution: AU_SI_RESOLUTIONS,
resolution: AMSR_RESOLUTIONS,
) -> xr.Dataset:
"""Access NSIDC AU_SI{resolution} data from disk.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
from loguru import logger

from pm_tb_data._types import Hemisphere
from pm_tb_data.fetch import au_si
from pm_tb_data.fetch.amsr import au_si
from pm_tb_data.fetch.amsr.util import AMSR_RESOLUTIONS
from pm_tb_data.fetch.errors import FetchRemoteDataError

EXPECTED_LANCE_AMSR2_FILE_VERSION = "04"
Expand Down Expand Up @@ -233,7 +234,7 @@ def access_local_lance_data(
Returns full orbit daily average data TBs.
"""
data_resolution: au_si.AU_SI_RESOLUTIONS = "12"
data_resolution: AMSR_RESOLUTIONS = "12"
data_filepath = au_si.get_au_si_fp_on_disk(
data_dir=data_dir,
date=date,
Expand Down
45 changes: 45 additions & 0 deletions pm_tb_data/fetch/amsr/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import re
from typing import Literal

import xarray as xr

from pm_tb_data._types import Hemisphere

# Resolution strings accepted by the AMSR fetch code. The value is interpolated
# into variable names as `SI_{resolution}km_...`.
AMSR_RESOLUTIONS = Literal["25", "12"]


def normalize_amsr_tbs(
    data_fields: xr.Dataset,
    resolution: AMSR_RESOLUTIONS,
    hemisphere: Hemisphere,
) -> xr.Dataset:
    """Build a dataset of daily-average Tbs with standardized names.

    Intended for the data fields of AU_SI* and AE_SI* products.

    A variable is kept only if its name starts with
    `SI_{resolution}km_{first letter of hemisphere, upper-cased}H_`, followed
    by a two-digit channel, a polarization (`H` or `V`) and `_DAY`. Everything
    else is filtered out.

    Kept variables are renamed to the lower-cased polarization followed by the
    channel. E.g., `SI_25km_NH_06H_DAY` becomes `h06`.
    """
    tb_name_regex = re.compile(
        f"SI_{resolution}km_{hemisphere[0].upper()}H_"
        r"(?P<channel>\d{2})(?P<polarization>H|V)_DAY"
    )

    standardized = {}
    for name in data_fields.keys():
        found = tb_name_regex.match(str(name))
        if found is None:
            # Not a daily-average Tb variable for this resolution/hemisphere.
            continue

        source_var = data_fields[name]
        channel = found.group("channel")
        polarization = found.group("polarization").lower()

        # Keep the variable's attrs, but give it a new name and new dims for
        # consistency.
        standardized[f"{polarization}{channel}"] = xr.DataArray(
            source_var.data,
            dims=("fake_y", "fake_x"),
            attrs=source_var.attrs,
        )

    return xr.Dataset(standardized)
2 changes: 1 addition & 1 deletion tests/integration/test_ae_si.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path

from pm_tb_data._types import NORTH
from pm_tb_data.fetch.ae_si import get_ae_si_tbs_from_disk
from pm_tb_data.fetch.amsr.ae_si import get_ae_si_tbs_from_disk

# Directory in which AE_SI12 V3 data is expected to be found.
# NOTE/TODO: This path is specific to NSIDC infrastructure. Make more generic?
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_au_si.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path

from pm_tb_data._types import NORTH
from pm_tb_data.fetch.au_si import get_au_si_tbs
from pm_tb_data.fetch.amsr.au_si import get_au_si_tbs

# Directory in which AU_SI12 V3 data is expected to be found.
# NOTE/TODO: This path is specific to NSIDC infrastructure. Make more generic?
Expand Down
29 changes: 29 additions & 0 deletions tests/unit/test_amsr_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import numpy as np
import xarray as xr
from xarray.testing import assert_equal

from pm_tb_data._types import NORTH
from pm_tb_data.fetch.amsr.util import normalize_amsr_tbs


def test_normalize_amsr_tbs():
    """Check that matching Tb variables are renamed and put on the fake dims."""

    def _grid(start):
        # 2x3 grid of consecutive integers beginning at `start`.
        return np.arange(start, start + 6).reshape(2, 3)

    source_dims = ("Y", "X")
    input_fields = xr.Dataset(
        data_vars={
            "SI_25km_NH_06H_DAY": (source_dims, _grid(0)),
            "SI_25km_NH_89V_DAY": (source_dims, _grid(5)),
        },
    )

    normalized_dims = ("fake_y", "fake_x")
    expected = xr.Dataset(
        data_vars={
            "h06": (normalized_dims, _grid(0)),
            "v89": (normalized_dims, _grid(5)),
        },
    )

    actual = normalize_amsr_tbs(
        data_fields=input_fields,
        resolution="25",
        hemisphere=NORTH,
    )

    assert_equal(actual, expected)
30 changes: 1 addition & 29 deletions tests/unit/test_au_si.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,7 @@
import datetime as dt
from pathlib import Path

import numpy as np
import xarray as xr
from xarray.testing import assert_equal

from pm_tb_data._types import NORTH
from pm_tb_data.fetch import au_si


def test__normalize_au_si_tbs():
    """Check that AU_SI Tb variables are renamed and put on the fake dims."""
    # Source variable name -> (expected normalized name, 2x3 data values).
    cases = {
        "SI_25km_NH_06H_DAY": ("h06", np.arange(0, 6).reshape(2, 3)),
        "SI_25km_NH_89V_DAY": ("v89", np.arange(5, 11).reshape(2, 3)),
    }

    input_vars = {}
    expected_vars = {}
    for source_name, (normalized_name, values) in cases.items():
        input_vars[source_name] = (("Y", "X"), values.copy())
        expected_vars[normalized_name] = (("fake_y", "fake_x"), values.copy())

    result = au_si._normalize_au_si_tbs(
        data_fields=xr.Dataset(data_vars=input_vars),
        resolution="25",
        hemisphere=NORTH,
    )

    assert_equal(result, xr.Dataset(data_vars=expected_vars))
from pm_tb_data.fetch.amsr import au_si


def test_get_au_si_fp_on_disk(fs):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_lance_amsr2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest

import pm_tb_data.fetch.lance_amsr2 as lance_amsr2
import pm_tb_data.fetch.amsr.lance_amsr2 as lance_amsr2
from pm_tb_data.fetch.errors import FetchRemoteDataError


Expand Down

0 comments on commit 44e30ce

Please sign in to comment.