Skip to content

Commit

Permalink
Merge pull request #10 from nsidc/handle-404-granule-lance-amsr2
Browse files Browse the repository at this point in the history
LANCE AMSR2 fetch: Handle 404 granule
  • Loading branch information
trey-stafford authored Oct 11, 2023
2 parents 9dca60e + f584d0c commit 4a724c4
Showing 1 changed file with 23 additions and 1 deletion.
24 changes: 23 additions & 1 deletion pm_tb_data/fetch/lance_amsr2.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,22 @@ def _filter_out_last_day(*, granules_by_date: GranuleInfoByDate) -> GranuleInfoB
# actual data. See associated issue here:
# https://github.com/nsidc/earthaccess/issues/307
def download_latest_lance_files(
*, output_dir: Path, overwrite: bool = False
*,
output_dir: Path,
overwrite: bool = False,
fail_on_404: bool = False,
) -> list[Path]:
"""Download the latest LANCE AMSR2 data files that are ready for NRT.
The latest available day of data ready for NRT is the day before the latest
available file, unless the latest available file is an `R` file.
NOTE: because of a problem with CMR providing results for granules that do
not exist at the specified download location, attempts to fetch data files
that result in a 404 response code (Not Found) will cause a warning to be
logged and that granule will be skipped. Setting `fail_on_404=True` will
cause an HttpError to be raised for these cases instead.
Returns a list of paths to newly downloaded data.
"""
# LANCE only has the last 14 days worth of data at any given time. For
Expand Down Expand Up @@ -187,6 +196,19 @@ def download_latest_lance_files(
stream=True,
headers={"User-Agent": "pm_tb_data"},
) as resp:
if resp.status_code == 404 and not fail_on_404:
# If we receive a 404 response for a granule, log a warning and
# skip. We have observed this problem starting on Oct. 10,
# 2023. CMR reports an R file for 2023-10-09, but only a P file
# exists. This issue was raised on the earthdata forum, but
# cannot be fixed on NSIDC's side.
logger.warning(
"Got a 404 response for granule reported by CMR:"
f" url={granule_by_date['data_url']}."
" This may be a problem with the LANCE CMR record. Skipping..."
)
continue

resp.raise_for_status()
output_paths.append(output_path)
# TODO: it would be ideal to write this to a temp dir, then move it
Expand Down

0 comments on commit 4a724c4

Please sign in to comment.