diff --git a/config/example/datamodule.yaml b/config/example/datamodule.yaml index c1f1bea..4725d74 100644 --- a/config/example/datamodule.yaml +++ b/config/example/datamodule.yaml @@ -3,9 +3,9 @@ dataloader: datasets_spec: msg: - data_path: /path/to/your/data + data_path: /path/to/msg/data goes: - data_path: /path/to/your/data + data_path: /path/to/goes/data load_coords: True load_cloudmask: True diff --git a/config/example/download.yaml b/config/example/download.yaml index cffed2d..4c93590 100644 --- a/config/example/download.yaml +++ b/config/example/download.yaml @@ -1,15 +1,15 @@ # PERIOD period: - start_date: '2020-10-01' + start_date: '2020-01-01' start_time: '00:00:00' - end_date: '2020-10-31' + end_date: '2020-12-31' end_time: '23:59:00' # CLOUD MASK cloud_mask: True # PATH FOR SAVING DATA -save_dir: data +save_dir: /path/to/save/data defaults: - _self_ diff --git a/config/example/geoprocess.yaml b/config/example/geoprocess.yaml index 9198472..e623b4e 100644 --- a/config/example/geoprocess.yaml +++ b/config/example/geoprocess.yaml @@ -1,8 +1,8 @@ # PATH WITH RAW DATA -read_path: data +read_path: /path/to/downloaded/data # PATH FOR SAVING GEOPROCESSED DATA -save_path: data +save_path: /path/to/save/geoprocessed/data defaults: - _self_ diff --git a/config/example/main.yaml b/config/example/main.yaml index 9749232..9e5f3e1 100644 --- a/config/example/main.yaml +++ b/config/example/main.yaml @@ -1,7 +1,7 @@ defaults: - download - geoprocess - - patch - - satellite: terra + # - patch + - satellite: msg -stage: patch \ No newline at end of file +stage: geoprocess \ No newline at end of file diff --git a/config/example/patch.yaml b/config/example/patch.yaml index bfc1bda..b08f7fa 100644 --- a/config/example/patch.yaml +++ b/config/example/patch.yaml @@ -1,8 +1,8 @@ # PATH WITH GEOPROCESSED DATA -read_path: data +read_path: /path/to/geoprocessed/data # PATH FOR SAVING PATCHES -save_path: data +save_path: /path/to/save/patches # PATCH PARAMETERS patch_size: 256 @@ 
-11,7 +11,7 @@ stride_size: 256 # NAN CUTOFF nan_cutoff: 0.5 -# FILETYPE TO SAVE [nc = netcdf, np = numpy] +# FILETYPE TO SAVE [nc = netcdf, tif = geotiff, np = numpy, npz = compressed numpy] save_filetype: nc defaults: diff --git a/config/example/preprocess.yaml b/config/example/preprocess.yaml deleted file mode 100644 index e69de29..0000000 diff --git a/config/example/satellite/aqua.yaml b/config/example/satellite/aqua.yaml index 20c048d..90d7d8d 100644 --- a/config/example/satellite/aqua.yaml +++ b/config/example/satellite/aqua.yaml @@ -13,8 +13,6 @@ geoprocess: save_path: ${save_path}/aqua/geoprocessed satellite: aqua -# preprocess: - patch: _target_: rs_tools._src.preprocessing.prepatcher.prepatch read_path: ${read_path}/aqua/geoprocessed diff --git a/config/example/satellite/goes.yaml b/config/example/satellite/goes.yaml index 56a48a4..29f07ed 100644 --- a/config/example/satellite/goes.yaml +++ b/config/example/satellite/goes.yaml @@ -5,20 +5,18 @@ download: start_time: ${period.start_time} end_date: ${period.end_date} end_time: ${period.end_time} - daily_window_t0: "14:00:00" - daily_window_t1: "20:00:00" + daily_window_t0: "00:00:00" + daily_window_t1: "23:59:00" time_step: "1:00:00" geoprocess: _target_: rs_tools._src.geoprocessing.goes.geoprocessor_goes16.geoprocess - read_path: ${read_path}/goes16/raw - save_path: ${save_path}/goes16/geoprocessed - resolution: null - region: "-130 -15 -90 5" + read_path: ${read_path}/goes-data/2020/split_${split} + save_path: ${save_path}/goes-geoprocessed/goes + resolution: null # MSG resolution = 3000.40 + region: null resample_method: bilinear -# preprocess: - patch: _target_: rs_tools._src.preprocessing.prepatcher.prepatch read_path: ${read_path}/goes16/geoprocessed diff --git a/config/example/satellite/msg.yaml b/config/example/satellite/msg.yaml index 8b8c86d..a7388a8 100644 --- a/config/example/satellite/msg.yaml +++ b/config/example/satellite/msg.yaml @@ -5,20 +5,18 @@ download: start_time: ${period.start_time} 
end_date: ${period.end_date} end_time: ${period.end_time} - daily_window_t0: "9:00:00" - daily_window_t1: "17:00:00" + daily_window_t0: "00:00:00" + daily_window_t1: "23:59:00" time_step: "1:00:00" geoprocess: _target_: rs_tools._src.geoprocessing.msg.geoprocessor_msg.geoprocess - read_path: ${read_path}/msg/raw - save_path: ${save_path}/msg/geoprocessed + read_path: ${read_path}/msg-data/msg + save_path: ${save_path}/msg-geoprocessed/msg resolution: null - region: "-70 -15 20 5" + region: null resample_method: bilinear -# preprocess: - patch: _target_: rs_tools._src.preprocessing.prepatcher.prepatch read_path: ${read_path}/msg/geoprocessed diff --git a/config/example/satellite/terra.yaml b/config/example/satellite/terra.yaml index ca158fd..aa31fd8 100644 --- a/config/example/satellite/terra.yaml +++ b/config/example/satellite/terra.yaml @@ -13,8 +13,6 @@ geoprocess: save_path: ${save_path}/terra/geoprocessed satellite: terra -# preprocess: - patch: _target_: rs_tools._src.preprocessing.prepatcher.prepatch read_path: ${read_path}/terra/geoprocessed diff --git a/environments/environment.yaml b/environments/environment.yaml index faa7240..fea3f32 100644 --- a/environments/environment.yaml +++ b/environments/environment.yaml @@ -38,6 +38,9 @@ dependencies: - typer - einops - cartopy + - google-cloud-storage + - gcsfs + - xrpatcher # formatting - rasterio - black diff --git a/notebooks/dev/goes/1.6-additional-download.ipynb b/notebooks/dev/goes/1.6-additional-download.ipynb new file mode 100644 index 0000000..e601000 --- /dev/null +++ b/notebooks/dev/goes/1.6-additional-download.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import glob\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "path = '/mnt/disks/goes-data/2020/CM/'" + ] + 
}, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "def get_list_filenames(data_path: str=\"./\", ext: str=\"*\"):\n", + " \"\"\"\n", + " Loads a list of file names within a directory.\n", + "\n", + " Args:\n", + " data_path (str, optional): The directory path to search for files. Defaults to \"./\".\n", + " ext (str, optional): The file extension to filter the search. Defaults to \"*\".\n", + "\n", + " Returns:\n", + " List[str]: A sorted list of file names matching the given extension within the directory.\n", + " \"\"\"\n", + " pattern = f\"*{ext}\"\n", + " return sorted(glob.glob(os.path.join(data_path, \"**\", pattern), recursive=True))" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "files = get_list_filenames(path, ext=\"nc\")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8733" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(files)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "rs_tools", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/dev/goes/1.7-separate-files.ipynb b/notebooks/dev/goes/1.7-separate-files.ipynb new file mode 100644 index 0000000..26a9775 --- /dev/null +++ b/notebooks/dev/goes/1.7-separate-files.ipynb @@ -0,0 +1,1200 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + 
"source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "L1b_path = '/mnt/disks/goes-data/2020/L1b/'\n", + "CM_path = '/mnt/disks/goes-data/2020/CM/'" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "L1b_folders = os.listdir(L1b_path)\n", + "L1b_folders.sort()\n", + "CM_folders = os.listdir(CM_path)\n", + "CM_folders.sort()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(365, 365)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(CM_folders), len(L1b_folders)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "split_size = (len(L1b_folders) // 10) + 1" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "# Split the L1b folders into 10 groups and move to a different directory\n", + "\n", + "for i in range(10):\n", + " new_dir = f'/mnt/disks/goes-data/2020/split_{i}/'\n", + " os.makedirs(new_dir, exist_ok=True)\n", + " os.makedirs(new_dir + 'L1b/', exist_ok=True)\n", + " os.makedirs(new_dir + 'CM/', exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(10):\n", + " start = i * split_size\n", + " end = (i + 1) * split_size\n", + " if end > len(L1b_folders):\n", + " end = len(L1b_folders)\n", + " new_dir_L1b = f'/mnt/disks/goes-data/2020/split_{i}/L1b/'\n", + " new_dir_CM = f'/mnt/disks/goes-data/2020/split_{i}/CM/'\n", + " for folder in L1b_folders[start:end]:\n", + " os.rename(os.path.join(L1b_path, folder), os.path.join(new_dir_L1b, folder))\n", + " for folder in CM_folders[start:end]:\n", + " os.rename(os.path.join(CM_path, folder), os.path.join(new_dir_CM, 
folder))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# Undo changes and move all files back\n", + "\n", + "for i in range(10):\n", + " new_dir_L1b = f'/mnt/disks/goes-data/2020/split_{i}/L1b/'\n", + " new_dir_CM = f'/mnt/disks/goes-data/2020/split_{i}/CM/'\n", + " for folder in os.listdir(new_dir_L1b):\n", + " os.rename(os.path.join(new_dir_L1b, folder), os.path.join(L1b_path, folder))\n", + " for folder in os.listdir(new_dir_CM):\n", + " os.rename(os.path.join(new_dir_CM, folder), os.path.join(CM_path, folder))\n", + " os.rmdir(new_dir_L1b)\n", + " os.rmdir(new_dir_CM)\n", + " os.rmdir(f'/mnt/disks/goes-data/2020/split_{i}/')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/rs_tools/lib/python3.11/site-packages/goes2go/data.py:519: FutureWarning: 'H' is deprecated and will be removed in a future version. Please use 'h' instead of 'H'.\n", + " within=pd.to_timedelta(config[\"nearesttime\"].get(\"within\", \"1H\")),\n", + "/opt/conda/envs/rs_tools/lib/python3.11/site-packages/goes2go/NEW.py:188: FutureWarning: 'H' is deprecated and will be removed in a future version. Please use 'h' instead of 'H'.\n", + " within=pd.to_timedelta(config[\"nearesttime\"].get(\"within\", \"1H\")),\n" + ] + } + ], + "source": [ + "import xarray as xr\n", + "import autoroot\n", + "from rs_tools._src.geoprocessing.goes.reproject import add_goes16_crs\n", + "\n", + "goes_ds = xr.open_dataset('/mnt/disks/goes-geoprocessed/goes/20200208050009_goes16.nc')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset> Size: 1GB\n", + "Dimensions: (band: 16, y: 3623, x: 3623, time: 1,\n", + " band_wavelength: 16)\n", + "Coordinates:\n", + " * x (x) float64 29kB -5.433e+06 -5.43e+06 ... 5.434e+06\n", + " * y (y) float64 29kB 5.433e+06 5.43e+06 ... -5.434e+06\n", + " * time (time) <U16 64B '2020-02-08 05:04'\n", + " * band_wavelength (band_wavelength) float32 64B 0.47 0.64 ... 13.27\n", + " latitude (y, x) float32 53MB ...\n", + " longitude (y, x) float32 53MB ...\n", + " * band (band) int64 128B 1 2 3 4 5 6 ... 11 12 13 14 15 16\n", + " cloud_mask (y, x) float64 105MB ...\n", + "Data variables:\n", + " Rad (band, y, x) float32 840MB ...\n", + " goes_imager_projection int64 8B ...\n", + "Attributes: (12/30)\n", + " naming_authority: gov.nesdis.noaa\n", + " Conventions: CF-1.7\n", + " standard_name_vocabulary: CF Standard Name Table (v35, 20 July 2016)\n", + " institution: DOC/NOAA/NESDIS > U.S. Department of Commerce,...\n", + " project: GOES\n", + " production_site: WCDAS\n", + " ... ...\n", + " timeline_id: ABI Mode 6\n", + " date_created: 2020-02-08T05:09:45.7Z\n", + " time_coverage_start: 2020-02-08T05:00:09.1Z\n", + " time_coverage_end: 2020-02-08T05:09:39.9Z\n", + " LUT_Filenames: SpaceLookParams(FM1A_CDRL79RevP_PR_08_01_01)-6...\n", + " id: 75f7f563-5db0-4adc-9fb7-d144223b4ef8
<xarray.Dataset> Size: 945MB\n", + "Dimensions: (band: 16, y: 3623, x: 3623, time: 1,\n", + " band_wavelength: 16)\n", + "Coordinates:\n", + " * x (x) float64 29kB -5.433e+06 -5.43e+06 ... 5.434e+06\n", + " * y (y) float64 29kB 5.433e+06 5.43e+06 ... -5.434e+06\n", + " * time (time) <U16 64B '2020-02-08 05:04'\n", + " * band_wavelength (band_wavelength) float32 64B 0.47 0.64 ... 13.27\n", + " goes_imager_projection int64 8B 0\n", + " * band (band) int64 128B 1 2 3 4 5 6 ... 11 12 13 14 15 16\n", + "Data variables:\n", + " Rad (band, y, x) float32 840MB ...\n", + " cloud_mask (y, x) float64 105MB ...\n", + "Attributes: (12/30)\n", + " naming_authority: gov.nesdis.noaa\n", + " Conventions: CF-1.7\n", + " standard_name_vocabulary: CF Standard Name Table (v35, 20 July 2016)\n", + " institution: DOC/NOAA/NESDIS > U.S. Department of Commerce,...\n", + " project: GOES\n", + " production_site: WCDAS\n", + " ... ...\n", + " timeline_id: ABI Mode 6\n", + " date_created: 2020-02-08T05:09:45.7Z\n", + " time_coverage_start: 2020-02-08T05:00:09.1Z\n", + " time_coverage_end: 2020-02-08T05:09:39.9Z\n", + " LUT_Filenames: SpaceLookParams(FM1A_CDRL79RevP_PR_08_01_01)-6...\n", + " id: 75f7f563-5db0-4adc-9fb7-d144223b4ef8