Merge pull request #3 from spaceml-org/nkwalkthrough
WIP: Update Solo and SDO data downloaders
jejjohnson authored Dec 4, 2023
2 parents ed1a0ed + 4377e0d commit 93370ea
Showing 18 changed files with 10,523 additions and 440 deletions.
1 change: 1 addition & 0 deletions .env.example
@@ -0,0 +1 @@
SDO_EMAIL=email@example.com
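This variable is read later in this diff by the SDO downloader's `main()` via `os.getenv('SDO_EMAIL')`. A minimal sketch of loading it from a local `.env` file, assuming the `python-dotenv` package (which is not listed as a dependency in this commit):

```python
import os

from dotenv import load_dotenv  # assumption: python-dotenv is installed

# Load variables from a local .env file (copied from .env.example) into the environment.
load_dotenv()

email = os.getenv("SDO_EMAIL")
if email is None:
    raise RuntimeError("SDO_EMAIL is not set; copy .env.example to .env and fill it in.")
```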
5 changes: 4 additions & 1 deletion .gitignore
@@ -146,4 +146,7 @@ datasets/*
outputs/
jbook/_build/
_build/
.idea/
.idea/

**notebooks/*data*
**notebooks/*png*
19 changes: 14 additions & 5 deletions README.md
@@ -45,23 +45,32 @@ See [notebook](./notebooks/3.2_rastervision.ipynb) for details.
We can install it directly via pip:

```bash
pip install git+https://github.com/jejjohnson/helio_tools
pip install git+https://github.com/spaceml-org/helio_tools
```

We also use Poetry for the development environment:

```bash
git clone https://github.com/jejjohnson/helio_tools.git
git clone https://github.com/spaceml-org/helio_tools
cd helio_tools
conda create -n helio_tools python=3.11 poetry
conda activate helio_tools
poetry install
```



---
## References

**Software**

* [InstrumentToInstrument](https://github.com/RobertJaro/InstrumentToInstrument/tree/master) - Instrument-to-Instrument Translation.
* [InstrumentToInstrument](https://github.com/RobertJaro/InstrumentToInstrument/tree/master) - Instrument-to-Instrument Translation.

**Glossary**

* [SDO](https://sdo.gsfc.nasa.gov/) - Solar Dynamics Observatory.
* [AIA](https://sdo.gsfc.nasa.gov/data/) - Atmospheric Imaging Assembly.
* [HMI](https://sdo.gsfc.nasa.gov/data/) - Helioseismic and Magnetic Imager.
* [EVE](https://lasp.colorado.edu/home/eve/data/) - Extreme Ultraviolet Variability Experiment.
* [SolO](https://sci.esa.int/web/solar-orbiter) - Solar Orbiter.
* FSI - Full Sun Imager.
* [SOHO](https://soho.nascom.nasa.gov//) - Solar and Heliospheric Observatory.
2 changes: 2 additions & 0 deletions environment.yaml
@@ -9,9 +9,11 @@ dependencies:
- scikit-image
- astropy
- sunpy
- sunpy-soar
- conda-forge::aiapy
- pandas # Data structure
- scikit-learn # Machine Learning
- joblib # Parallelization
# PLOTTING LIBRARY
- matplotlib # standard plotting library
- seaborn # Stats viz library
@@ -4,17 +4,17 @@
DEFAULT_WAVELENGTHS = [171, 193, 211, 304]



class SDOData:
email: str
base_path: str
wavelengths: List[str | int | float]=DEFAULT_WAVELENGTHS
wavelengths: List[str | int | float] = DEFAULT_WAVELENGTHS
n_workers: int

def download_soho(

def download_sdo(
email: str, base_path: str,
wavelengths: List[str | int | float]=DEFAULT_WAVELENGTHS,
n_workers: int=5
wavelengths: List[str | int | float] = DEFAULT_WAVELENGTHS,
n_workers: int = 5
) -> None:
"""A simple download script do down
@@ -24,4 +24,4 @@ def download_soho(
        wavelengths (list[int|str|float]): the wavelengths we would like to download
n_workers (int): the number of workers for the download.
"""
pass
pass
@@ -1,3 +1,10 @@
"""
Script to download data products from the SDO database at http://jsoc.stanford.edu/
to a local directory. Uses the drms python package, the default package for downloading SDO data.
Documentation for DRMS: https://docs.sunpy.org/projects/drms/en/latest/
"""

import argparse
import logging
import multiprocessing
@@ -15,21 +22,76 @@

DEFAULT_WAVELENGTHS = [171, 193, 211, 304]
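The SDODownloader class below is built around the `drms` keyword/segment query pattern. As a standalone illustration of that pattern (not part of this commit), a minimal sketch querying a single HMI magnetogram record; the timestamp is a placeholder and the email stands in for an address registered with JSOC:

```python
import drms

# Placeholder email; JSOC expects a registered address.
client = drms.Client(email="you@example.com")

# One 720s HMI magnetogram record around the given TAI timestamp.
ds = "hmi.M_720s[2022.03.01_00:00:00_TAI]{magnetogram}"

# key=... returns a DataFrame of header keywords, seg=... a DataFrame of segment paths
# relative to http://jsoc.stanford.edu (the same pattern SDODownloader uses).
keys, segments = client.query(ds, key="T_REC, QUALITY", seg="magnetogram")
print(keys.T_REC, segments.magnetogram)
```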


class SDODownloader:
def __init__(self, base_path: str = None,
email: str = None,
wavelengths: list[str | int | float] = DEFAULT_WAVELENGTHS,
                 n_workers: int = 5) -> None:
        """The SDO Downloader is an efficient way to download data from the SDO database.
Args:
base_path (str): the base path where the data should be downloaded to.
            email (str): the email address used to initialize the DRMS client.
            wavelengths (list[int|str|float]): the wavelengths we would like to download.
n_workers (int): the number of workers for the download.
def __init__(self, base_path, email, wavelengths=DEFAULT_WAVELENGTHS, n_workers=5):
Example Usage:
>>> downloader_sdo = SDODownloader(...)
>>> downloader_sdo.downloadDate(datetime(2022, 3, 1))
"""
self.ds_path = base_path
self.wavelengths = [str(wl) for wl in wavelengths]
self.n_workers = n_workers
[os.makedirs(os.path.join(base_path, wl), exist_ok=True) for wl in self.wavelengths + ['6173']]
[os.makedirs(os.path.join(base_path, wl), exist_ok=True)
for wl in self.wavelengths + ['6173']]

self.drms_client = drms.Client(email=email)

def download(self, sample):
def downloadDate(self, date: datetime):
"""Download FITS data for a specific date.
"""
id = date.isoformat()
logging.info('Start download: %s' % id)
time_param = '%sZ' % date.isoformat('_', timespec='seconds')

# query Magnetogram Instrument
ds_hmi = 'hmi.M_720s[%s]{magnetogram}' % time_param
keys_hmi = self.drms_client.keys(ds_hmi)
header_hmi, segment_hmi = self.drms_client.query(
ds_hmi, key=','.join(keys_hmi), seg='magnetogram')
if len(header_hmi) != 1 or np.any(header_hmi.QUALITY != 0):
self.fetchDataFallback(date)
return

# query EUV Instrument
ds_euv = 'aia.lev1_euv_12s[%s][%s]{image}' % (
time_param, ','.join(self.wavelengths))
keys_euv = self.drms_client.keys(ds_euv)
header_euv, segment_euv = self.drms_client.query(
ds_euv, key=','.join(keys_euv), seg='image')
if len(header_euv) != len(self.wavelengths) or np.any(header_euv.QUALITY != 0):
self.fetchDataFallback(date)
return

queue = []
for (idx, h), s in zip(header_hmi.iterrows(), segment_hmi.magnetogram):
queue += [(h.to_dict(), s, date)]
for (idx, h), s in zip(header_euv.iterrows(), segment_euv.image):
queue += [(h.to_dict(), s, date)]

with multiprocessing.Pool(self.n_workers) as p:
p.map(self.download, queue)
logging.info('Finished: %s' % id)

def download(self, sample: tuple[dict, str, datetime]):
header, segment, t = sample
try:
dir = os.path.join(self.ds_path, '%d' % header['WAVELNTH'])
map_path = os.path.join(dir, '%s.fits' % t.isoformat('T', timespec='seconds'))
map_path = os.path.join(dir, '%s.fits' %
t.isoformat('T', timespec='seconds'))
if os.path.exists(map_path):
return map_path
# load map
@@ -52,50 +114,22 @@ def download(self, sample):
logging.info(ex)
raise ex

def downloadDate(self, date):
id = date.isoformat()

logging.info('Start download: %s' % id)
# query Magnetogram
time_param = '%sZ' % date.isoformat('_', timespec='seconds')
ds_hmi = 'hmi.M_720s[%s]{magnetogram}' % time_param
keys_hmi = self.drms_client.keys(ds_hmi)
header_hmi, segment_hmi = self.drms_client.query(ds_hmi, key=','.join(keys_hmi), seg='magnetogram')
if len(header_hmi) != 1 or np.any(header_hmi.QUALITY != 0):
self.fetchDataFallback(date)
return

# query EUV
time_param = '%sZ' % date.isoformat('_', timespec='seconds')
ds_euv = 'aia.lev1_euv_12s[%s][%s]{image}' % (time_param, ','.join(self.wavelengths))
keys_euv = self.drms_client.keys(ds_euv)
header_euv, segment_euv = self.drms_client.query(ds_euv, key=','.join(keys_euv), seg='image')
if len(header_euv) != len(self.wavelengths) or np.any(header_euv.QUALITY != 0):
self.fetchDataFallback(date)
return

queue = []
for (idx, h), s in zip(header_hmi.iterrows(), segment_hmi.magnetogram):
queue += [(h.to_dict(), s, date)]
for (idx, h), s in zip(header_euv.iterrows(), segment_euv.image):
queue += [(h.to_dict(), s, date)]

with multiprocessing.Pool(self.n_workers) as p:
p.map(self.download, queue)
logging.info('Finished: %s' % id)

def fetchDataFallback(self, date):
def fetchDataFallback(self, date: datetime):
id = date.isoformat()

logging.info('Fallback download: %s' % id)
# query Magnetogram
t = date - timedelta(hours=24)
ds_hmi = 'hmi.M_720s[%sZ/12h@720s]{magnetogram}' % t.replace(tzinfo=None).isoformat('_', timespec='seconds')
ds_hmi = 'hmi.M_720s[%sZ/12h@720s]{magnetogram}' % t.replace(
tzinfo=None).isoformat('_', timespec='seconds')
keys_hmi = self.drms_client.keys(ds_hmi)
header_tmp, segment_tmp = self.drms_client.query(ds_hmi, key=','.join(keys_hmi), seg='magnetogram')
header_tmp, segment_tmp = self.drms_client.query(
ds_hmi, key=','.join(keys_hmi), seg='magnetogram')
assert len(header_tmp) != 0, 'No data found!'
date_str = header_tmp['DATE__OBS'].replace('MISSING', '').str.replace('60', '59') # fix date format
date_diff = np.abs(pd.to_datetime(date_str).dt.tz_localize(None) - date)
date_str = header_tmp['DATE__OBS'].replace(
'MISSING', '').str.replace('60', '59') # fix date format
date_diff = np.abs(pd.to_datetime(
date_str).dt.tz_localize(None) - date)
# sort and filter
header_tmp['date_diff'] = date_diff
header_tmp.sort_values('date_diff')
@@ -116,10 +150,13 @@ def fetchDataFallback(self, date):
euv_ds = 'aia.lev1_euv_12s[%sZ/12h@12s][%s]{image}' % (
t.replace(tzinfo=None).isoformat('_', timespec='seconds'), wl)
keys_euv = self.drms_client.keys(euv_ds)
header_tmp, segment_tmp = self.drms_client.query(euv_ds, key=','.join(keys_euv), seg='image')
header_tmp, segment_tmp = self.drms_client.query(
euv_ds, key=','.join(keys_euv), seg='image')
assert len(header_tmp) != 0, 'No data found!'
date_str = header_tmp['DATE__OBS'].replace('MISSING', '').str.replace('60', '59') # fix date format
date_diff = (pd.to_datetime(date_str).dt.tz_localize(None) - date).abs()
date_str = header_tmp['DATE__OBS'].replace(
'MISSING', '').str.replace('60', '59') # fix date format
date_diff = (pd.to_datetime(
date_str).dt.tz_localize(None) - date).abs()
# sort and filter
header_tmp['date_diff'] = date_diff
header_tmp.sort_values('date_diff')
@@ -145,9 +182,12 @@ def fetchDataFallback(self, date):


def main():
email = "chri.schirni@hotmail.de"
base_path = "/home/juanjohn/data/helio/sdo"
downloader_sdo = SDODownloader(base_path=base_path, email=email, n_workers=8)
import os
email = os.getenv('SDO_EMAIL')
base_path = os.path.join(os.path.expanduser('~'), 'sdo-data')

downloader_sdo = SDODownloader(
base_path=base_path, email=email, n_workers=8)

start_date = datetime(2022, 3, 1)
end_date = datetime(2023, 3, 2)
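The remainder of `main()` is collapsed in this view. Based on the `Example Usage` in the class docstring, a hypothetical driver loop over this date range might look as follows; the daily cadence and the in-scope `SDODownloader` are assumptions, not something this diff shows:

```python
import os
from datetime import datetime, timedelta

# Assumes SDODownloader from this module is in scope and SDO_EMAIL is set.
email = os.getenv("SDO_EMAIL")
base_path = os.path.join(os.path.expanduser("~"), "sdo-data")

downloader_sdo = SDODownloader(base_path=base_path, email=email, n_workers=8)

date = datetime(2022, 3, 1)
end_date = datetime(2023, 3, 2)
while date <= end_date:
    # downloadDate() falls back to fetchDataFallback() when QUALITY flags are non-zero.
    downloader_sdo.downloadDate(date)
    date += timedelta(days=1)
```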