#!/usr/bin/env python
# (C) Copyright 2021 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#
import datetime
import itertools
import logging
import os
import re
import json
import requests
from multiurl import download

from .date import fulldate

LOG = logging.getLogger(__name__)

# Template of a data file URL. Fields starting with "_" are computed by the
# client (base URL and date-derived parts); the others come from the request.
PATTERN = (
    "{_url}/{_yyyymmdd}/{_H}z/{resol}/{stream}/"
    "{_yyyymmddHHMMSS}-{step}h-{stream}-{type}.grib2"
)


class Client:
    """Download files, or parts of files, whose URLs follow a pattern.

    Request keys that appear in the URL pattern (plus ``date`` and ``time``)
    select which files are downloaded. Every other key is matched against the
    ``.index`` file published next to each data file to select byte ranges.
    """

    def __init__(self, url, pattern=PATTERN):
        """Remember the base URL and work out which keys belong to the URL.

        Args:
            url: Base URL, substituted for ``{_url}`` in ``pattern``.
            pattern: ``str.format`` template used to build data file URLs.
        """
        self.url = url
        self.pattern = pattern

        # "date" and "time" never appear verbatim in the pattern: they are
        # turned into the "_yyyymmdd", "_H" and "_yyyymmddHHMMSS" fields.
        self.url_components = {"date", "time"}

        # re.split() with one capturing group alternates literal text (even
        # positions) and field names (odd positions).
        for i, p in enumerate(re.split(r"{([^}]*)}", self.pattern)):
            if i % 2 != 0 and not p.startswith("_"):
                self.url_components.add(p)

        LOG.debug("url_components are %s", self.url_components)

    def retrieve(self, request=None, target=None, **kwargs):
        """Download the data described by a request into ``target``.

        Args:
            request: Optional dict of request keys. Values may be scalars or
                lists/tuples; URL keys are expanded as a cartesian product.
            target: Destination passed to ``multiurl.download``. When omitted,
                the ``"target"`` entry of ``request`` is used instead.
            **kwargs: Extra request keys; they override those in ``request``.

        Raises:
            ValueError: If no target is given, either as an argument or in
                the request.
        """
        params = dict(
            _url=self.url,
            resol="0p4",
            stream="oper",
            type="fc",
            date=-1,
            step=0,
        )
        params.update(request or {})
        params.update(kwargs)

        # Always take "target" out of the parameters: it is a destination,
        # not a selection key, even when an explicit ``target`` wins.
        requested_target = params.pop("target", None)
        if target is None:
            target = requested_target
        if target is None:
            # Checked before any network access; a real exception (unlike
            # ``assert``) also survives ``python -O``.
            raise ValueError("No target specified for retrieve()")

        for_urls = {}
        for_index = {}
        for k, v in params.items():
            if not isinstance(v, (list, tuple)):
                v = [v]
            if not k.startswith("_") and k not in self.url_components:
                # Index selection values are compared as strings.
                for_index[k] = {str(x) for x in v}
            else:
                for_urls[k] = v

        seen = set()
        data_urls = []
        for combination in itertools.product(*for_urls.values()):
            args = dict(zip(for_urls.keys(), combination))
            date = fulldate(args.pop("date", None), args.pop("time", None))
            args["_yyyymmdd"] = date.strftime("%Y%m%d")
            args["_H"] = date.strftime("%H")
            args["_yyyymmddHHMMSS"] = date.strftime("%Y%m%d%H%M%S")
            url = self.pattern.format(**args)
            # Different combinations may produce the same URL; keep the first
            # occurrence and preserve order.
            if url not in seen:
                data_urls.append(url)
                seen.add(url)

        LOG.debug("data urls are %s", data_urls)

        if for_index:
            data_urls = self.get_parts(data_urls, for_index)

        download(data_urls, target=target)

    def get_parts(self, data_urls, for_index):
        """Find the byte ranges of each file that match an index selection.

        For every data URL the file extension is replaced by ``.index`` and
        that document is fetched; each of its lines is parsed as a JSON
        object. An entry matches when, for every key of ``for_index``, the
        entry's value is one of the requested values.

        Args:
            data_urls: Iterable of data file URLs.
            for_index: Dict mapping a key to the set of accepted (string)
                values.

        Returns:
            A list of ``(url, [(offset, length), ...])`` tuples, one per URL
            with at least one matching entry.

        Raises:
            requests.HTTPError: If an index file cannot be fetched.
        """
        LOG.debug("index selection is %s", for_index)

        result = []

        for url in data_urls:
            base, _ = os.path.splitext(url)
            index_url = f"{base}.index"

            r = requests.get(index_url)
            r.raise_for_status()

            parts = []
            for line in r.iter_lines():
                if not line:
                    # Tolerate blank lines in the index.
                    continue
                entry = json.loads(line)
                # An entry lacking one of the requested keys is a non-match.
                if all(
                    entry.get(name) in values for name, values in for_index.items()
                ):
                    parts.append((entry["_offset"], entry["_length"]))

            if parts:
                result.append((url, parts))

        return result
import datetime
import re

VALID_DATE = re.compile(r"\d\d\d\d-?\d\d-?\d\d([T\s]\d\d:\d\d(:\d\d)?)?Z?")


def fulldate(date, time=None):
    """Convert a user-supplied date, and optional hour, to a ``datetime``.

    Args:
        date: One of

            * a ``datetime.datetime``, used as is;
            * a ``datetime.date``, taken at midnight;
            * an ``int``: zero or a negative number is a number of days
              relative to the current UTC date (``0`` is today, ``-1`` is
              yesterday), a positive number is read as ``yyyymmdd``;
            * a ``str`` matching ``VALID_DATE``, e.g. ``"2021-12-01"``,
              ``"20211201"`` or ``"2021-12-01T12:00Z"``.
        time: Optional hour of the day (anything ``int()`` accepts). When
            given, it replaces the time of day of ``date`` and the minutes
            and seconds are set to zero.

    Returns:
        A ``datetime.datetime``.

    Raises:
        ValueError: If ``date`` cannot be interpreted as a date.
    """
    # datetime.datetime is a subclass of datetime.date: only promote plain
    # dates, otherwise a datetime argument would lose its time of day.
    if isinstance(date, datetime.date) and not isinstance(date, datetime.datetime):
        date = datetime.datetime(date.year, date.month, date.day)

    if isinstance(date, int):
        if date <= 0:
            shifted = datetime.datetime.now(datetime.timezone.utc)
            shifted += datetime.timedelta(days=date)
            date = datetime.datetime(shifted.year, shifted.month, shifted.day)
        else:
            date = datetime.datetime(date // 10000, date % 10000 // 100, date % 100)

    if isinstance(date, str):
        if VALID_DATE.fullmatch(date):
            # Build the datetime from the digits alone, so that every form
            # accepted by VALID_DATE (compact dates, trailing "Z") parses the
            # same way whatever datetime.fromisoformat() supports.
            digits = re.sub(r"\D", "", date)
            fields = [int(digits[:4])]
            fields.extend(int(digits[i : i + 2]) for i in range(4, len(digits), 2))
            date = datetime.datetime(*fields)
        elif VALID_DATE.match(date):
            # A valid prefix followed by something else (fractional seconds,
            # UTC offset, ...): leave it to the standard parser.
            date = datetime.datetime.fromisoformat(date)

    if not isinstance(date, datetime.datetime):
        raise ValueError("Invalid date: {}".format(date))

    if time is not None:
        time = int(time)
        date = datetime.datetime(date.year, date.month, date.day, time, 0, 0)

    return date