Skip to content

Commit

Permalink
First working version
Browse files Browse the repository at this point in the history
  • Loading branch information
b8raoult committed Dec 1, 2021
1 parent 2af49fd commit 300faa2
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 8 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,6 @@ test.py
.vscode/
target
data
*.grib
*.grib2
?
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# multiurl

# ecmwf-opendata

A package to download ECMWF open data

### License
[Apache License 2.0](LICENSE) In applying this licence, ECMWF does not waive the privileges and immunities
Expand Down
121 changes: 117 additions & 4 deletions ecmwf/opendata/client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,120 @@
class Client:
#!/usr/bin/env python
# (C) Copyright 2021 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#
import datetime
import itertools
import logging
import os
import re
import json
import requests
from multiurl import download

from .date import fulldate

LOG = logging.getLogger(__name__)

# Template of a data file URL. Placeholders starting with "_" are filled in
# by the client itself (base URL and date-derived fields); all the others are
# taken from the user's request.
PATTERN = (
    "{_url}/{_yyyymmdd}/{_H}z/{resol}/{stream}/"
    "{_yyyymmddHHMMSS}-{step}h-{stream}-{type}.grib2"
)


class Client:
    """Download ECMWF open data files, or selected parts of them.

    Request keys are split in two groups:

    * keys that appear as placeholders in ``pattern`` (plus ``date`` and
      ``time``) select *which files* are downloaded;
    * any other key is matched against the ``.index`` file published next to
      each data file and selects *which byte ranges* of those files are
      downloaded.
    """

    def __init__(self, url, pattern=PATTERN):
        """Create a client.

        Args:
            url: Base URL, substituted for ``{_url}`` in ``pattern``.
            pattern: ``str.format`` template of a data file URL.
        """
        self.url = url
        self.pattern = pattern

        # "date" and "time" never appear literally in the pattern: they are
        # combined into the "_yyyymmdd", "_H" and "_yyyymmddHHMMSS" fields.
        self.url_components = {"date", "time"}

        # re.split() with one capturing group alternates literal text (even
        # positions) and placeholder names (odd positions).
        for position, name in enumerate(re.split(r"{([^}]*)}", self.pattern)):
            if position % 2 != 0 and not name.startswith("_"):
                self.url_components.add(name)

        LOG.debug("url_components are %s", self.url_components)

    def retrieve(self, request=None, target=None, **kwargs):
        """Download the data described by a request into ``target``.

        Args:
            request: Optional dict of request keys. Values may be scalars or
                lists/tuples; every combination of the URL-related values is
                expanded into one URL.
            target: Destination passed to ``multiurl.download``. May instead
                be given as a ``"target"`` entry of the request.
            **kwargs: Additional request keys; they override ``request``.

        Raises:
            ValueError: If no target is given, either as an argument or in
                the request.
        """
        params = dict(
            _url=self.url,
            resol="0p4",
            stream="oper",
            type="fc",
            date=-1,
            step=0,
        )
        if request is not None:
            params.update(request)
        params.update(kwargs)

        # Always remove "target" from the request so it is never mistaken
        # for an index key; the explicit argument takes precedence.
        requested_target = params.pop("target", None)
        if target is None:
            target = requested_target
        if target is None:
            # Fail before any network access is attempted.
            raise ValueError("No target specified")

        for_urls = {}
        for_index = {}
        for key, values in params.items():
            if not isinstance(values, (list, tuple)):
                values = [values]
            if key.startswith("_") or key in self.url_components:
                for_urls[key] = values
            else:
                # Compared against index entries as strings.
                for_index[key] = {str(value) for value in values}

        seen = set()
        data_urls = []
        keys = list(for_urls)
        for combination in itertools.product(*for_urls.values()):
            args = dict(zip(keys, combination))
            date = fulldate(args.pop("date", None), args.pop("time", None))
            args["_yyyymmdd"] = date.strftime("%Y%m%d")
            args["_H"] = date.strftime("%H")
            args["_yyyymmddHHMMSS"] = date.strftime("%Y%m%d%H%M%S")
            url = self.pattern.format(**args)
            # Different combinations may expand to the same URL; keep the
            # first occurrence only, preserving order.
            if url not in seen:
                seen.add(url)
                data_urls.append(url)

        LOG.debug("data_urls are %s", data_urls)

        if for_index:
            data_urls = self.get_parts(data_urls, for_index)

        download(data_urls, target=target)

    def get_parts(self, data_urls, for_index):
        """Find the byte ranges of each data file that match ``for_index``.

        For every URL, the sibling ``.index`` file (same URL with the
        extension replaced) is fetched and read as one JSON object per line.

        Args:
            data_urls: Iterable of data file URLs.
            for_index: Dict mapping an index key to the set of accepted
                (string) values. An entry is selected when it matches on
                every key.

        Returns:
            A list of ``(url, parts)`` tuples, where ``parts`` is a list of
            ``(offset, length)`` pairs taken from the ``_offset`` and
            ``_length`` fields of the selected entries. URLs without any
            matching entry are omitted.

        Raises:
            requests.HTTPError: If an index file cannot be fetched.
        """
        LOG.debug("for_index is %s", for_index)

        result = []

        for url in data_urls:
            base, _ = os.path.splitext(url)
            index_url = f"{base}.index"

            response = requests.get(index_url)
            response.raise_for_status()

            parts = []
            for raw in response.iter_lines():
                if not raw:
                    # iter_lines() may yield empty lines, which are not JSON.
                    continue
                entry = json.loads(raw)
                # An entry lacking one of the requested keys cannot match.
                if all(
                    entry.get(name) in values for name, values in for_index.items()
                ):
                    parts.append((entry["_offset"], entry["_length"]))

            if parts:
                result.append((url, parts))

        return result
40 changes: 40 additions & 0 deletions ecmwf/opendata/date.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python
# (C) Copyright 2021 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#
import datetime
import re

VALID_DATE = re.compile(r"\d\d\d\d-?\d\d-?\d\d([T\s]\d\d:\d\d(:\d\d)?)?Z?")


def fulldate(date, time=None):
    """Convert a user-supplied date (and optional hour) to a naive datetime.

    Args:
        date: One of

            * a ``datetime.datetime``, returned as is;
            * a ``datetime.date``, converted to midnight of that day;
            * an ``int``: values <= 0 are a number of days relative to
              today (UTC), e.g. ``0`` is today and ``-1`` is yesterday;
              positive values are read as ``YYYYMMDD``;
            * a ``str``: either an integer as above, or text matching
              ``VALID_DATE`` (ISO-like, optionally ending with ``Z``).
        time: Optional hour of the day (anything ``int()`` accepts). When
            given, it replaces the time of day of ``date``; minutes and
            seconds are set to zero.

    Returns:
        A naive ``datetime.datetime``. A trailing ``Z`` in a string is
        dropped rather than producing a timezone-aware value.

    Raises:
        ValueError: If ``date`` cannot be interpreted as a date, or if the
            resulting date or hour is out of range.
    """
    original = date

    if isinstance(date, str):
        # Accept "20211201" or "-1" the same way as their integer forms.
        try:
            date = int(date)
        except ValueError:
            pass

    # datetime.datetime is a subclass of datetime.date: exclude it here so
    # that the time of day of a datetime is not discarded.
    if isinstance(date, datetime.date) and not isinstance(date, datetime.datetime):
        date = datetime.datetime(date.year, date.month, date.day)

    if isinstance(date, int):
        if date <= 0:
            today = datetime.datetime.now(datetime.timezone.utc).date()
            day = today + datetime.timedelta(days=date)
            date = datetime.datetime(day.year, day.month, day.day)
        else:
            date = datetime.datetime(date // 10000, date % 10000 // 100, date % 100)

    if isinstance(date, str) and VALID_DATE.fullmatch(date):
        # fromisoformat() only understands a "Z" suffix from Python 3.11.
        text = date[:-1] if date.endswith("Z") else date
        try:
            date = datetime.datetime.fromisoformat(text)
        except ValueError:
            pass  # Reported below as an invalid date.

    if not isinstance(date, datetime.datetime):
        raise ValueError("Invalid date: {}".format(original))

    if time is not None:
        time = int(time)
        date = datetime.datetime(date.year, date.month, date.day, time, 0, 0)

    return date
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def read(fname):
author_email="software.support@ecmwf.int",
license="Apache License Version 2.0",
url="https://github.com/ecmwf/ecmwf-opendata",
packages=setuptools.find_namespace_packages(include=['ecmwf.*']),
packages=setuptools.find_namespace_packages(include=["ecmwf.*"]),
include_package_data=True,
install_requires=["multiurl"],
zip_safe=True,
Expand Down
3 changes: 2 additions & 1 deletion tests/test_opendata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ecmwf.opendata import Client


def test_opendata():
    """Smoke test: a Client can be constructed, here with no base URL."""
    Client(None)

0 comments on commit 300faa2

Please sign in to comment.