Skip to content

Commit

Permalink
read shp from local and minio
Browse files Browse the repository at this point in the history
  • Loading branch information
silencesoup committed Aug 13, 2024
1 parent e955ebe commit be48fd1
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 13 deletions.
2 changes: 2 additions & 0 deletions environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ dependencies:
- grip
- pytest
- pytest-runner
- tzfpy
- geopandas

# for pypi package
- pip
121 changes: 121 additions & 0 deletions hydroutils/hydro_configs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import os
from pathlib import Path
import boto3
import s3fs
import yaml
from minio import Minio
import psycopg2


def read_setting(setting_path):
    """
    Load the hydro settings YAML file and check that it has the required layout.

    Args:
        setting_path: Path of the YAML configuration file to read.

    Returns:
        dict: The parsed configuration. It is guaranteed to contain the
        ``minio``, ``local_data_path`` and ``postgres`` sections, each of them
        a mapping holding every required subkey.

    Raises:
        FileNotFoundError: If ``setting_path`` does not exist.
        ValueError: If the file is empty, its top level is not a mapping, or a
            required section/subkey is missing or malformed. The message always
            embeds an example configuration.
    """
    if not os.path.exists(setting_path):
        raise FileNotFoundError(f"Configuration file not found: {setting_path}")

    with open(setting_path, "r", encoding="utf-8") as file:  # decode explicitly as UTF-8
        setting = yaml.safe_load(file)

    # Shown to the user in every validation error so they can fix the file.
    example_setting = (
        "minio:\n"
        " server_url: 'http://minio.waterism.com:9090' # Update with your URL\n"
        " client_endpoint: 'http://minio.waterism.com:9000' # Update with your URL\n"
        " access_key: 'your minio access key'\n"
        " secret: 'your minio secret'\n\n"
        "local_data_path:\n"
        " root: 'D:\\data\\waterism' # Update with your root data directory\n"
        " datasets-origin: 'D:\\data\\waterism\\datasets-origin'\n"
        " datasets-interim: 'D:\\data\\waterism\\datasets-interim'\n"
        "postgres:\n"
        " server_url: your_postgres_server_url\n"
        " port: 5432\n"
        " username: your_postgres_username\n"
        " password: your_postgres_secret_code\n"
        " database: your_postgres_database\n"
    )

    # An empty file parses to None; a scalar or list at the top level is just
    # as unusable, so both are rejected with the same message.
    if not isinstance(setting, dict):
        raise ValueError(
            f"Configuration file is empty or has invalid format.\n\nExample configuration:\n{example_setting}"
        )

    # Required sections and the subkeys each of them must provide.
    expected_structure = {
        "minio": ["server_url", "client_endpoint", "access_key", "secret"],
        "local_data_path": ["root", "datasets-origin", "datasets-interim"],
        "postgres": ["server_url", "port", "username", "password", "database"],
    }

    try:
        for key, subkeys in expected_structure.items():
            if key not in setting:
                raise KeyError(f"Missing required key in config: {key}")

            section = setting[key]
            # A section written without children (e.g. "minio:") parses to
            # None; report it as a format error instead of letting the
            # membership test below fail with a bare TypeError.
            if not isinstance(section, dict):
                raise KeyError(f"Section '{key}' must be a mapping with keys {subkeys}")

            for subkey in subkeys:
                if subkey not in section:
                    raise KeyError(f"Missing required subkey '{subkey}' in '{key}'")
    except KeyError as e:
        raise ValueError(
            f"Incorrect configuration format: {e}\n\nExample configuration:\n{example_setting}"
        ) from e

    return setting


# Location of the per-user configuration file (in the user's home directory).
SETTING_FILE = os.path.join(Path.home(), "hydro_setting.yml")
try:
    SETTING = read_setting(SETTING_FILE)
except ValueError as e:
    print(e)
    # Re-raise: without SETTING every statement below would fail with an
    # unrelated NameError that hides the real configuration problem.
    raise
except Exception as e:
    print(f"Unexpected error: {e}")
    raise

# Root directory of the locally stored data.
LOCAL_DATA_PATH = SETTING["local_data_path"]["root"]

# MinIO connection parameters shared by the clients created below.
MINIO_PARAM = {
    "endpoint_url": SETTING["minio"]["client_endpoint"],
    "key": SETTING["minio"]["access_key"],
    "secret": SETTING["minio"]["secret"],
}

# s3fs filesystem pointing at the MinIO endpoint.
FS = s3fs.S3FileSystem(
    client_kwargs={"endpoint_url": MINIO_PARAM["endpoint_url"]},
    key=MINIO_PARAM["key"],
    secret=MINIO_PARAM["secret"],
    use_ssl=False,
)

# remote_options parameters for xr open_dataset from minio
RO = {
    "client_kwargs": {"endpoint_url": MINIO_PARAM["endpoint_url"]},
    "key": MINIO_PARAM["key"],
    "secret": MINIO_PARAM["secret"],
    "use_ssl": False,
}


# Set up MinIO client
S3 = boto3.client(
    "s3",
    endpoint_url=SETTING["minio"]["client_endpoint"],
    aws_access_key_id=MINIO_PARAM["key"],
    aws_secret_access_key=MINIO_PARAM["secret"],
)
# NOTE(review): only an "http://" prefix is stripped and secure=False is
# hard-coded, so an "https://" endpoint is not handled here -- confirm that
# plain HTTP is the only supported deployment.
MC = Minio(
    SETTING["minio"]["client_endpoint"].replace("http://", ""),
    access_key=MINIO_PARAM["key"],
    secret_key=MINIO_PARAM["secret"],
    secure=False,  # True if using HTTPS
)
# Bucket and object names used for the station data.
STATION_BUCKET = "stations"
STATION_OBJECT = "sites.csv"

GRID_INTERIM_BUCKET = "grids-interim"

# PostgreSQL connection, opened at import time from the "postgres" section.
PS = psycopg2.connect(
    database=SETTING["postgres"]["database"],
    user=SETTING["postgres"]["username"],
    password=SETTING["postgres"]["password"],
    host=SETTING["postgres"]["server_url"],
    port=SETTING["postgres"]["port"],
)
38 changes: 26 additions & 12 deletions hydroutils/hydro_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@
Copyright (c) 2023-2024 Wenyu Ouyang. All rights reserved.
"""

import contextlib
import datetime
import tempfile
from typing import Union
import numpy as np
import pytz
import tzfpy
import geopandas as gpd

from hydroutils.hydro_configs import FS


def t2str(t_: Union[str, datetime.datetime]):
if type(t_) is str:
Expand Down Expand Up @@ -154,9 +158,7 @@ def calculate_utc_offset(lat, lng, date=None):
if date is None:
date = datetime.datetime.utcnow()

# Get the timezone string using tzfpy
timezone_str = tzfpy.get_tz(lng, lat)
if timezone_str:
if timezone_str := tzfpy.get_tz(lng, lat):
# Get the timezone object using pytz
tz = pytz.timezone(timezone_str)
# Get the UTC offset for the specified date
Expand All @@ -166,7 +168,7 @@ def calculate_utc_offset(lat, lng, date=None):
return None


def calculate_basin_offsets(shp_file):
def calculate_basin_offsets(shp_file_path):
"""
Calculate the UTC offset for each basin based on the outlet shapefile.
Expand All @@ -177,7 +179,26 @@ def calculate_basin_offsets(shp_file):
dict: A dictionary where the keys are the BASIN_ID and the values are the corresponding UTC offsets.
"""
# read shapefile
gdf = gpd.read_file(shp_file)
if "s3://" in shp_file_path:
# related list
extensions = [".shp", ".shx", ".dbf", ".prj"]

# create a temporary directory
with tempfile.TemporaryDirectory() as tmpdir:
# download all related files to the temporary directory
base_name = shp_file_path.rsplit(".", 1)[0]
extensions = [".shp", ".shx", ".dbf", ".prj"]

for ext in extensions:
remote_file = f"{base_name}{ext}"
local_file = f"{tmpdir}/shp_file{ext}"
with contextlib.suppress(FileNotFoundError):
FS.get(remote_file, local_file)
gdf = gpd.read_file(f"{tmpdir}/shp_file.shp")

else:
# If the file is not on S3 (MinIO), read it directly
gdf = gpd.read_file(shp_file_path)

# create an empty dictionary
basin_offset_dict = {}
Expand All @@ -191,10 +212,3 @@ def calculate_basin_offsets(shp_file):
basin_offset_dict[basin_id] = offset

return basin_offset_dict


if __name__ == "main":
offset_dict = calculate_basin_offsets(
"/ftproot/basins-interim/shapes/basinoutlets.shp"
)
print(offset_dict)
4 changes: 3 additions & 1 deletion requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,6 @@ tqdm
rich
boto3==1.34.34
minio
s3fs==2024.2
s3fs==2024.2
tzfpy
geopandas

0 comments on commit be48fd1

Please sign in to comment.