-
Notifications
You must be signed in to change notification settings - Fork 0
/
pare_and_merge_original_files.py
59 lines (52 loc) · 1.85 KB
/
pare_and_merge_original_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from dataset import (
add_variables_and_attributes,
build_dataset_from_multiple_files,
build_dataset_from_single_file,
)
from model_variables import model_variables
from nonmetric_calculated_variables import (
nonmetric_calculated_variables,
nonmetric_variable_attrs,
)
from original_file_loaders import (
DatasetSpecs,
get_He2022_specs,
get_LHSTE_specs,
get_LHSTR_specs,
)
from user_filepaths import LOCAL_PROCESSED_DATASET_DIRECTORY
# Names of every variable to load, taken from the keys of `model_variables`.
LIST_OF_VARIABLES: list[str] = [*model_variables.keys()]
# --- Training data (LHSTR) ---------------------------------------------------
# Build the dataset from the multiple files described by the LHSTR specs,
# order it along "rotation_period", attach the non-metric calculated variables
# and their attributes, then write the result to LHSTR_data.nc.
LHSTR_specs: DatasetSpecs = get_LHSTR_specs()
training_data = add_variables_and_attributes(
    dataset=build_dataset_from_multiple_files(
        variables=LIST_OF_VARIABLES,
        **LHSTR_specs,
    ).sortby("rotation_period"),
    calculators=nonmetric_calculated_variables,
    attributes=nonmetric_variable_attrs,
)
training_data.to_netcdf(LOCAL_PROCESSED_DATASET_DIRECTORY / "LHSTR_data.nc")
# --- Test data (LHSTE) -------------------------------------------------------
# Same pipeline as a multi-file dataset would get, except the source is the
# single file described by the LHSTE specs: build, order along
# "rotation_period", attach the non-metric calculated variables and their
# attributes, then write the result to LHSTE_data.nc.
LHSTE_specs: DatasetSpecs = get_LHSTE_specs()
test_data = add_variables_and_attributes(
    dataset=build_dataset_from_single_file(
        variables=LIST_OF_VARIABLES,
        **LHSTE_specs,
    ).sortby("rotation_period"),
    calculators=nonmetric_calculated_variables,
    attributes=nonmetric_variable_attrs,
)
test_data.to_netcdf(LOCAL_PROCESSED_DATASET_DIRECTORY / "LHSTE_data.nc")
# --- Circular data (He 2022) -------------------------------------------------
# Build the dataset from the multiple files described by the He2022 specs
# (passing month_name="time" to the builder), attach the non-metric calculated
# variables and their attributes, then write the result to He_data.nc.
#
# NOTE(review): no sortby("rotation_period") step is applied to this dataset;
# the call exists only as disabled code. Confirm that skipping the sort is
# intentional before re-enabling or removing it.
He2022_specs: DatasetSpecs = get_He2022_specs()
circular_data = add_variables_and_attributes(
    dataset=build_dataset_from_multiple_files(
        variables=LIST_OF_VARIABLES,
        **He2022_specs,
        month_name="time",
    ),
    calculators=nonmetric_calculated_variables,
    attributes=nonmetric_variable_attrs,
)
circular_data.to_netcdf(LOCAL_PROCESSED_DATASET_DIRECTORY / "He_data.nc")