-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheis_module.py
31 lines (25 loc) · 1.41 KB
/
eis_module.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
from idatamation_module import IdatamationFlow
class EISIdatamation(IdatamationFlow):
    """Idatamation flow configured with ``data_source = "EIS"`` and ``fab_folder = "S3"``.

    The class supplies the dtype/rename tables used for this source and
    implements ``data_transformat``, which normalises one imported frame and
    forwards it to ``mongo_insert_data`` (inherited, like ``data_type_check``,
    from ``IdatamationFlow``).
    """

    def __init__(self):
        # These attributes are assigned *before* ``super().__init__()``, as in
        # the original code. NOTE(review): presumably the base initialiser
        # reads them -- confirm against IdatamationFlow before reordering.
        self.fab_folder = "S3"
        self.data_source = "EIS"
        # Dtypes keyed by the raw (mixed-case) column names of the source data.
        self.type_dict = {
            "PartNo": str,
            "LotNo": str,
            "FabId": str,
            "MachineNo": str,
            "Step": str,
            "Description": str,
            "VALUE": float,
            "TimeStamp": str,
        }
        super().__init__()
        # After import data, the first step is to capitalize column names,
        # which is why the rename table below is keyed by upper-cased names.
        self.replace_column_list = {
            "TIMESTAMP": "TIME",
            "PARTNO": "PROD_ID_RAW",
            "LOTNO": "LOT_ID",
            "FABID": "FAB_ID",
            "MACHINENO": "EQP_ID",
            "DESCRIPTION": "PARAMETER_ID",
        }
        # Output columns, in order, with the dtype each one is checked against.
        # Columns not listed here (e.g. PROD_ID_RAW, STEP) are dropped by
        # ``data_transformat``.
        self.data_type = {
            "FAB_ID": str,
            "LOT_ID": str,
            "TIME": object,
            "EQP_ID": str,
            "PARAMETER_ID": str,
            "VALUE": float,
        }

    def data_transformat(self, df: pd.DataFrame, filename) -> int:
        """Normalise one imported frame and pass it to ``mongo_insert_data``.

        Args:
            df: Imported data with upper-cased column names. It must provide
                TIMESTAMP, LOTNO, FABID, MACHINENO, DESCRIPTION and VALUE.
                The frame itself is left unmodified.
            filename: Forwarded unchanged to ``mongo_insert_data``.

        Returns:
            Number of rows in the transformed frame.

        Raises:
            KeyError: If a required column is missing after renaming.
            TypeError: If the parsed timestamps are already timezone-aware
                (``tz_localize`` only accepts naive values).
        """
        # ``rename`` returns a new frame and ``copy`` detaches the column
        # subset, so the caller's ``df`` is never written to and the column
        # assignment below does not target a slice of another frame.
        frame = df.rename(columns=self.replace_column_list)
        frame = frame[list(self.data_type)].copy()

        # Timestamps are interpreted as naive UTC+08:00 wall-clock times
        # ("Etc/GMT-8" uses the POSIX inverted sign) and stored as UTC.
        local_time = pd.to_datetime(frame["TIME"]).dt.tz_localize("Etc/GMT-8")
        frame["TIME"] = local_time.dt.tz_convert("UTC")

        # final check column type is correct
        frame = self.data_type_check(frame, self.data_type)
        # NOTE(review): the two empty sets and ``duplicated_data=True`` are
        # passed exactly as before; their meaning is defined by the base class.
        self.mongo_insert_data(
            frame, "ms_original_lot", filename, set(), set(), duplicated_data=True
        )
        return frame.shape[0]
# Script entry point: build the EIS flow and start it via ``main_function``,
# which is inherited from IdatamationFlow. There is no ``__main__`` guard, so
# these statements also execute whenever this module is imported.
process_data = EISIdatamation()
process_data.main_function()