-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_data.py
62 lines (51 loc) · 2 KB
/
process_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import pandas as pd
import numpy as np
def convert_result_to_df(results):
    """Build a pandas DataFrame from the 'elementList' entry of an API response.

    Args:
        results (dict): The JSON response from the API; must hold a list
            under the key 'elementList'.

    Returns:
        pandas.DataFrame: A DataFrame constructed from the 'elementList' list.

    Raises:
        ValueError: If ``results`` is not a dictionary, or it has no
            'elementList' key.
        TypeError: If the value stored under 'elementList' is not a list.
    """
    if not isinstance(results, dict):
        raise ValueError("The input results must be a dictionary.")
    if 'elementList' not in results:
        raise ValueError("The input dictionary must contain the key 'elementList'.")

    elements = results['elementList']
    if isinstance(elements, list):
        return pd.DataFrame(elements)
    raise TypeError("The 'elementList' must be a list.")
def concat_dfs(df, total_df):
    """Append ``df`` to the end of ``total_df`` and return the combined frame.

    Args:
        df (pandas.DataFrame): The current DataFrame; its rows come last.
        total_df (pandas.DataFrame): The accumulated DataFrame; its rows
            come first.

    Returns:
        pandas.DataFrame: A new DataFrame holding the rows of ``total_df``
            followed by the rows of ``df``, with a fresh 0..n-1 index.

    Raises:
        ValueError: If either of the inputs is not a pandas DataFrame.
    """
    # Validate in the same order as the parameters so the first offending
    # argument determines the error message.
    checks = (
        (df, "The input df must be a pandas DataFrame."),
        (total_df, "The input total_df must be a pandas DataFrame."),
    )
    for candidate, message in checks:
        if not isinstance(candidate, pd.DataFrame):
            raise ValueError(message)

    frames = [total_df, df]
    return pd.concat(frames, ignore_index=True)
def save_df_to_csv(df, province):
    """Saves the DataFrame to a CSV file.

    The file is written to ``./datasets/all_rent_{province}.csv``, relative
    to the current working directory. The ``./datasets`` directory is created
    if it does not already exist. The DataFrame index is not written to the
    file.

    Args:
        df (pandas.DataFrame): The DataFrame to save.
        province: Value interpolated into the output file name.

    Returns:
        None
    """
    os.makedirs('./datasets', exist_ok=True)
    # No index reset is needed here: `index=False` already keeps the index
    # out of the file. A bare `df.reset_index()` would discard its result and
    # could raise if the frame already had 'index' and 'level_0' columns.
    df.to_csv(f'./datasets/all_rent_{province}.csv', index=False)
    return None
def load_data(province='asturias'):
    """Loads a province's rent dataset from its CSV file.

    Reads ``./datasets/all_rent_{province}.csv``, relative to the current
    working directory. This is the same file naming scheme that
    ``save_df_to_csv`` writes.

    Args:
        province (str): Province name interpolated into the file name.
            Defaults to 'asturias', so calling ``load_data()`` with no
            arguments reads ``./datasets/all_rent_asturias.csv``.

    Returns:
        pandas.DataFrame: The contents of the CSV file.
    """
    return pd.read_csv(f'./datasets/all_rent_{province}.csv')