-
Notifications
You must be signed in to change notification settings - Fork 0
/
describe.py
61 lines (51 loc) · 1.4 KB
/
describe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
from stats import (
ft_mean,
ft_std,
ft_min,
ft_25,
ft_median,
ft_75,
ft_max,
ft_var,
)
from preprocessing import load_data
import sys
def main():
    """Print a table of summary statistics for the dataset named on the CLI.

    Exactly one command-line argument is expected (the path of the .csv
    file).  It is handed to ``load_data``, which returns the dataframe and
    the names of the feature columns to describe.  For every feature, one
    value per statistic (Count, Mean, Std, Var, Min, 25%, 50%, 75%, Max) is
    computed and the resulting dataframe is printed to standard output.

    Errors (wrong number of arguments, a failure while loading the data or
    while computing a statistic) are not raised to the caller; they are
    reported on standard output as ``<ErrorType> : <message>``.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately NOT
    intercepted, so Ctrl-C and explicit exits still terminate the program.
    """
    try:
        if len(sys.argv) != 2:
            raise IndexError('Please enter one argument.')
        df, features = load_data(sys.argv[1])
        describe_df = pd.DataFrame(columns=['Stat name', *features])

        # Row label -> function applied to each feature column.  The dict's
        # insertion order is the row order of the printed table.
        # NOTE(review): len() counts every entry of a column, missing values
        # included -- confirm load_data already handles NaN if a non-null
        # count is what is intended here.
        operations = {
            'Count': len,
            'Mean': ft_mean,
            'Std': ft_std,
            'Var': ft_var,
            'Min': ft_min,
            '25%': ft_25,
            '50%': ft_median,
            '75%': ft_75,
            'Max': ft_max,
        }

        # One output row per statistic: its label followed by the value of
        # that statistic for every feature column, appended at the next
        # integer index of the describe dataframe.
        for stat_name, operation in operations.items():
            describe_df.loc[len(describe_df)] = [
                stat_name,
                *(operation(df[column]) for column in features),
            ]
        print(describe_df)
    except Exception as e:
        # Exception (not BaseException): report ordinary failures in a
        # compact form, but let interpreter-level exits propagate.
        print(type(e).__name__, ':', e)
# Run the command-line entry point only when this file is executed as a
# script; importing the module must not trigger any processing.
if __name__ == "__main__":
    main()