-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathA3_demo.py
153 lines (86 loc) · 3.22 KB
/
A3_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np # To use np.arrays
import pandas as pd # To use dataframes
from pandas.plotting import autocorrelation_plot as auto_corr
# To plot
import matplotlib.pyplot as plt
# Enable inline plotting when the script runs inside IPython/Jupyter.
# ``get_ipython`` only exists there; under a plain ``python`` interpreter the
# name is undefined, so skip the magic instead of crashing with NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
import matplotlib as mpl
import seaborn as sns
# Math and date-time utilities
import math
from datetime import datetime
from datetime import timedelta
# Other imports, as needed
import itertools
import statsmodels.api as sm
import statsmodels.tsa.api as smt
import statsmodels.formula.api as smf
from sklearn.model_selection import train_test_split
from statsmodels.tsa.seasonal import seasonal_decompose as season
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn import preprocessing
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
# In[2]:
def forecasting(X, y):
    """Fit a random-forest regressor on a 70/30 split and report hold-out MAE.

    Args:
        X: Feature table; converted with ``np.array`` before splitting.
        y: Target values, one per row of ``X``; converted with ``np.array``.

    Returns:
        A tuple ``(model, predicted_prices, X_train, X_test, y_train, y_test)``
        where ``model`` is the fitted ``RandomForestRegressor``,
        ``predicted_prices`` are its predictions for ``X_test``, and the
        remaining four items are the arrays produced by ``train_test_split``.
    """
    # Split the *argument* ``y``. The previous body read a module-level ``Y``
    # here, which silently ignored the caller's target values.
    X_train, X_test, y_train, y_test = train_test_split(
        np.array(X), np.array(y), test_size=0.3, random_state=42
    )

    # NOTE: only the split is seeded; the forest has no random_state, so the
    # reported MAE can differ from run to run.
    model = RandomForestRegressor()
    model.fit(X_train, y_train)

    # Mean absolute error on the held-out 30 % of the rows.
    predicted_prices = model.predict(X_test)
    MAE = mean_absolute_error(y_test, predicted_prices)
    print('Random forest validation MAE = ', MAE)
    return model, predicted_prices, X_train, X_test, y_train, y_test
# In[3]:
# Read the three input tables from the working directory, in this order:
# store data, train set, features.
df_store, df_train, df_features = (
    pd.read_csv(csv_name)
    for csv_name in ('stores.csv', 'train.csv', 'features.csv')
)
# In[4]:
# Join the training rows with the features table (on Store and Date) and then
# with the store table (on Store). The joins leave two holiday columns,
# IsHoliday_x and IsHoliday_y (pandas' default suffixes for a column name
# present on both sides of a merge); keep the left-hand one under its plain
# name and discard the other.
df = (
    df_train
    .merge(df_features, on=['Store', 'Date'], how='inner')
    .merge(df_store, on=['Store'], how='inner')
    .drop(columns=['IsHoliday_y'])
    .rename(columns={'IsHoliday_x': 'IsHoliday'})
)

# Keep only rows with strictly positive sales: both negative and zero
# Weekly_Sales values are removed by this filter.
df = df.loc[df['Weekly_Sales'] > 0]

# Replace every remaining missing value, in any column, with 0.
df = df.fillna(0)
# In[9]:
# Encode on a copy so the cleaned ``df`` itself is left unchanged.
df_encoded = df.copy()

# Integer codes for the store-type letters.
type_group = {'A': 1, 'B': 2, 'C': 3}
# map() yields the numeric codes directly instead of relying on replace()
# downcasting an object column (deprecated in recent pandas). Any value that
# is not a key of ``type_group`` becomes NaN, and the integer cast then raises
# here rather than letting an unencoded value reach the model.
df_encoded['Type'] = df_encoded['Type'].map(type_group).astype('int64')

# Holiday flag as 0/1.
df_encoded['IsHoliday'] = df_encoded['IsHoliday'].astype(bool).astype(int)

# The Date column is dropped, not encoded, so it is not a model feature.
df_encoded = df_encoded.drop(columns=['Date'])

# Every remaining column except the target is a feature.
feature_cols = [c for c in df_encoded.columns if c != 'Weekly_Sales']
X = df_encoded[feature_cols]
Y = df_encoded['Weekly_Sales']
# In[14]:
# Train the model; keep the fitted estimator, the split and the hold-out
# predictions for inspection below.
model, predicted_test, X_train, X_test, y_train, y_test = forecasting(X, Y)

# Render floats inside printed arrays with two decimals.
np.set_printoptions(suppress=True, formatter={'float_kind': '{:0.2f}'.format})

# Show up to five hold-out rows as: features, predicted value, actual value.
# Slicing (rather than indexing 0..4) avoids an IndexError when the hold-out
# set has fewer than five rows.
for features, predicted, actual in zip(X_test[:5], predicted_test[:5], y_test[:5]):
    print(features, predicted, actual)