-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocessing.py
24 lines (21 loc) · 1.02 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from sklearn.preprocessing import StandardScaler
import numpy as np
from scipy.special import boxcox1p
import pandas as pd
class Preprocessing():
    """Feature preprocessing helpers for a housing-style DataFrame.

    The Box-Cox helper expects the columns named in ``_BOXCOX_COLUMNS`` plus
    ``'Longitude'`` and ``'Target'`` to be present in the frame it receives.
    """

    # Columns transformed with a plain boxcox1p (no offset needed).
    _BOXCOX_COLUMNS = (
        'AveRooms',
        'AveBedrms',
        'HouseAge',
        'Population',
        'AveOccup',
        'Latitude',
        'MedInc',
    )

    def standard_scaler(self, X):
        """Return a standardized copy of ``X`` as a DataFrame.

        A fresh ``StandardScaler`` is fitted on ``X`` and applied to it.

        Args:
            X: pandas DataFrame of numeric columns.

        Returns:
            A new DataFrame with the same column labels and the same row
            index as ``X``, holding the scaled values.
        """
        scaled = StandardScaler().fit_transform(X)
        # Keep the caller's row index: rebuilding with columns only would
        # silently replace it with a RangeIndex and misalign later joins
        # against frames that still carry the original index.
        return pd.DataFrame(scaled, columns=X.columns, index=X.index)

    def boxcox_transform(self, X, y=None, *, lmbda=0.25, longitude_offset=125):
        """Apply ``boxcox1p`` to the feature and target columns of ``X``.

        ``X`` is modified in place and also returned.

        Args:
            X: pandas DataFrame containing the columns listed in
                ``_BOXCOX_COLUMNS`` as well as ``'Longitude'`` and
                ``'Target'``.
            y: Unused; accepted so existing call sites keep working.
            lmbda: Box-Cox power parameter passed to ``boxcox1p``.
            longitude_offset: Value added to ``'Longitude'`` before the
                transform.

        Returns:
            The same DataFrame object ``X`` with the transformed columns.

        Raises:
            KeyError: If one of the required columns is missing.
        """
        # boxcox1p is a NumPy ufunc, so transform each column in one
        # vectorized call instead of element-by-element via apply().
        for column in self._BOXCOX_COLUMNS:
            X[column] = boxcox1p(X[column].to_numpy(dtype=float), lmbda)

        # An offset is needed because the longitude data is negative.
        shifted_longitude = X['Longitude'].to_numpy(dtype=float) + longitude_offset
        X['Longitude'] = boxcox1p(shifted_longitude, lmbda)

        X['Target'] = boxcox1p(X['Target'].to_numpy(dtype=float), lmbda)
        return X