creditcard.py

# -*- coding: utf-8 -*-
"""minor_project.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1ogDiWkCQF5Ckzs0ieb_VTKpx1NiT1YI9
"""

# mounting google drive on colab notebook
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import numpy as np
import random
from sklearn.neighbors import NearestNeighbors
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import make_scorer, f1_score, recall_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import VotingClassifier
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.utils import class_weight
from sklearn.neighbors import KNeighborsClassifier
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss
import warnings
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
warnings.filterwarnings("ignore")

!pip install lightgbm

from lightgbm import LGBMClassifier

# Load the credit-card transactions from the mounted Google Drive.
data = pd.read_csv("/content/drive/MyDrive/creditcard.csv")

data  # notebook-style display of the raw frame

# A row without a label cannot be used for supervised learning, and filling
# the binary 'Class' column with its mean would invent fractional labels,
# so such rows are dropped instead of imputed.
data = data.dropna(subset=['Class'])

# Impute missing feature values with the per-column mean. The result is
# assigned back to `data`; calling fillna(..., inplace=True) on a column
# selection (`data[i]`) may operate on a temporary copy and silently leave
# the frame unchanged.
feature_means = data.drop(columns=['Class']).mean()
data = data.fillna(feature_means)

data  # notebook-style display of the cleaned frame

# Separate the target from the features and switch to plain NumPy arrays.
y = data['Class']
x = data.drop(['Class'], axis=1)

x = np.array(x)
y = np.array(y)

# 65/35 hold-out split. Seeded like every other split in this file so that
# the results are reproducible between runs.
X_train, X_test, Y_train, Y_test = tts(x, y, test_size=0.35, random_state=42)

"""# Exploratory Data Analysis"""

# Column dtypes and non-null counts.
data.info()

# Each plot gets its own figure. Without an explicit figure/show pair the
# histogram below is drawn onto the same axes as the scatter plot once the
# notebook cells are executed back-to-back as one script.
plt.figure()
sns.scatterplot(x=x[:, 2], y=x[:, 3], hue=y)  # feature columns 2 vs 3, coloured by class
plt.show()

# Class balance: number of samples per label.
plt.figure()
plt.hist(x=y)
plt.show()

# One box plot per column (Time, V1..V28, Amount), split by class, laid out
# row by row on a 6x5 grid.
feature_names = ['Time'] + [f'V{i}' for i in range(1, 29)] + ['Amount']

fig, axes = plt.subplots(nrows=6, ncols=5, figsize=(11, 17))
fig.suptitle('Features vs Class\n', size=18)

# Iterating axes and names together keeps every title attached to the panel
# it describes. The hand-written version put the "V18" title on the V17
# panel (axes[3, 2]) and left the V18 panel (axes[3, 3]) untitled.
for ax, feature in zip(axes.flat, feature_names):
    sns.boxplot(ax=ax, data=data, x='Class', y=feature, palette='Spectral')
    ax.set_title(f"{feature} distribution")

plt.tight_layout()
plt.show()

# 6x5 grid of 60-bin histograms, one per column, filled row by row in the
# order Time, V1..V28, Amount.
fig, axes = plt.subplots(nrows=6, ncols=5, figsize=(13, 8))
fig.suptitle('Features vs Class\n', size=20)

hist_columns = ['Time', *(f'V{k}' for k in range(1, 29)), 'Amount']
for panel, column in zip(axes.ravel(), hist_columns):
    panel.hist(data[column], bins=60, linewidth=0.5, edgecolor="white")
    panel.set_title(f"{column} distribution")

plt.tight_layout()
plt.show()

"""Distribution of Amount for Fradulent & Genuine transactions"""

# Side-by-side Amount distributions: fraud (Class == 1) on the left,
# genuine (Class == 0) on the right, both with 100 bins.
fig, axs = plt.subplots(ncols=2, figsize=(16, 4))

amount_panels = (
    (1, 'red', "Distribution of Fraud Transactions"),
    (0, 'green', "Distribution of Genuine Transactions"),
)
for panel, (label, colour, heading) in zip(axs, amount_panels):
    amounts = data.loc[data['Class'] == label, 'Amount']
    sns.distplot(amounts, bins=100, color=colour, ax=panel)
    panel.set_title(heading)

plt.show()

"""Distribution of Time for Fradulent & Genuine transactions"""

# Side-by-side Time distributions: fraud (Class == 1) on the left with the
# default binning, genuine (Class == 0) on the right with 100 bins.
fig, axs = plt.subplots(ncols=2, figsize=(16, 4))

time_panels = (
    (1, 'red', {}, "Distribution of Fraud Transactions"),
    (0, 'green', {'bins': 100}, "Distribution of Genuine Transactions"),
)
for panel, (label, colour, extra, heading) in zip(axs, time_panels):
    times = data.loc[data['Class'] == label, 'Time']
    sns.distplot(times, color=colour, ax=panel, **extra)
    panel.set_title(heading)

plt.show()

# Five most frequent transaction amounts among the fraud rows (Class == 1).
fraud_amounts = data.loc[data['Class'] == 1, 'Amount']
print("Fraud Transaction distribution : \n", fraud_amounts.value_counts().head())

"""There are 113 fraud transactions for just one dollar and 27 fraud transaction for 99.99 dollars. There are also 27 transactions for zero amount"""

# Annotated heat map of the pairwise correlations between all columns.
corr_matrix = data.corr()
plt.figure(figsize=(35, 20))
sns.heatmap(corr_matrix, annot=True, cmap="tab20c")

"""# LightGBM Classifier"""

# Seeded 65/35 hold-out split of the raw feature matrix.
X_train, X_test, Y_train, Y_test = tts(x, y, random_state=42, test_size=0.35)

# Baseline model: LightGBM with its default hyper-parameters.
clf1 = LGBMClassifier()
clf1.fit(X_train, Y_train)
Y_pred_clf1 = clf1.predict(X_test)

print(classification_report(Y_test, Y_pred_clf1))

confusion_matrix(Y_test, Y_pred_clf1)

# The second predict_proba column is used as the score for the ROC curve.
y_pred_proba = clf1.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(Y_test, y_pred_proba)

# ROC curve
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

"""Hyperparameter tuning"""

# Search space for the randomised hyper-parameter search.
params = dict(
    max_depth=np.arange(2, 10, 1),
    learning_rate=np.logspace(-4, 0, 100),
    num_leaves=np.arange(10, 200, 5),
    min_child_samples=np.arange(5, 30, 1),
)

# Randomised search scored on F1, seeded and run on all available cores.
random_search = RandomizedSearchCV(
    clf1,
    param_distributions=params,
    scoring='f1',
    n_jobs=-1,
    verbose=1,
    random_state=42,
)
random_search.fit(X_train, Y_train)

# Train a fresh LightGBM model with the best parameter set found.
best_lgbm_params = random_search.best_params_
clf1_tuned = LGBMClassifier(**best_lgbm_params)
clf1_tuned.fit(X_train, Y_train)
y_pred = clf1_tuned.predict(X_test)

print(classification_report(Y_test, y_pred))

confusion_matrix(Y_test, y_pred)

y_pred_proba = clf1_tuned.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(Y_test, y_pred_proba)

# ROC curve of the tuned model
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

"""# PCA

Scaling
"""

# Standardise every feature separately (zero mean, unit variance per column).
# x.mean() / x.std() without an axis reduce the whole matrix to one scalar,
# which shifts and rescales all columns by the same amount and therefore
# leaves them on their original, very different scales.
feature_mean = x.mean(axis=0)
feature_std = x.std(axis=0)
feature_std[feature_std == 0] = 1.0  # constant column: avoid dividing by zero
X_s = (x - feature_mean) / feature_std

# NOTE(review): the scaling statistics and the PCA are fitted on the full
# data set, i.e. before any train/test split, so test rows influence the
# projection. Consider fitting both on the training rows only.
pca = PCA()
X_pca = pca.fit_transform(X_s)

# Per-component and cumulative share of the variance.
explained_variances = pca.explained_variance_ratio_
cumulative_variances = np.cumsum(explained_variances)
plt.plot(explained_variances, label="Explained variance")
plt.plot(cumulative_variances, label="Cumulative variance")
plt.xlabel("Number of principal components")
plt.ylabel("Variance explained")
plt.legend()
plt.show()

print("Cumulative explained variance ratio:")
for i in range(1, 5):
    print(f"{i} principal components: {cumulative_variances[i-1]:.7f}")

# The analysis keeps two principal components.
# NOTE(review): that choice was read off the curve produced with the earlier
# scalar scaling; re-check it against the cumulative variance printed above.
pca_optimum = PCA(n_components=2)
X_pca = pca_optimum.fit_transform(X_s)

# The two retained components, coloured by class.
plt.figure()
sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=y)
plt.show()

# Seeded 65/35 hold-out split of the PCA-projected data.
X_train_pca, X_test_pca, Y_train_pca, Y_test_pca = tts(X_pca, y, test_size=0.35, random_state=42)

# Decision tree trained on the principal components.
clf6 = DTC(random_state=42)
clf6.fit(X_train_pca, Y_train_pca)

Y_pred_clf6 = clf6.predict(X_test_pca)

# Evaluate against the labels that belong to X_test_pca. The report used
# Y_test from the raw-feature split, which only lines up with these
# predictions while both splits share the same seed, size and input order.
print(classification_report(Y_test_pca, Y_pred_clf6))

# Rows are true classes, columns are predicted classes: [[TN, FP], [FN, TP]].
print(confusion_matrix(Y_test_pca, Y_pred_clf6))

y_pred_proba = clf6.predict_proba(X_test_pca)[:, 1]
fpr, tpr, _ = metrics.roc_curve(Y_test_pca, y_pred_proba)

# ROC curve
plt.plot(fpr, tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

"""Using Smote on the dataset after PCA"""

# Oversample the minority class with SMOTE, but only inside the training
# split. Resampling the whole data set before splitting puts synthetic points
# (interpolated from real ones) into the test set, so the reported scores no
# longer describe performance on genuine, imbalanced data.
oversample = SMOTE(random_state=42)
X_train_pcamote, Y_train_pcamote = oversample.fit_resample(X_train_pca, Y_train_pca)

# Names kept from the earlier version; they now refer to the resampled
# training data.
X_smote_pca, y_smote_pca = X_train_pcamote, Y_train_pcamote

# The evaluation set is the untouched PCA hold-out split.
X_test_pcamote, Y_test_smote = X_test_pca, Y_test_pca

clf7 = LGBMClassifier()
clf7.fit(X_train_pcamote, Y_train_pcamote)

Y_pred_clf7 = clf7.predict(X_test_pcamote)

print(classification_report(Y_test_smote, Y_pred_clf7))

# Rows are true classes, columns are predicted classes: [[TN, FP], [FN, TP]].
print(confusion_matrix(Y_test_smote, Y_pred_clf7))

y_pred_proba = clf7.predict_proba(X_test_pcamote)[:, 1]
fpr, tpr, _ = metrics.roc_curve(Y_test_smote, y_pred_proba)

# ROC curve
plt.plot(fpr, tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

"""Undersampling after PCA"""

# Under-sample the PCA training split with NearMiss; the test split is left
# untouched.
undersample = NearMiss()
X_us_pca, y_us_pca = undersample.fit_resample(X_train_pca, Y_train_pca)

# LightGBM trained on the reduced training set.
clf8 = LGBMClassifier()
clf8.fit(X_us_pca, y_us_pca)
Y_pred_clf8 = clf8.predict(X_test_pca)

print(classification_report(Y_test_pca, Y_pred_clf8))

confusion_matrix(Y_test_pca, Y_pred_clf8)

y_pred_proba = clf8.predict_proba(X_test_pca)[:, 1]
fpr, tpr, _ = metrics.roc_curve(Y_test_pca, y_pred_proba)

# ROC curve
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

"""# Under Sampling"""

y = np.array(y)
x = np.array(x)

# Row numbers (within X_train / Y_train) of the two classes.
pos_indices = np.where(Y_train == 1)[0]
neg_indices = np.where(Y_train == 0)[0]

pos_pts = X_train[pos_indices]
neg_pts = X_train[neg_indices]

# Class centroids and the distance between them.
pos_cent = np.mean(pos_pts, axis=0)
neg_cent = np.mean(neg_pts, axis=0)
r = np.linalg.norm(pos_cent - neg_cent)
print(r)

# Candidate negatives: those closer to the positive centroid than the two
# centroids are to each other. `samp_indices` remembers which X_train row
# every candidate came from.
near_pos_mask = np.linalg.norm(neg_pts - pos_cent, axis=1) < r
samps = neg_pts[near_pos_mask]
samp_indices = neg_indices[near_pos_mask]
print("candidate negatives:", len(samps))

# Score each candidate by its distance to the two nearest positive points.
# The per-candidate version fitted a new 3-neighbour model on
# [candidate] + positives and averaged the result, i.e. the candidate itself
# (distance 0) plus its two nearest positives; dividing the two distances by
# 3 gives the same score from a single model fitted once.
if len(samps):
    nn_dist, _ = NearestNeighbors(n_neighbors=2).fit(pos_pts).kneighbors(samps)
    dists = nn_dist.sum(axis=1) / 3
else:
    dists = np.empty(0)
print(dists)

# Keep as many negatives as there are positives (or every candidate if there
# are fewer), closest first. The argsort positions are mapped back through
# `samp_indices`: they are positions in the candidate list, not rows of
# X_train, and using them directly selects unrelated training rows.
n_keep = min(len(pos_indices), len(dists))
closest = np.argsort(dists, kind="stable")[:n_keep]
samp_neg_inds = samp_indices[closest]
train_indices = np.concatenate((samp_neg_inds, pos_indices))

samp_pts = X_train[samp_neg_inds]
print("selected negatives:", len(samp_neg_inds))
print("under-sampled training rows:", len(train_indices))

us_x = X_train[train_indices]
us_y = Y_train[train_indices]

print(us_y)

# Selected negatives only.
plt.figure()
sns.scatterplot(x=samp_pts[:, 3], y=samp_pts[:, 4])
plt.show()

# Balanced training set, coloured by class.
plt.figure()
sns.scatterplot(x=us_x[:, 3], y=us_x[:, 4], hue=us_y)
plt.show()

# Class counts after under-sampling.
plt.figure()
plt.hist(x=us_y)
plt.show()

# Train on the balanced subset, evaluate on the untouched hold-out split.
clf2 = LGBMClassifier()
clf2.fit(us_x, us_y)

Y_pred_clf2 = clf2.predict(X_test)

print(classification_report(Y_test, Y_pred_clf2))

# Rows are true classes, columns are predicted classes:
# TN    FP
# FN    TP
print(confusion_matrix(Y_test, Y_pred_clf2))

"""# Over Sampling"""

# Row numbers (within X_train / Y_train) of the two classes.
pos_indices = np.where(Y_train == 1)[0]
neg_indices = np.where(Y_train == 0)[0]

pos_pts = X_train[pos_indices]

# For every positive point: itself (column 0) and its nearest positive
# neighbour (column 1).
_, nn = NearestNeighbors(n_neighbors=2).fit(pos_pts).kneighbors(pos_pts)

# Generate only as many synthetic positives as are needed to match the number
# of negatives. Creating len(neg_indices) new points on top of the existing
# positives makes the former minority class the larger one.
n_generate = max(len(neg_indices) - len(pos_indices), 0)

# Seeded generator so the synthetic points are reproducible between runs.
rng = np.random.default_rng(42)
seed_rows = rng.integers(0, len(pos_pts), size=n_generate)
mix = rng.random(n_generate)[:, np.newaxis]

# Each synthetic point is a random convex combination of a positive point and
# its nearest positive neighbour.
generated_pts = mix * pos_pts[nn[seed_rows, 0]] + (1 - mix) * pos_pts[nn[seed_rows, 1]]
generated_class = np.ones(n_generate, dtype=int)

# Synthetic points versus the real positives they were built from.
plt.figure()
sns.scatterplot(x=generated_pts[:, 2], y=generated_pts[:, 3], alpha=0.5, color='green', label="generated")
sns.scatterplot(x=pos_pts[:, 2], y=pos_pts[:, 3], color='red', label="original")
plt.legend()
plt.show()

# Over-sampled training set: original training rows plus synthetic positives.
os_x = np.concatenate((X_train, generated_pts))
os_y = np.concatenate((Y_train, generated_class))

plt.figure()
sns.scatterplot(x=os_x[:, 2], y=os_x[:, 3], hue=os_y)
plt.show()

# Class counts after over-sampling.
plt.figure()
plt.hist(x=os_y)
plt.show()

# The model is trained on the full over-sampled set and evaluated on the
# untouched hold-out split, so no further split of os_x / os_y is made here
# (the earlier one was unseeded and its outputs were never used).
clf_os = LGBMClassifier()
clf_os.fit(os_x, os_y)

Y_pred_clf_os = clf_os.predict(X_test)

print(classification_report(Y_test, Y_pred_clf_os))

# Rows are true classes, columns are predicted classes: [[TN, FP], [FN, TP]].
print(confusion_matrix(Y_test, Y_pred_clf_os))

y_pred_proba = clf_os.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(Y_test, y_pred_proba)

# ROC curve
plt.plot(fpr, tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

"""# Boosting"""

# AdaBoost with default settings on the raw-feature training split.
clf4 = AdaBoostClassifier()
clf4.fit(X_train, Y_train)
Y_pred_clf4 = clf4.predict(X_test)

print(classification_report(Y_test, Y_pred_clf4))

confusion_matrix(Y_test, Y_pred_clf4)
# scikit-learn layout (rows = true class, columns = predicted class):
# TN    FP
# FN    TP

y_pred_proba = clf4.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(Y_test, y_pred_proba)

# ROC curve
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

"""# KNN"""

# Neighbours vote with a weight that depends on their distance.
weights = 'distance'

# 5-nearest-neighbour classifier on the raw-feature training split.
knn = KNeighborsClassifier(weights=weights, n_neighbors=5)
knn.fit(X_train, Y_train)
Y_pred_knn = knn.predict(X_test)

print(classification_report(Y_test, Y_pred_knn))

confusion_matrix(Y_test, Y_pred_knn)
# scikit-learn layout (rows = true class, columns = predicted class):
# TN    FP
# FN    TP

y_pred_proba = knn.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(Y_test, y_pred_proba)

# ROC curve
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

"""# Weighted Data

Weighted SVM
"""

# SVM on the PCA features with class weights inversely proportional to the
# class frequencies.
clf5 = SVC(class_weight='balanced')
clf5.fit(X_train_pca, Y_train_pca)

# Show the actual weights that 'balanced' resolves to for this training set.
print(class_weight.compute_class_weight('balanced', classes=np.unique(Y_train_pca), y=Y_train_pca))

Y_pred_clf5 = clf5.predict(X_test_pca)

# Evaluate against the labels of the PCA split the predictions were made on.
# Y_test (raw-feature split) only lines up with them while both splits share
# the same seed, size and input order.
print(classification_report(Y_test_pca, Y_pred_clf5))

# Rows are true classes, columns are predicted classes: [[TN, FP], [FN, TP]].
print(confusion_matrix(Y_test_pca, Y_pred_clf5))

"""Hyperparameter Tuning"""

# Candidate weights for class 0; class 1 receives the complement, so class 1
# is always weighted at least as heavily as class 0.
weights = np.linspace(0.05, 0.50, 8)
# `w` rather than `x` as the loop variable, so the comprehension does not
# shadow the global feature matrix `x`.
param_grid = {'class_weight': [{0: w, 1: 1.0 - w} for w in weights]}
gsc = GridSearchCV(clf5, param_grid=param_grid, scoring='f1')

# Search in the same feature space clf5 is trained and evaluated in (the PCA
# projection). Fitting on the raw X_train would tune the weights for a
# different input space than the model they are meant for.
gsc.fit(X_train_pca, Y_train_pca)

print("Best hyperparameters:", gsc.best_params_)
print("F1 score:", gsc.best_score_)

"""# Voting Classifier"""

# Soft-voting ensemble of the tuned LightGBM, the "oversampling" LightGBM and
# AdaBoost.
# NOTE(review): VotingClassifier refits clones of its estimators on the data
# passed to fit(), so the 'oversampling' member is retrained on the original
# X_train here, not on the over-sampled os_x / os_y. Confirm this is intended.
vote = VotingClassifier(
    estimators=[('lgmbm', clf1_tuned), ('oversampling', clf_os), ('adaboost', clf4)],
    voting='soft',
)
vote = vote.fit(X_train, Y_train)

Y_pred_vote = vote.predict(X_test)

print(classification_report(Y_test, Y_pred_vote))

# The predictions were made on X_test, so they are compared with Y_test
# throughout. Y_test_pca only matches while both splits share the same seed.
# Rows are true classes, columns are predicted classes: [[TN, FP], [FN, TP]].
print(confusion_matrix(Y_test, Y_pred_vote))

y_pred_proba = vote.predict_proba(X_test)[:, 1]
fpr, tpr, _ = metrics.roc_curve(Y_test, y_pred_proba)

# ROC curve
plt.plot(fpr, tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

"""# Combination of successful methods"""

# Randomised search (same space and F1 scoring as the LightGBM search) on the
# over-sampled training set.
random_search1 = RandomizedSearchCV(
    clf_os,
    param_distributions=params,
    scoring='f1',
    n_jobs=-1,
    verbose=1,
    random_state=42,
)
random_search1.fit(os_x, os_y)

# LightGBM rebuilt with the best parameters found and trained on the
# over-sampled data.
best_os_params = random_search1.best_params_
clf_os_tuned = LGBMClassifier(**best_os_params)
clf_os_tuned.fit(os_x, os_y)

# AdaBoost using the tuned LightGBM model as its base estimator.
clf_combi = AdaBoostClassifier(estimator=clf_os_tuned)
clf_combi.fit(os_x, os_y)

# Evaluate on the untouched raw-feature hold-out split.
Y_pred_clf_combi = clf_combi.predict(X_test)
print(classification_report(Y_test, Y_pred_clf_combi))