-
Notifications
You must be signed in to change notification settings - Fork 75
/
dataset.py
25 lines (21 loc) · 917 Bytes
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# for more information read "19-Intro2ML-HodaDataset.ipynb"
import cv2
import numpy as np
from scipy import io
def load_hoda(training_sample_size=1000, test_sample_size=200, size=5):
#load dataset
trs = training_sample_size
tes = test_sample_size
dataset = io.loadmat('./dataset/Data_hoda_full.mat')
#test and training set
X_train_orginal = np.squeeze(dataset['Data'][:trs])
y_train = np.squeeze(dataset['labels'][:trs])
X_test_original = np.squeeze(dataset['Data'][trs:trs+tes])
y_test = np.squeeze(dataset['labels'][trs:trs+tes])
#resize
X_train_5by5 = [cv2.resize(img, dsize=(size, size)) for img in X_train_orginal]
X_test_5by_5 = [cv2.resize(img, dsize=(size, size)) for img in X_test_original]
#reshape
X_train = np.reshape(X_train_5by5, [-1,size**2])
X_test = np.reshape(X_test_5by_5, [-1,size**2])
return X_train, y_train, X_test, y_test