Skip to content

Commit

Permalink
added DIF solver + SMAP dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
Jad-yehya committed May 22, 2024
1 parent eca255b commit 95fc389
Show file tree
Hide file tree
Showing 6 changed files with 318 additions and 80 deletions.
2 changes: 1 addition & 1 deletion datasets/glass.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ class Dataset(BaseDataset):
def get_data(self):
data = np.load("datasets/14_glass.npz")
X, y = data["X"], data["y"]
return dict(X=X, y=y)
return dict(X=X, y=y, X_test=None)
1 change: 0 additions & 1 deletion datasets/simulated.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from benchopt import BaseDataset, safe_import_context

with safe_import_context() as import_ctx:
# import module to generate normal 1d data
from sklearn.datasets import make_regression
import numpy as np

Expand Down
24 changes: 24 additions & 0 deletions datasets/smap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from benchopt import BaseDataset
from benchopt import safe_import_context

with safe_import_context() as import_ctx:
import os
import pandas as pd


class Dataset(BaseDataset):
    """Benchopt dataset exposing the pre-processed "SMAP" pickle files.

    The files are read from a fixed absolute directory, so this dataset is
    only usable on a machine where that directory exists.
    """

    name = "SMAP"

    install_cmd = "conda"
    requirements = ["pandas"]

    def get_data(self):
        """Load the train split, the test split and the test labels.

        Returns
        -------
        dict
            ``X`` holds the unpickled training object, ``X_test`` the
            unpickled test object and ``y`` the unpickled test labels
            (the labels belong to ``X_test``, not to ``X``).
        """
        root = "/storage/store/work/jyehya/Benchmarks/processing/processed/SMAP"
        prefix = "SMAP"

        # NOTE(review): unpickling executes arbitrary code; these files are
        # presumably produced by a trusted local preprocessing step -- confirm.
        # The generator is consumed left to right, so the files are read in
        # the order train, test, test labels.
        train, test, test_labels = (
            pd.read_pickle(os.path.join(root, f"{prefix}{suffix}"))
            for suffix in ("_train.pkl", "_test.pkl", "_test_label.pkl")
        )

        return dict(X=train, y=test_labels, X_test=test)
335 changes: 258 additions & 77 deletions exploratory/data_exploration.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion objective.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ def get_one_result(self):
"""
return np.zeros(self.X.shape[0])

def set_data(self, X, y):
def set_data(self, X, y, X_test):
"Set the data to compute the objective."
self.X, self.y = X, y
self.X_test = X_test

def evaluate_result(self, y_hat):
"Evaluate the result provided by the solver."
Expand Down
33 changes: 33 additions & 0 deletions solvers/dif.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Deep Isolation Forest
from benchopt import BaseSolver
from benchopt import safe_import_context

with safe_import_context() as import_ctx:
from pyod.models.dif import DIF


class Solver(BaseSolver):
    """Benchopt solver wrapping pyod's Deep Isolation Forest (``DIF``).

    The detector is fitted on ``X`` and then used to predict labels for
    ``X_test``. When the dataset supplies no separate test split
    (``X_test is None``), the predictions are computed on ``X`` itself.
    """

    name = "DIF"

    install_cmd = "conda"
    requirements = ["pyod"]

    # Grid of values forwarded to ``DIF(contamination=...)``; benchopt is
    # expected to expose the selected value as ``self.contamination``.
    parameters = {
        "contamination": [0.05, 0.1, 0.2],
    }

    sampling_strategy = "run_once"

    def set_objective(self, X, y, X_test=None):
        """Store the data handed over by the objective.

        Parameters
        ----------
        X : training data the detector is fitted on.
        y : labels; kept for reference only, the learning is unsupervised
            (for datasets with a test split, y is y_test).
        X_test : optional data to predict on; ``None`` when the dataset
            has no separate test split.
        """
        self.X = X
        self.X_test = X_test
        self.y = y

    def run(self, _):
        """Fit the detector on ``self.X`` and store the predicted labels.

        The argument is ignored: with ``sampling_strategy = "run_once"``
        there is no iteration budget to honour.
        """
        clf = DIF(contamination=self.contamination)
        clf.fit(self.X)

        # Datasets without a test split pass X_test=None; calling
        # predict(None) would fail, so score the training data instead.
        X_eval = self.X if self.X_test is None else self.X_test
        self.y_hat = clf.predict(X_eval)

    def get_result(self):
        """Return the predictions under the ``y_hat`` key."""
        return {"y_hat": self.y_hat}

0 comments on commit 95fc389

Please sign in to comment.