diff --git a/choice_learn/datasets/base.py b/choice_learn/datasets/base.py index d6da3e1..b44c2be 100644 --- a/choice_learn/datasets/base.py +++ b/choice_learn/datasets/base.py @@ -4,6 +4,7 @@ import gzip import os from importlib import resources +from pathlib import Path import numpy as np import pandas as pd @@ -12,7 +13,7 @@ from choice_learn.data.choice_dataset import ChoiceDataset OS_DATA_MODULE = os.path.join(os.path.abspath(".."), "choice_learn", "datasets", "data") -DATA_MODULE = "choice_learn.datasets.data" +DATA_MODULE = "choice_learn/datasets/data" def get_path(data_file_name, module=DATA_MODULE): @@ -35,10 +36,12 @@ def get_path(data_file_name, module=DATA_MODULE): import sys if sys.version >= "3.9": - return resources.files(module) / data_file_name + return resources.files(module.replace("/", ".")) / data_file_name - with resources.path(module, data_file_name) as path: - return path + # with resources.path(module, data_file_name) as path: + # return path + path = Path(module).resolve() / data_file_name + return path.as_posix() def load_csv(data_file_name, data_module=DATA_MODULE, encoding="utf-8"): @@ -140,7 +143,7 @@ def download_from_url(url): full_path = get_path(local_filename, module=DATA_MODULE) # Check that the file is not already downloaded in the DATA_MODULE directory - if not os.path.isfile(full_path): + if not Path.is_file(Path(full_path)): print(f"Downloading {local_filename} from {url}") try: with requests.get(url, stream=True, timeout=20) as r: @@ -152,7 +155,7 @@ def download_from_url(url): print(f"Couldn't download automatically the dataset from {url}") # Move the downloaded file to the DATA_MODULE directory - os.rename(local_filename, full_path) + Path(local_filename).rename(full_path) print(f"Download completed. File saved as {local_filename} in {full_path}") return local_filename diff --git a/choice_learn/datasets/expedia.py b/choice_learn/datasets/expedia.py index e081d16..eae39c7 100644 --- a/choice_learn/datasets/expedia.py +++ b/choice_learn/datasets/expedia.py @@ -10,8 +10,8 @@ from choice_learn.data.storage import OneHotStorage from choice_learn.datasets.base import get_path -DATA_MODULE = "choice_learn.datasets.data" -CACHE_MODULE = "choice_learn.datasets.cache" +DATA_MODULE = "choice_learn/datasets/data" +CACHE_MODULE = "choice_learn/datasets/cache" def load_expedia(as_frame=False, preprocessing="rumnet"): @@ -26,7 +26,7 @@ def load_expedia(as_frame=False, preprocessing="rumnet"): """ filename = "expedia.csv" data_path = get_path(filename, module=DATA_MODULE) - if not Path.exists(data_path): + if not Path.exists(Path(data_path)): print("In order to use the Expedia dataset, please download it from:") print("https://www.kaggle.com/c/expedia-personalized-sort") print("and save it in the following location:") diff --git a/choice_learn/datasets/tafeng.py b/choice_learn/datasets/tafeng.py index 32c4f7d..bd1c991 100644 --- a/choice_learn/datasets/tafeng.py +++ b/choice_learn/datasets/tafeng.py @@ -6,7 +6,7 @@ from choice_learn.data.choice_dataset import ChoiceDataset from choice_learn.datasets.base import get_path -DATA_MODULE = "choice_learn.datasets.data" +DATA_MODULE = "choice_learn/datasets/data" def load_tafeng(as_frame=False, return_desc=False, preprocessing=None): diff --git a/tests/integration_tests/models/test_reslogit.py b/tests/integration_tests/models/test_reslogit.py index a1eeaa0..0c0996a 100644 --- a/tests/integration_tests/models/test_reslogit.py +++ b/tests/integration_tests/models/test_reslogit.py @@ -1,12 +1,14 @@ """Tests ResLogit.""" import numpy as np +import pytest import tensorflow as tf from choice_learn.datasets import load_swissmetro # from choice_learn.models import ResLogit, SimpleMNL from choice_learn.models import ResLogit +from choice_learn.models.reslogit import ResLayer dataset = load_swissmetro() dataset = dataset[:100] # Reduce the dataset size for faster testing @@ -352,3 +354,12 @@ def test_that_endpoints_run(): model.evaluate(dataset, mode="optim") model.predict_probas(dataset) assert True + + +def test_activation(): + """Tests ResLogit activation.""" + layer = ResLayer() + for act in ["linear", "relu", "-relu", "softplus", "tanh", "sigmoid"]: + _ = layer.get_activation_function(act) + with pytest.raises(ValueError): + layer.get_activation_function("abc") diff --git a/tests/unit_tests/datasets/test_download.py b/tests/unit_tests/datasets/test_download.py new file mode 100644 index 0000000..c920561 --- /dev/null +++ b/tests/unit_tests/datasets/test_download.py @@ -0,0 +1,9 @@ +"""Test the file downloading function.""" + +from choice_learn.datasets.base import download_from_url + + +def test_download(): + """Tests downloading a dummy csv file.""" + url = "https://github.com/artefactory/choice-learn/blob/main/tests/data/test_data.csv" + download_from_url(url) diff --git a/tests/unit_tests/models/test_tastenet.py b/tests/unit_tests/models/test_tastenet.py new file mode 100644 index 0000000..bab9b70 --- /dev/null +++ b/tests/unit_tests/models/test_tastenet.py @@ -0,0 +1,146 @@ +"""Simple tests for the TasteNet model.""" + +import pytest + +from choice_learn.datasets import load_swissmetro +from choice_learn.models import TasteNet + +customers_id, dataset = load_swissmetro(preprocessing="tastenet", as_frame=False) +dataset = dataset[:20] + +taste_net_layers = [] +taste_net_activation = "relu" +items_features_by_choice_parametrization = [ + [-1.0, "-exp", "-exp", 0.0, "linear", 0.0, 0.0], + [-1.0, "-exp", "-exp", "linear", 0.0, "linear", 0.0], + [-1.0, "-exp", 0.0, 0.0, 0.0, 0.0, 0.0], +] + + +def test_activation(): + """Tests TasteNet activation.""" + tastenet = TasteNet( + taste_net_layers=taste_net_layers, + taste_net_activation=taste_net_activation, + items_features_by_choice_parametrization=items_features_by_choice_parametrization, + optimizer="Adam", + epochs=40, + lr=0.001, + batch_size=32, + ) + for act in ["linear", "relu", "-relu", "exp", "-exp", "tanh", "sigmoid"]: + _ = tastenet.get_activation_function(act) + with pytest.raises(ValueError): + tastenet.get_activation_function("abc") + + +def test_fit_adam_and_predict(): + """Test fit with Gradient Descent.""" + tastenet = TasteNet( + taste_net_layers=taste_net_layers, + taste_net_activation=taste_net_activation, + items_features_by_choice_parametrization=items_features_by_choice_parametrization, + optimizer="Adam", + epochs=5, + lr=0.001, + batch_size=32, + ) + _ = tastenet.fit(dataset) + _ = tastenet.predict_tastes(dataset.shared_features_by_choice) + assert True + + +def test_fit_lbfgs(): + """Test fit with Gradient Descent.""" + tastenet = TasteNet( + taste_net_layers=[4], + taste_net_activation=taste_net_activation, + items_features_by_choice_parametrization=items_features_by_choice_parametrization, + optimizer="lbfgs", + epochs=5, + lr=0.001, + batch_size=32, + ) + _ = tastenet.fit(dataset) + assert True + + +def test_errors_raised(): + """Test diverse errors that should be raised.""" + with pytest.raises(ValueError): + tastenet = TasteNet( + taste_net_layers=taste_net_layers, + taste_net_activation=taste_net_activation, + items_features_by_choice_parametrization=[ + [-1.0, "-exp", "-exp", 0.0, "linear", 0.0, 0.0], + [-1.0, "-exp", "-exp", "linear", 0.0, "linear", 0.0], + ], + optimizer="Adam", + epochs=5, + lr=0.001, + batch_size=32, + ) + _ = tastenet.fit(dataset) + + with pytest.raises(ValueError): + tastenet = TasteNet( + taste_net_layers=taste_net_layers, + taste_net_activation=taste_net_activation, + items_features_by_choice_parametrization=[ + [-1.0, "-exp", "-exp", 0.0, "linear", 0.0, 0.0, 0.0], + [-1.0, "-exp", "-exp", "linear", 0.0, "linear", 0.0, 0.0], + [-1.0, "-exp", 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + optimizer="Adam", + epochs=5, + lr=0.001, + batch_size=32, + ) + _ = tastenet.fit(dataset) + + with pytest.raises(ValueError): + tastenet = TasteNet( + taste_net_layers=taste_net_layers, + taste_net_activation=taste_net_activation, + items_features_by_choice_parametrization=[ + [-1.0, "-exp", "-exp", 0.0, "linear", 0.0, 0.0], + [-1.0, "-exp", "-exp", "linear", 0.0, "linear", 0.0], + ], + optimizer="lbfgs", + epochs=5, + lr=0.001, + batch_size=32, + ) + _ = tastenet.fit(dataset) + + with pytest.raises(ValueError): + tastenet = TasteNet( + taste_net_layers=taste_net_layers, + taste_net_activation=taste_net_activation, + items_features_by_choice_parametrization=[ + [-1.0, "-exp", "-exp", 0.0, "linear", 0.0, 0.0, 0.0], + [-1.0, "-exp", "-exp", "linear", 0.0, "linear", 0.0, 0.0], + [-1.0, "-exp", 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + optimizer="lbfgs", + epochs=5, + lr=0.001, + batch_size=32, + ) + _ = tastenet.fit(dataset) + + with pytest.raises(ValueError): + tastenet = TasteNet( + taste_net_layers=taste_net_layers, + taste_net_activation=taste_net_activation, + items_features_by_choice_parametrization=[ + [-1.0, "-exp", "-exp", 0.0, "linear", 0.0, 0.0], + [-1.0, "-exp", "-exp", "linear", 0.0, "linear", 0.0, 0.0], + [-1.0, "-exp", 0.0, 0.0, 0.0, 0.0, 0.0], + ], + optimizer="lbfgs", + epochs=5, + lr=0.001, + batch_size=32, + ) + _ = tastenet.fit(dataset)