-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #33 from artefactory/optim
Additions: - TaFeng Dataset - AssortmentOptimizer class based on Gurobi - An example on the TaFeng Dataset Fixes: - Optimized NLL - some typos
- Loading branch information
Showing
6 changed files
with
585 additions
and
1 deletion.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
"""Some datasets used for personal examples.""" | ||
import os | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
from choice_learn.data.choice_dataset import ChoiceDataset | ||
|
||
DATA_MODULE = os.path.join(os.path.abspath(".."), "choice_learn", "datasets", "data") | ||
|
||
|
||
def load_tafeng(as_frame=False, preprocessing=None):
    """Load the TaFeng grocery dataset.

    Original file and information can be found here:
    https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset/

    The archive is read from ``DATA_MODULE/ta_feng.csv.zip``.

    Parameters
    ----------
    as_frame : bool, optional
        Whether to return the original file as pd.DataFrame, by default False
    preprocessing : str, optional
        Predefined pre-processing to apply, by default None.
        Only "assort_example" is handled; with any other value the raw
        DataFrame is returned.

    Returns
    -------
    pd.DataFrame or ChoiceDataset
        TaFeng Grocery Dataset.
    """
    filepath = os.path.join(DATA_MODULE, "ta_feng.csv.zip")
    tafeng_df = pd.read_csv(filepath)
    if as_frame or preprocessing != "assort_example":
        return tafeng_df

    # Keep the products of sub-class 100505 that appear in more than 20 rows.
    subdf = tafeng_df.loc[tafeng_df.PRODUCT_SUBCLASS == 100505]
    product_counts = subdf.PRODUCT_ID.value_counts()
    prods = product_counts.index[(product_counts > 20).to_numpy()]
    subdf = tafeng_df.loc[tafeng_df.PRODUCT_ID.isin(prods)]
    subdf = subdf.dropna()
    subdf = subdf.reset_index(drop=True)

    # Create Prices: one entry per product, in order of first appearance,
    # initialised with the SALES_PRICE of the first row of that product.
    # NOTE(review): this initial value is the raw SALES_PRICE whereas prices
    # recorded in the loop below are SALES_PRICE / AMOUNT - confirm intent.
    first_rows = subdf.drop_duplicates(subset="PRODUCT_ID")
    items = first_rows.PRODUCT_ID.tolist()
    init_prices = first_rows.SALES_PRICE.tolist()
    # Constant-time lookup of an item's column instead of list.index per row.
    item_indexes = {item: index for index, item in enumerate(items)}

    # Encode Age Groups as one-hot vectors over three buckets.
    age_groups = {
        "<25": [1, 0, 0],
        "25-29": [0, 1, 0],
        "30-34": [0, 1, 0],
        "35-39": [0, 1, 0],
        "40-44": [0, 1, 0],
        "45-49": [0, 1, 0],
        "50-54": [0, 0, 1],
        "55-59": [0, 0, 1],
        "60-64": [0, 0, 1],
        ">65": [0, 0, 1],
    }

    all_prices = []
    customer_features = []
    choices = []
    curr_prices = list(init_prices)

    for _, row in subdf.iterrows():
        n_units = int(row.AMOUNT)
        if n_units <= 0:
            # Nothing is emitted (and no price is updated) for such rows.
            continue

        item_index = item_indexes[row.PRODUCT_ID]
        # Latest observed unit price replaces the previous one for this item.
        curr_prices[item_index] = row.SALES_PRICE / row.AMOUNT
        price_snapshot = list(curr_prices)

        # One choice is recorded per purchased unit.
        customer_features.extend([age_groups[row.AGE_GROUP]] * n_units)
        choices.extend([item_index] * n_units)
        all_prices.extend([price_snapshot] * n_units)

    all_prices = np.expand_dims(np.array(all_prices), axis=-1)
    customer_features = np.array(customer_features).astype("float32")
    choices = np.array(choices)

    # Create Dataset; every item is marked available for every choice.
    return ChoiceDataset(
        contexts_features=customer_features,
        choices=choices,
        contexts_items_features=all_prices,
        contexts_items_availabilities=np.ones((len(choices), len(items))).astype("float32"),
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
"""Tool function for assortment optimization.""" | ||
import gurobipy as gp | ||
import numpy as np | ||
|
||
"""TODO: clarify outside good integration | ||
TODO 2: ADD easy integration of additional constraints | ||
""" | ||
|
||
|
||
class AssortmentOptimizer:
    """Base class for assortment optimization.

    Builds a Gurobi linear program over Charnes-Cooper transformed variables
    ``y`` where index 0 is the outside option and indexes 1..n_items are the
    items.
    """

    def __init__(self, utilities, itemwise_values, assortment_size, outside_option_given=False):
        """Initialize the AssortmentOptimizer object.

        Parameters
        ----------
        utilities : Iterable
            List of utilities for each item.
        itemwise_values: Iterable
            List of to-be-optimized values for each item, e.g. prices.
        assortment_size : int or None
            Requested assortment size; None disables the size constraint.
            NOTE(review): originally documented as a maximum, but the size
            constraint set in `set_base_constraints` is an equality.
        outside_option_given : bool
            Whether the outside option is already the first entry of
            `utilities` and `itemwise_values`. When False, an outside option
            with utility exp(0) and value 0 is prepended.

        Raises
        ------
        ValueError
            If `utilities` and `itemwise_values` have different lengths.
        """
        if len(utilities) != len(itemwise_values):
            raise ValueError(
                "You should provide as many utilities as itemwise values. "
                f"Found {len(utilities)} and {len(itemwise_values)} instead."
            )
        self.outside_option_given = outside_option_given
        if self.outside_option_given:
            # Index 0 already holds the outside option: store inputs as given.
            self.utilities = np.asarray(utilities, dtype=float)
            self.itemwise_values = np.asarray(itemwise_values, dtype=float)
        else:
            self.utilities = np.concatenate([[np.exp(0.0)], utilities], axis=0)
            self.itemwise_values = np.concatenate([[0.0], itemwise_values], axis=0)
        # Number of items, outside option excluded.
        self.n_items = len(self.utilities) - 1
        self.assortment_size = assortment_size

        self.solver = self.base_instantiate()
        self.set_base_constraints()

    def base_instantiate(self):
        """Instantiate the solver and its variables.

        Returns
        -------
        gurobipy.Model
            Maximization model holding one continuous variable per entry
            (outside option included), also stored in `self.y`.
        """
        # Create a new model
        solver = gp.Model("Assortment_IP")
        solver.ModelSense = -1  # maximize
        solver.setParam("OutputFlag", False)

        # Create variables; the objective weights each value by its utility.
        y = {}
        for j in range(self.n_items + 1):
            y[j] = solver.addVar(
                vtype=gp.GRB.CONTINUOUS,
                obj=float(self.itemwise_values[j] * self.utilities[j]),
                name=f"y_{j}",
            )
        self.y = y
        # Integrate new variables
        solver.update()

        return solver

    def set_base_constraints(self):
        """Set the LP base constraints.

        In particular, ensures Charnes-Cooper transformation constraints
        and assortment size constraint.
        """
        item_indexes = range(1, self.n_items + 1)

        # Base Charnes-Cooper Constraints for Integers
        for j in item_indexes:
            self.solver.addConstr(self.y[j] <= self.y[0])

        # Base Charnes-Cooper Constraint for Normalization (utility-weighted)
        charnes_cooper = gp.quicksum(
            self.y[j] * float(self.utilities[j]) for j in range(self.n_items + 1)
        )
        self.solver.addConstr(charnes_cooper == 1)

        # Assortment size constraint over ALL items (last one included).
        # The original pair of opposite inequalities amounts to this equality.
        if self.assortment_size is not None:
            self.solver.addConstr(
                gp.quicksum(self.y[j] for j in item_indexes)
                == self.assortment_size * self.y[0]
            )

        # Integrate constraints
        self.solver.update()

    def set_objective_function(self, itemwise_values):
        """Define the objective function to maximize with the assortment.

        Parameters
        ----------
        itemwise_values : list-like
            List of values for each item - total value to be optimized.

        Raises
        ------
        NotImplementedError
            Always; not implemented yet.
        """
        raise NotImplementedError

    def add_constraint(self):
        """Add constraints.

        Raises
        ------
        NotImplementedError
            Always; not implemented yet.
        """
        raise NotImplementedError

    def solve(self):
        """Solve the optimization problem.

        Returns
        -------
        np.ndarray
            Array of 0s and 1s, indicating the presence of each item in the
            optimal assortment. Its first entry is the outside option when
            `outside_option_given` is True; otherwise the outside option is
            left out.
        float
            Objective value reported by the solver.
        """
        self.solver.update()

        # -- Optimize --
        self.solver.optimize()
        self.status = self.solver.Status

        # Variable i maps to output position i - first_index, so position 0 is
        # the outside option when it was user-provided, else the first item.
        first_index = 0 if self.outside_option_given else 1
        assortment = np.zeros(self.n_items + 1 - first_index)
        for i in range(first_index, self.n_items + 1):
            if self.y[i].x > 0:
                assortment[i - first_index] = 1

        return assortment, self.solver.objVal
Oops, something went wrong.