Skip to content

Commit

Permalink
Merge pull request #33 from artefactory/optim
Browse files Browse the repository at this point in the history
Additions:

- TaFeng Dataset
- AssortmentOptimizer class based on Gurobi
- An example on the TaFeng Dataset

Fixes:

- Optimized NLL
- some typos
  • Loading branch information
VincentAuriau authored Mar 5, 2024
2 parents f079757 + 257f9b3 commit 4fbf727
Show file tree
Hide file tree
Showing 6 changed files with 585 additions and 1 deletion.
Binary file added choice_learn/datasets/data/ta_feng.csv.zip
Binary file not shown.
117 changes: 117 additions & 0 deletions choice_learn/datasets/examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""Some datasets used for personal examples."""
import os

import numpy as np
import pandas as pd

from choice_learn.data.choice_dataset import ChoiceDataset

# Directory holding the bundled data files. It is resolved relative to this
# module (not the current working directory) so that the loaders work no matter
# where the calling process was started from.
DATA_MODULE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")


def load_tafeng(as_frame=False, preprocessing=None):
    """Load the TaFeng grocery dataset.

    Original file and information can be found here:
    https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset/

    Parameters
    ----------
    as_frame : bool, optional
        Whether to return the original file as pd.DataFrame, by default False
    preprocessing : str, optional
        Predefined pre-processing to apply, by default None.
        Only "assort_example" is recognised; with any other value the raw
        DataFrame is returned.

    Returns
    -------
    pd.DataFrame or ChoiceDataset
        TaFeng Grocery Dataset. With preprocessing="assort_example", a
        ChoiceDataset with one row per purchased unit is returned.
    """
    filepath = os.path.join(DATA_MODULE, "ta_feng.csv.zip")
    tafeng_df = pd.read_csv(filepath)
    if as_frame or preprocessing != "assort_example":
        return tafeng_df

    # Keep the products of sub-class 100505 that appear in more than 20 rows,
    # then work on every complete (NaN-free) row of those products.
    subclass_df = tafeng_df.loc[tafeng_df.PRODUCT_SUBCLASS == 100505]
    product_counts = subclass_df.PRODUCT_ID.value_counts()
    prods = product_counts.index[(product_counts > 20).to_numpy()]
    subdf = tafeng_df.loc[tafeng_df.PRODUCT_ID.isin(prods)]
    subdf = subdf.dropna().reset_index(drop=True)

    # Position of each product in the items axis of the dataset.
    items = list(subdf.PRODUCT_ID.unique())
    item_to_index = {item: index for index, item in enumerate(items)}

    # Running price of every item, initialised with the first price seen for it.
    # NOTE(review): the initial value is the raw SALES_PRICE whereas the updates
    # below use SALES_PRICE / AMOUNT - confirm which one is intended.
    curr_prices = [
        subdf.loc[subdf.PRODUCT_ID == item].SALES_PRICE.to_numpy()[0] for item in items
    ]

    # One-hot encoding of the age group into three brackets:
    # under 25 / 25 to 49 / 50 and above.
    age_groups = {
        "<25": [1, 0, 0],
        "25-29": [0, 1, 0],
        "30-34": [0, 1, 0],
        "35-39": [0, 1, 0],
        "40-44": [0, 1, 0],
        "45-49": [0, 1, 0],
        "50-54": [0, 0, 1],
        "55-59": [0, 0, 1],
        "60-64": [0, 0, 1],
        ">65": [0, 0, 1],
    }

    all_prices = []
    customer_features = []
    choices = []

    for _, row in subdf.iterrows():
        n_units = int(row.AMOUNT)
        if n_units <= 0:
            # Nothing was bought on this row: it yields no choice and
            # leaves the running prices untouched.
            continue

        item_index = item_to_index[row.PRODUCT_ID]
        # Unit price paid on this row becomes the current price of the item.
        curr_prices[item_index] = row.SALES_PRICE / row.AMOUNT
        encoded_age = age_groups[row.AGE_GROUP]

        # Each purchased unit is recorded as an individual choice.
        for _ in range(n_units):
            customer_features.append(encoded_age)
            choices.append(item_index)
            all_prices.append(list(curr_prices))

    all_prices = np.expand_dims(np.array(all_prices), axis=-1)
    customer_features = np.array(customer_features).astype("float32")
    choices = np.array(choices)

    # Every item is considered available for every choice; the number of
    # columns follows the number of retained items.
    availabilities = np.ones((len(choices), len(items))).astype("float32")

    # Create Dataset
    return ChoiceDataset(
        contexts_features=customer_features,
        choices=choices,
        contexts_items_features=all_prices,
        contexts_items_availabilities=availabilities,
    )
1 change: 1 addition & 0 deletions choice_learn/models/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,7 @@ def _em_fit(self, dataset, verbose=0):
"""
hist_logits = []
hist_loss = []

# Initialization
for model in self.models:
# model.instantiate()
Expand Down
135 changes: 135 additions & 0 deletions choice_learn/toolbox/assortment_optimizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""Tool function for assortment optimization."""
import gurobipy as gp
import numpy as np

"""TODO: clarify outside good integration
TODO 2: ADD easy integration of additional constraints
"""


class AssortmentOptimizer(object):
    """Base class for assortment optimization.

    The assortment problem is solved as a linear program obtained with the
    Charnes-Cooper transformation: with x_j in {0, 1} telling whether item j is
    offered and t = 1 / sum_k(utilities[k] * x_k), the variables are
    y_0 = t (outside option, always offered) and y_j = x_j * t.
    """

    def __init__(self, utilities, itemwise_values, assortment_size, outside_option_given=False):
        """Initialize the AssortmentOptimizer object.

        Parameters
        ----------
        utilities : Iterable
            List of utilities for each item. When the outside option is not
            given it is added with a weight of exp(0.0).
        itemwise_values: Iterable
            List of to-be-optimized values for each item, e.g. prices.
        assortment_size : int
            maximum size of the requested assortment. None disables the
            size constraint.
        outside_option_given : bool
            Whether the outside option is given (as first element of `utilities`
            and `itemwise_values`) or not (and thus is automatically added).

        Raises
        ------
        ValueError
            If `utilities` and `itemwise_values` do not have the same length.
        """
        if len(utilities) != len(itemwise_values):
            raise ValueError(
                "You should provide as many utilities as itemwise values. "
                f"Found {len(utilities)} and {len(itemwise_values)} instead."
            )
        self.outside_option_given = outside_option_given
        if self.outside_option_given:
            # Index 0 already is the outside option: use the inputs as they are.
            self.utilities = np.asarray(utilities)
            self.itemwise_values = np.asarray(itemwise_values)
        else:
            # Prepend the outside option: weight exp(0.0) and value 0.
            self.utilities = np.concatenate([[np.exp(0.0)], utilities], axis=0)
            self.itemwise_values = np.concatenate([[0.0], itemwise_values], axis=0)
        # Number of items, outside option (index 0) excluded.
        self.n_items = len(self.utilities) - 1
        self.assortment_size = assortment_size

        self.solver = self.base_instantiate()
        self.set_base_constraints()

    def base_instantiate(self):
        """Instantiate the solver with its variables and objective.

        Returns
        -------
        gurobipy.Model
            solver with one continuous variable per item (outside option
            included) stored in `self.y`, set up for maximization.
        """
        # Create a new model
        solver = gp.Model("Assortment_IP")
        solver.ModelSense = -1  # maximization
        solver.setParam("OutputFlag", False)

        # Create variables. The objective coefficient of y_j is value_j * utility_j
        # so that the objective equals the expected value of the assortment.
        self.y = {
            j: solver.addVar(
                vtype=gp.GRB.CONTINUOUS,
                obj=float(self.itemwise_values[j]) * float(self.utilities[j]),
                name="y_%s" % j,
            )
            for j in range(self.n_items + 1)
        }
        # Integrate new variables
        solver.update()

        return solver

    def set_base_constraints(self):
        """Set the LP base constraints.

        In particular, ensures Charnes-Cooper transformation constraints
        and assortment size constraint.
        """
        item_indexes = range(1, self.n_items + 1)

        # Base Charnes-Cooper Constraints for Integers: x_j <= 1 becomes y_j <= y_0
        for j in item_indexes:
            self.solver.addConstr(self.y[j] <= self.y[0])

        # Base Charnes-Cooper Constraint for Normalization:
        # the utility-weighted sum of the variables equals 1.
        charnes_cooper = gp.quicksum(
            float(self.utilities[j]) * self.y[j] for j in range(self.n_items + 1)
        )
        self.solver.addConstr(charnes_cooper == 1)

        # Assortment size constraint: at most `assortment_size` items,
        # summed over every item (the outside option is not counted).
        if self.assortment_size is not None:
            self.solver.addConstr(
                gp.quicksum(self.y[j] for j in item_indexes)
                <= self.assortment_size * self.y[0]
            )

        # Integrate constraints
        self.solver.update()

    def set_objective_function(self, itemwise_values):
        """Define the objective function to maximize with the assortment.

        Not implemented yet.

        Parameters
        ----------
        itemwise_values : list-like
            List of values for each item - total value to be optimized.
        """
        raise NotImplementedError

    def add_constraint(self):
        """Add constraints. Not implemented yet."""
        raise NotImplementedError

    def solve(self):
        """Solve the optimization problem.

        The solver status is stored in `self.status`.

        Returns
        -------
        np.ndarray
            Array of 0s and 1s, indicating the presence of each item in the optimal
            assortment. The outside option is part of the array (first position)
            only when it was given by the user.
        float
            Value of the objective function for this assortment.
        """
        self.solver.update()

        # -- Optimize --
        self.solver.optimize()
        self.status = self.solver.Status

        # Variable 0 is the outside option: it is only reported when the user
        # provided it, otherwise variables are shifted by one position.
        first_index = 0 if self.outside_option_given else 1
        assortment = np.zeros(self.n_items + 1 - first_index)
        for i in range(first_index, self.n_items + 1):
            if self.y[i].x > 0:
                assortment[i - first_index] = 1

        return assortment, self.solver.objVal
Loading

0 comments on commit 4fbf727

Please sign in to comment.