From 2eaf42780e81083f87ad4d7680930e77eb07200b Mon Sep 17 00:00:00 2001
From: VincentAURIAU
Date: Thu, 26 Dec 2024 23:22:14 +0100
Subject: [PATCH] ENH: efficiency of EM algorithm

---
 choice_learn/models/base_model.py        |  3 +-
 .../models/latent_class_base_model.py    | 31 ++++++++++++++-----
 choice_learn/models/latent_class_mnl.py  | 14 ++++++++-
 3 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/choice_learn/models/base_model.py b/choice_learn/models/base_model.py
index ec6ecd44..d95c0bf5 100644
--- a/choice_learn/models/base_model.py
+++ b/choice_learn/models/base_model.py
@@ -444,7 +444,7 @@ def fit(
         self.callbacks.on_train_end(logs=temps_logs)
         return losses_history
 
-    @tf.function
+    @tf.function(reduce_retracing=True)
     def batch_predict(
         self,
         shared_features_by_choice,
@@ -731,7 +731,6 @@ def f(params_1d):
             # calculate gradients and convert to 1D tf.Tensor
             grads = tape.gradient(loss_value, self.trainable_weights)
             grads = tf.dynamic_stitch(idx, grads)
-            # print out iteration & loss
             f.iter.assign_add(1)
 
             # store loss value so we can retrieve later
diff --git a/choice_learn/models/latent_class_base_model.py b/choice_learn/models/latent_class_base_model.py
index 2f9321af..e1c6d3c6 100644
--- a/choice_learn/models/latent_class_base_model.py
+++ b/choice_learn/models/latent_class_base_model.py
@@ -104,12 +104,18 @@ def instantiate(self, **kwargs):
                 name="Latent-Logits",
             )
             self.latent_logits = init_logit
-        self.models = [self.model_class(**mp) for mp in self.model_parameters]
-        for model in self.models:
-            model.instantiate(**kwargs)
+        self.models = self.instantiate_latent_models(**kwargs)
 
         self.instantiated = True
 
+    def instantiate_latent_models(self, **kwargs):
+        """Instantiate latent models."""
+        models = [self.model_class(**mp) for mp in self.model_parameters]
+        for model in models:
+            model.instantiate(**kwargs)
+
+        return models
+
     # @tf.function
     def batch_predict(
         self,
@@ -824,7 +830,7 @@ def _expectation(self, choice_dataset):
         )
 
         return tf.clip_by_value(
-            predicted_probas / np.sum(predicted_probas, axis=1, keepdims=True), 1e-10, 1
+            predicted_probas / np.sum(predicted_probas, axis=1, keepdims=True), 1e-6, 1
         ), loss
 
     def _maximization(self, choice_dataset, verbose=0):
@@ -842,10 +848,17 @@ def _maximization(self, choice_dataset, verbose=0):
         np.ndarray
             latent probabilities resulting of maximization step
         """
-        self.models = [self.model_class(**mp) for mp in self.model_parameters]
+        # models = [self.model_class(**mp) for mp in self.model_parameters]
+        # for i in range(len(models)):
+        #     for j, var in enumerate(self.models[i].trainable_weights):
+        #         models[i]._trainable_weights[j] = var
+        # self.instantiate_latent_models(choice_dataset)
+
         # M-step: MNL estimation
         for q in range(self.n_latent_classes):
-            self.models[q].fit(choice_dataset, sample_weight=self.weights[:, q], verbose=verbose)
+            self.models[q].fit(
+                choice_dataset, sample_weight=self.weights[:, q].numpy(), verbose=verbose
+            )
 
         # M-step: latent probability estimation
         latent_probas = np.sum(self.weights, axis=0)
@@ -876,7 +889,9 @@ def _em_fit(self, choice_dataset, sample_weight=None, verbose=0):
 
         # Initialization
         init_sample_weight = np.random.rand(self.n_latent_classes, len(choice_dataset))
-        init_sample_weight = init_sample_weight / np.sum(init_sample_weight, axis=0, keepdims=True)
+        init_sample_weight = np.clip(
+            init_sample_weight / np.sum(init_sample_weight, axis=0, keepdims=True), 1e-6, 1
+        )
         for i, model in enumerate(self.models):
             # model.instantiate()
             model.fit(choice_dataset, sample_weight=init_sample_weight[i], verbose=verbose)
@@ -888,7 +903,7 @@ def _em_fit(self, choice_dataset, sample_weight=None, verbose=0):
             if np.sum(np.isnan(self.latent_logits)) > 0:
                 print("Nan in logits")
                 break
-        return hist_logits, hist_loss
+        return hist_loss, hist_logits
 
     def predict_probas(self, choice_dataset, batch_size=-1):
         """Predicts the choice probabilities for each choice and each product of a ChoiceDataset.
diff --git a/choice_learn/models/latent_class_mnl.py b/choice_learn/models/latent_class_mnl.py
index 1c5b6b83..c53e0478 100644
--- a/choice_learn/models/latent_class_mnl.py
+++ b/choice_learn/models/latent_class_mnl.py
@@ -4,6 +4,8 @@
 
 import tensorflow as tf
 
+import choice_learn.tf_ops as tf_ops
+
 from .conditional_logit import ConditionalLogit, MNLCoefficients
 from .latent_class_base_model import BaseLatentClassModel
 from .simple_mnl import SimpleMNL
@@ -23,6 +25,7 @@ def __init__(
         intercept=None,
         optimizer="Adam",
         lr=0.001,
+        epochs_maximization=1000,
         **kwargs,
     ):
         """Initialize model.
@@ -56,7 +59,7 @@ def __init__(
             "batch_size": batch_size,
             "lbfgs_tolerance": lbfgs_tolerance,
             "lr": lr,
-            "epochs": 1000,
+            "epochs": epochs_maximization,
         }
 
         super().__init__(
@@ -88,6 +91,15 @@ def instantiate_latent_models(self, n_items, n_shared_features, n_items_features
             model.indexes, model.weights = model.instantiate(
                 n_items, n_shared_features, n_items_features
             )
+            model.exact_nll = tf_ops.CustomCategoricalCrossEntropy(
+                from_logits=False,
+                label_smoothing=0.0,
+                sparse=False,
+                axis=-1,
+                epsilon=1e-25,
+                name="exact_categorical_crossentropy",
+                reduction="sum_over_batch_size",
+            )
             model.instantiated = True
 
     def instantiate(self, n_items, n_shared_features, n_items_features):
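
Note on reduce_retracing: during EM, each maximization step re-enters
fit() on the same models with new sample weights, and every evaluation
goes through the tf.function-decorated batch_predict. By default,
tf.function compiles a fresh concrete function whenever it sees a new
input signature; reduce_retracing=True asks TensorFlow to relax the
traced signature (e.g. to an unknown batch dimension) instead of
recompiling. A minimal, self-contained sketch of that behavior
(illustrative only, not code from the repository):

    import tensorflow as tf

    @tf.function(reduce_retracing=True)
    def weighted_nll(probas, weights):
        # Weighted negative log-likelihood of a batch of choice probabilities.
        return -tf.reduce_sum(weights * tf.math.log(probas))

    # Varying batch sizes would normally trigger one trace per shape;
    # with reduce_retracing=True the signature is relaxed instead.
    for n in (2, 3, 5):
        probas = tf.fill((n,), 1.0 / n)
        weights = tf.ones((n,))
        weighted_nll(probas, weights)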
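The raised clipping floor in _expectation (1e-10 to 1e-6) and the newly
clipped initial sample weights in _em_fit guard the same failure mode: a
posterior class weight that collapses to (near) zero produces degenerate
weighted log-likelihoods in the following M-step. A NumPy sketch of the
normalize-then-clip pattern, with hypothetical values:

    import numpy as np

    rng = np.random.default_rng(0)
    # pred[i, q] stands in for P(choice i | class q) * P(class q),
    # here for 5 choices and 3 latent classes.
    pred = rng.random((5, 3))

    # E-step: posterior membership weights, normalized per choice and
    # clipped away from 0 so the M-step objectives stay finite.
    weights = np.clip(pred / pred.sum(axis=1, keepdims=True), 1e-6, 1)

    # M-step, latent part: class probabilities from the posterior weights;
    # _maximization keeps the unnormalized np.sum(self.weights, axis=0).
    latent_probas = weights.sum(axis=0) / weights.sum()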
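The new epochs_maximization argument exposes the previously hard-coded
1000 inner epochs, so callers can trade M-step precision against run
time. A hypothetical usage sketch follows; the class name
LatentClassSimpleMNL and the n_latent_classes / fit_method arguments are
assumed from the surrounding repository rather than shown in this patch:

    from choice_learn.models.latent_class_mnl import LatentClassSimpleMNL

    model = LatentClassSimpleMNL(
        n_latent_classes=3,       # assumed constructor argument
        fit_method="EM",          # assumed; selects the _em_fit path
        optimizer="Adam",
        lr=0.001,
        epochs_maximization=200,  # inner epochs of each M-step fit
    )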