Skip to content

Commit

Permalink
WIP Implementing Mehestan, last missing step of the pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
lenhoanglnh committed Jan 19, 2024
1 parent 78c3b05 commit efdaa39
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 4 deletions.
2 changes: 1 addition & 1 deletion solidago/src/solidago/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class DefaultPipeline:
scaling: Scaling = ScalingCompose(
Mehestan(
lipschitz=0.1,
min_comparison=10,
min_n_judged_entities=10,
n_scalers_max=1000,
error=1e-5
),
Expand Down
131 changes: 131 additions & 0 deletions solidago/src/solidago/scaling/mehestan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import numpy as np
import pandas as pd

from . import Scaling

from solidago.privacy_settings import PrivacySettings
from solidago.scoring_model import ScoringModel


class Mehestan(Scaling):
    def __init__(self, lipschitz=0.1, min_n_judged_entities=10, n_scalers_max=1000, error=1e-5):
        """ Mehestan performs Lipschitz-resilient collaborative scaling.
        See "Robust Sparse Voting", Youssef Allouah, Rachid Guerraoui, Lȩ Nguyên Hoang
        and Oscar Villemaud, published at AISTATS 2024.

        Parameters
        ----------
        lipschitz: float
            Resilience parameters. Larger values are more resilient, but less accurate.
        min_n_judged_entities: float
            Minimal (voting-right-weighted) number of judged entities
            to be a potential scaling-calibration user
        n_scalers_max: int
            Maximal number of scaling-calibration users
        error: float
            Error bound
        """
        self.lipschitz = lipschitz
        self.min_n_judged_entities = min_n_judged_entities
        self.n_scalers_max = n_scalers_max
        self.error = error

    def compute_n_judged_entities(self, user_models, users, entities, voting_rights):
        """ Computes the voting-right-weighted number of entities judged by each user.

        Parameters
        ----------
        user_models: dict[int, ScoringModel]
            user_models[user] is user's scoring model
        users: DataFrame
            Only its length is used, to size the output
        entities: DataFrame
            entities.loc[entity] is passed to the scoring models as entity features
        voting_rights: VotingRights
            voting_rights[user, entity]: float

        Returns
        -------
        out: np.ndarray of length len(users)
            out[user] is the sum of voting_rights[user, entity] over the entities
            for which user's model returns a score (i.e. not None).
            NOTE: indexing by user assumes user ids are 0, ..., len(users) - 1.
        """
        results = np.zeros(len(users))
        for user in user_models:
            model = user_models[user]
            # scored_entities is a method of each model, not of the dict of models
            for entity in model.scored_entities(entities):
                if model(entity, entities.loc[entity]) is not None:
                    results[user] += voting_rights[user, entity]
        return results

    def compute_scalers(self, n_judged_entities):
        """ Selects the scaling-calibration users.

        At most n_scalers_max users are selected, by decreasing n_judged_entities,
        and only among users with at least min_n_judged_entities.

        Parameters
        ----------
        n_judged_entities: array-like of float
            n_judged_entities[user] is the activity measure of user

        Returns
        -------
        out: np.ndarray of bool
            out[user] is True if and only if user is a scaler
        """
        n_judged_entities = np.asarray(n_judged_entities)
        is_scaler = np.zeros(len(n_judged_entities), dtype=bool)
        # Reverse the ascending argsort so that the most active users come first
        most_active_first = np.argsort(n_judged_entities)[::-1]
        for user in most_active_first[:self.n_scalers_max]:
            if n_judged_entities[user] < self.min_n_judged_entities:
                # All remaining users are even less active
                break
            is_scaler[user] = True
        return is_scaler

    def compute_score_diffs(self, user_models, users, entities):
        """ Computes, for each user, the score differences that are significantly positive.

        A pair (a, b) is kept when the lower end of a's score interval is at least
        the upper end of b's score interval.

        Parameters
        ----------
        user_models: dict[int, ScoringModel]
            user_models[user] is user's scoring model
        users: DataFrame
            Unused, kept for interface consistency
        entities: DataFrame
            entities.loc[entity] is passed to the scoring models as entity features

        Returns
        -------
        out: dict[int, dict[int, dict[int, tuple[float, float, float]]]]
            out[user][a][b] = (difference, lower_bound, upper_bound)
            of score(a) - score(b), defined only if a is significantly above b.
        """
        score_diffs = dict()
        for user in user_models:
            model = user_models[user]
            # Query each entity once, and drop the entities the model does not score
            outputs = dict()
            for entity in sorted(model.scored_entities(entities)):
                output = model(entity, entities.loc[entity])
                if output is not None:
                    outputs[entity] = output

            user_diffs = dict()
            scored = list(outputs)
            for index, a in enumerate(scored):
                score_a, left_a, right_a = outputs[a]
                for b in scored[index + 1:]:
                    score_b, left_b, right_b = outputs[b]
                    diff = score_a - score_b
                    if diff >= left_a + right_b:
                        user_diffs.setdefault(a, dict())[b] = (
                            diff,
                            diff - left_a - right_b,
                            diff + right_a + left_b
                        )
                    if -diff >= left_b + right_a:
                        # b is above a: store under [b][a] to keep the direction explicit
                        user_diffs.setdefault(b, dict())[a] = (
                            -diff,
                            -diff - left_b - right_a,
                            -diff + right_b + left_a
                        )
            score_diffs[user] = user_diffs
        return score_diffs

    def scale_scalers(self, user_models, scalers, entities, voting_rights, score_diffs):
        """ Scales the models of the scaling-calibration users. Not implemented yet.

        Raises
        ------
        NotImplementedError
            Always, as this step of the pipeline is still to be written
        """
        raise NotImplementedError

    def scale_non_scalers(self, user_models, non_scalers, entities,
            voting_rights, scalers, scaled_models, pairs):
        """ Scales the models of the users that are not scalers. Not implemented yet.

        Raises
        ------
        NotImplementedError
            Always, as this step of the pipeline is still to be written
        """
        raise NotImplementedError

    def __call__(
        self,
        user_models: dict[int, ScoringModel],
        users: pd.DataFrame,
        entities: pd.DataFrame,
        voting_rights: "VotingRights",
        privacy: PrivacySettings
    ) -> dict[int, ScoringModel]:
        """ Returns scaled user models

        Parameters
        ----------
        user_models: dict[int, ScoringModel]
            user_models[user] is user's scoring model
        users: DataFrame with columns
            * user_id (int, index)
            * trust_score (float)
        entities: DataFrame with columns
            * entity_id (int, ind)
        voting_rights: VotingRights
            voting_rights[user, entity]: float
        privacy: PrivacySettings
            privacy[user, entity] in { True, False, None }

        Returns
        -------
        out[user]: ScoringModel
            Will be scaled by the Scaling method
        """
        # NOTE: VotingRights is annotated as a string because this module does not import it
        n_judged_entities = self.compute_n_judged_entities(user_models,
            users, entities, voting_rights)
        # DataFrame.assign returns a new frame: it must be rebound to be visible below
        users = users.assign(is_scaler=self.compute_scalers(n_judged_entities))
        scalers = users[users["is_scaler"]]
        # Element-wise negation of a boolean column is `~`; `not` raises on a Series
        non_scalers = users[~users["is_scaler"]]

        score_diffs = self.compute_score_diffs(user_models, users, entities)

        scaled_models = self.scale_scalers(user_models, scalers, entities,
            voting_rights, score_diffs)
        scaled_models = self.scale_non_scalers(user_models, non_scalers, entities,
            voting_rights, scalers, scaled_models, score_diffs)

        return scaled_models

23 changes: 20 additions & 3 deletions solidago/src/solidago/scoring_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,22 @@ def __call__(
out: (score, left_uncertainty, right_uncertainty) or None
"""
raise NotImplementedError

def scored_entities(self, entities) -> set[int]:
    """ Returns the set { 0, ..., len(entities) - 1 }.

    Only the length of entities is used by this implementation.
    """
    n_entities = len(entities)
    return {position for position in range(n_entities)}


def DirectScoringModel(ScoringModel):
def __init__(self, dct: Optional[dict[int, tuple[float, float, float]]]=None):
    """ Stores the dictionary of explicit entity scores.

    Parameters
    ----------
    dct: dict[int, tuple[float, float, float]] or None
        dct[entity_id] is the value returned when the model is called on entity_id.
        If None, a fresh empty dictionary is created for this instance.
    """
    # A `dict()` default is evaluated once and would be shared by all instances
    self._dct = dict() if dct is None else dct
    # Alias under the former attribute name, pointing at the very same dictionary
    self.dct = self._dct

def __call__(self, entity_id: int, entity_features=None) -> Optional[tuple[float, float, float]]:
    """ Returns both score and uncertainty

    Parameters
    ----------
    entity_id: int
        Identifier of the entity to look up
    entity_features:
        Unused by this model

    Returns
    -------
    out: stored value for entity_id, or None if the entity has no stored value
    """
    # Both the membership test and the lookup go through the same `_dct` attribute
    return self._dct.get(entity_id)

def __getitem__(self, entity_id: int) -> Optional[tuple[float, float, float]]:
    """ Shorthand for calling the model on entity_id without entity features. """
    return self(entity_id)
Expand All @@ -43,7 +48,13 @@ def __setitem__(self, entity_id: int, score_and_uncertainties: tuple[float, floa
score_and_uncertainties[1],
score_and_uncertainties[1]
)
self.scores[entity_id] = score_and_uncertainties
self._dct[entity_id] = score_and_uncertainties

def scored_entities(self, entities=None) -> set[int]:
    """ Returns the entity ids that have a stored score.

    Parameters
    ----------
    entities: sized collection or None
        If given, the result is restricted to ids in { 0, ..., len(entities) - 1 }.
    """
    stored_ids = set(self._dct.keys())
    if entities is not None:
        stored_ids &= set(range(len(entities)))
    return stored_ids


def ScaledScoringModel(ScoringModel):
def __init__(
Expand All @@ -62,6 +73,10 @@ def __call__(self, entity_id, entity_features):
left = self.multiplicative_scale * base_left
right = self.multiplicative_scale * base_right
return score, left, right

def scored_entities(self, entities=None) -> set[int]:
    """ Returns the entities scored by the base model.

    Parameters
    ----------
    entities: optional
        Forwarded unchanged to the base model's scored_entities
    """
    # Accept and forward `entities` so that model.scored_entities(entities) works here too
    return self.base_model.scored_entities(entities)


def PostProcessedScoringModel(ScoringModel):
def __init__(self, base_model: ScoringModel, post_process: callable):
Expand All @@ -88,3 +103,5 @@ def __call__(self, entity_id, entity_features):
right = - temp
return score, left, right

def scored_entities(self, entities=None) -> set[int]:
    """ Returns the entities scored by the base model.

    Parameters
    ----------
    entities: optional
        Forwarded unchanged to the base model's scored_entities
    """
    # Accept and forward `entities` so that model.scored_entities(entities) works here too
    return self.base_model.scored_entities(entities)

0 comments on commit efdaa39

Please sign in to comment.