Voting rights assignment debugged!
Next is the biggest challenge: Mehestan!
lenhoanglnh committed Jan 10, 2025
1 parent a8468bd commit b1d1537
Showing 176 changed files with 13,133 additions and 3,277 deletions.
7 changes: 1 addition & 6 deletions solidago/experiments/toy.py
@@ -22,16 +22,11 @@

# s = State.load("tests/pipeline/saved")
pipeline= Sequential.load("tests/pipeline/test_pipeline.json")
states = [ State.load(f"tests/pipeline/saved_{seed}") for seed in range(5) ]
states = [ State.load(f"tests/pipeline/saved/{seed}") for seed in range(5) ]

for seed in range(5):
s = states[seed]
s = pipeline.trust_propagation.state2state_function(s, save_directory=f"tests/pipeline/saved_{seed}")
s = pipeline.preference_learning.state2state_function(s, save_directory=f"tests/pipeline/saved_{seed}")

# s = pipeline(s, "tests/pipeline/saved")

# s = pipeline.voting_rights.state2state_function(s, save_directory="tests/pipeline/saved")
# s = pipeline.scaling.state2state_function(s, save_directory="tests/pipeline/saved")
# s = pipeline.aggregation.state2state_function(s, save_directory="tests/pipeline/saved")
# s = pipeline.post_process.state2state_function(s, save_directory="tests/pipeline/saved")
@@ -40,6 +40,8 @@ def __init__(self,
which is exp(uncertainty_nll_increase) times lower than score.
max_uncertainty: float=1e3
Replaces infinite uncertainties with max_uncertainty
last_comparison_only: bool=True
Ignores all comparisons between two entities prior to the last provided.
"""
self.prior_std_dev = prior_std_dev
self.uncertainty_nll_increase = uncertainty_nll_increase
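As an aside on last_comparison_only: solidago's comparison storage is not shown in this hunk, so the following is only a sketch of the intended filtering, assuming a hypothetical pandas DataFrame whose column names are not solidago's actual schema and where later rows are more recent.

import pandas as pd

# Hypothetical comparison table; the column names are assumptions, not solidago's schema.
comparisons = pd.DataFrame({
    "username":   ["alice", "alice", "bob"],
    "left_name":  ["e1",    "e1",    "e1"],
    "right_name": ["e2",    "e2",    "e3"],
    "comparison": [3, -2, 5],  # later rows are assumed to be more recent
})

# Keep only the last comparison provided by each user for each entity pair,
# which is what last_comparison_only=True asks for.
last_only = comparisons.drop_duplicates(subset=["username", "left_name", "right_name"], keep="last")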
@@ -281,6 +283,8 @@ def __init__(self,
which is exp(uncertainty_nll_increase) times lower than score.
max_uncertainty: float=1e3
Replaces infinite uncertainties with max_uncertainty
last_comparison_only: bool=True
Ignores all comparisons between two entities prior to the last provided.
"""
super().__init__(
prior_std_dev=prior_std_dev,
@@ -296,16 +300,15 @@ def cumulant_generating_function(self, score_diffs: npt.NDArray) -> npt.NDArray:
or where it is large (because sinh explodes).
"""
score_diffs_abs = np.abs(score_diffs)
with np.errstate(all='ignore'):
return np.where(
score_diffs_abs > 1e-1,
np.where(
score_diffs_abs < 20.0,
np.log(np.sinh(score_diffs) / score_diffs),
score_diffs_abs - np.log(2) - np.log(score_diffs_abs),
),
score_diffs_abs ** 2 / 6 - score_diffs_abs ** 4 / 180,
)
return np.where(
score_diffs_abs > 1e-1,
np.where(
score_diffs_abs < 20.0,
np.log(np.sinh(score_diffs) / score_diffs),
score_diffs_abs - np.log(2) - np.log(score_diffs_abs),
),
score_diffs_abs ** 2 / 6 - score_diffs_abs ** 4 / 180,
)
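The three branches above all approximate the same function, the uniform-GBT cumulant generating function log(sinh(x)/x): a Taylor expansion x**2/6 - x**4/180 near zero (where the direct formula hits 0/0 at x = 0), and |x| - log(2) - log|x| for large |x| (where sinh overflows). A quick standalone numpy check of both regimes (not part of the diff):

import numpy as np

x_small, x_large = 0.05, 25.0
exact_small = np.log(np.sinh(x_small) / x_small)
approx_small = x_small**2 / 6 - x_small**4 / 180
# For large |x|, sinh(x) ~ exp(|x|) / 2, hence |x| - log(2) - log(|x|).
exact_large = np.log(np.sinh(x_large) / x_large)
approx_large = x_large - np.log(2) - np.log(x_large)
print(exact_small - approx_small, exact_large - approx_large)  # both differences are tiny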

def cumulant_generating_function_derivative(self, score_diffs: npt.NDArray) -> npt.NDArray:
""" The cgf derivative of UniformGBT is simply
@@ -54,6 +54,8 @@ def __init__(self,
Replaces infinite uncertainties with max_uncertainty
max_iter: int=100
Maximal number of iterations used
last_comparison_only: bool=True
Ignores all comparisons between two entities prior to the last provided.
"""
super().__init__(
prior_std_dev=prior_std_dev,
@@ -153,9 +155,30 @@ def __init__(self,
device: torch.device=default_device,
last_comparison_only: bool=True,
):
"""
Parameters (TODO)
""" Generalized Bradley Terry with a uniform root law is a straightforward
instance of the models introduced in the paper "Generalized Bradley-Terry
Models for Score Estimation from Paired Comparisons" by Julien Fageot,
Sadegh Farhadkhani, Lê-Nguyên Hoang and Oscar Villemaud, and published at AAAI'24.
This implementation leverages Limited-memory Broyden-Fletcher-Goldfarb-Shanno
(LBFGS) algorithm, a second-order quasi-Newton method with limited demands
of computer memory. In particular we use its pytorch implementation.
Parameters
----------
prior_std_dev: float=7.0
Typical scale of scores.
Technically, it should be the standard deviation of the Gaussian prior.
convergence_error: float=1e-5
Admissible error in score computations (obtained through optimization).
high_likelihood_range_threshold: float=1.0
To determine the uncertainty, we compute left_unc (respectively, right_unc)
such that score - left_unc (respectively, + right_unc) has a likelihood
which is exp(high_likelihood_range_threshold) times lower than score.
max_uncertainty: float=1e3
Replaces infinite uncertainties with max_uncertainty
last_comparison_only: bool=True
Ignores all comparisons between two entities prior to the last provided.
"""
super().__init__(
prior_std_dev=prior_std_dev,
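For readers unfamiliar with the model, here is a minimal self-contained sketch of what uniform-GBT score estimation with torch's LBFGS optimizer can look like. It is not solidago's implementation: the data layout, the sign convention for comparisons, and the loss below (per-comparison term -r * diff + log(sinh(diff)/diff) plus a Gaussian prior) are my reading of the paper and may differ in detail from this class.

import torch

# Toy data: (left_index, right_index, r) with r in [-1, 1]; here r > 0 is read
# as "right entity preferred to left" -- an assumption about orientation.
comparisons = [(0, 1, 0.5), (1, 2, 1.0), (0, 2, 0.8)]
n_entities, prior_std_dev = 3, 7.0
scores = torch.zeros(n_entities, requires_grad=True)

def negative_log_likelihood() -> torch.Tensor:
    nll = (scores ** 2).sum() / (2 * prior_std_dev ** 2)  # Gaussian prior on scores
    for left, right, r in comparisons:
        diff = scores[right] - scores[left]
        # Uniform-GBT cumulant generating function log(sinh(d)/d),
        # with a Taylor fallback to avoid 0/0 at d = 0.
        cgf = torch.log(torch.sinh(diff) / diff) if diff.abs() > 1e-6 else diff ** 2 / 6
        nll = nll - r * diff + cgf
    return nll

optimizer = torch.optim.LBFGS([scores], max_iter=100)

def closure():
    optimizer.zero_grad()
    loss = negative_log_likelihood()
    loss.backward()
    return loss

optimizer.step(closure)  # LBFGS runs up to max_iter iterations inside one step call
print(scores.detach())   # higher score = more preferred under the assumed orientation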
@@ -41,6 +41,8 @@ def __init__(self,
which is exp(high_likelihood_range_threshold) times lower than score.
max_uncertainty: float=1e3
Replaces infinite uncertainties with max_uncertainty
last_comparison_only: bool=True
Ignores all comparisons between two entities prior to the last provided.
"""
super().__init__(
prior_std_dev=prior_std_dev,
@@ -115,7 +117,7 @@ def partial_derivative(self) -> Callable[[int, np.ndarray[np.float64], dict, dic
prior_var = self.prior_std_dev**2
cfg_deriv = self.cumulant_generating_function_derivative

# @njit
@njit
def njit_partial_derivative(
entity_index: int,
scores: float,
@@ -138,10 +140,29 @@ def __init__(self,
convergence_error: float=1e-5,
last_comparison_only: bool=True,
):
"""
Parameters (TODO)
""" Generalized Bradley Terry with a uniform root law is a straightforward
instance of the models introduced in the paper "Generalized Bradley-Terry
Models for Score Estimation from Paired Comparisons" by Julien Fageot,
Sadegh Farhadkhani, Lê-Nguyên Hoang and Oscar Villemaud, and published at AAAI'24.
This implementation leverages coordinate descent, and makes heavy use of numba
to accelerate the computations.
Parameters
----------
prior_std_dev: float=7.0
Typical scale of scores.
Technically, it should be the standard deviation of the Gaussian prior.
convergence_error: float=1e-5
Admissible error in score computations (obtained through optimization).
high_likelihood_range_threshold: float=1.0
To determine the uncertainty, we compute left_unc (respectively, right_unc)
such that score - left_unc (respectively, + right_unc) has a likelihood
which is exp(high_likelihood_range_threshold) times lower than score.
max_uncertainty: float=1e3
Replaces infinite uncertainties with max_uncertainty
last_comparison_only: bool=True
Ignores all comparisons between two entities prior to the last provided.
"""
super().__init__(
prior_std_dev=prior_std_dev,
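To make the coordinate-descent idea concrete, here is a rough, self-contained sketch that cycles over coordinates and zeroes each partial derivative with a one-dimensional root finder until no coordinate moves by more than convergence_error. It uses scipy's brentq and a generic quadratic example rather than solidago's njit_brentq and GBT derivatives, so treat it as a conceptual illustration, not this class's actual update rule.

import numpy as np
from scipy.optimize import brentq

def coordinate_descent(partial_derivative, n_coords, convergence_error=1e-5, bracket=50.0):
    """Cycle over coordinates, zeroing each partial derivative with Brent's method."""
    x = np.zeros(n_coords)
    while True:
        max_update = 0.0
        for i in range(n_coords):
            def f(value):
                x_try = x.copy()
                x_try[i] = value
                return partial_derivative(i, x_try)
            new_value = brentq(f, -bracket, bracket, xtol=convergence_error)
            max_update = max(max_update, abs(new_value - x[i]))
            x[i] = new_value
        if max_update < convergence_error:
            return x

# Example: minimize 0.5 * x^T A x - b^T x, whose i-th partial derivative is (A @ x - b)[i].
A, b = np.array([[3.0, 1.0], [1.0, 2.0]]), np.array([1.0, 1.0])
print(coordinate_descent(lambda i, x: (A @ x - b)[i], n_coords=2))  # ~ [0.2, 0.4]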
@@ -160,13 +181,12 @@ def cumulant_generating_function_derivative(self) -> Callable[[npt.NDArray], npt
and as it must be njit to be used by coordinate_descent,
we write it as a cached property njit function.
"""
# @njit
@njit
def njit_cumulant_generating_function_derivative(score_diffs: npt.NDArray):
with np.errstate(all='ignore'):
return np.where(
np.abs(score_diffs) < 1e-2,
score_diffs / 3,
1 / np.tanh(score_diffs) - 1 / score_diffs,
)
return np.where(
np.abs(score_diffs) < 1e-2,
score_diffs / 3,
1 / np.tanh(score_diffs) - 1 / score_diffs,
)

return njit_cumulant_generating_function_derivative
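For reference, both branches above approximate the same derivative: d/dx log(sinh(x)/x) = coth(x) - 1/x, whose Taylor expansion around zero is x/3 - x**3/45 + O(x**5). Hence the x/3 branch for |x| < 1e-2, which also sidesteps the unstable 1/tanh(x) - 1/x cancellation near zero, and the exact formula elsewhere.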
@@ -46,19 +46,23 @@ def __call__(self,
voting_rights = VotingRights()
assessments = assessments.reorder_keys(["criterion", "entity_name", "username"])
comparisons = comparisons.reorder_keys(["criterion", "left_name", "right_name", "username"])

for criterion in assessments.get_set("criterion") | comparisons.get_set("criterion"):
entity_names = assessments[criterion].get_set("entity_name")
entity_names |= comparisons[criterion].get_set("left_name")
entity_names |= comparisons[criterion].get_set("right_name")

for entity_name in entity_names:
evaluators = assessments[criterion, entity_name].get_set("username")
evaluators |= comparisons[criterion, entity_name].get_set("username")
evaluators |= comparisons[criterion, any, entity_name].get_set("username")
trust_scores = { username: users.loc[username, "trust_score"] for username in evaluators }
public = { username: made_public[username, entity_name] for username in evaluators }
sub_voting_rights, sub_statistics = self.sub_main(trust_scores, public)

for username, voting_right in sub_voting_rights.items():
voting_rights[username, entity_name, criterion] = voting_right

cumulative_trust, min_voting_right, overtrust = sub_statistics
entities.loc[entity_name, f"{criterion}_cumulative_trust"] = cumulative_trust
entities.loc[entity_name, f"{criterion}_min_voting_right"] = min_voting_right
@@ -84,7 +88,6 @@ def sub_main(self,
privacy_weights=( np.array(list(public.values())) * (1 - self.privacy_penalty) + self.privacy_penalty )
)
return { username: voting_rights[i] for i, username in enumerate(trust_scores) }, statistics


def computing_voting_rights_and_statistics(self,
trust_scores: np.ndarray,
8 changes: 3 additions & 5 deletions solidago/src/solidago/_pipeline/_voting_rights/is_trust.py
@@ -2,7 +2,7 @@
from solidago._pipeline.base import StateFunction


class IsTrust(StateFunction):
class Trust2VotingRights(StateFunction):
def __init__(self, privacy_penalty: float=0.5):
""" Computes voting_rights simply as the user trust scores,
potentially multiplied by the privacy penalty if the vote is private.
@@ -28,9 +28,7 @@ def __call__(self,
for user in users:
for entity in entities:
for criterion in criteria:
voting_right = users.loc[user, "trust_score"]
if not made_public[user, entity]:
voting_right *= self.privacy_penalty
voting_rights.add_row((user, entity), { criterion: voting_right })
penalty = made_public.penalty(self.privacy_penalty, user, entity)
voting_rights[user, entity, criterion] = penalty * user["trust_score"]

return voting_rights
3 changes: 3 additions & 0 deletions solidago/src/solidago/_state/_made_public/base.py
@@ -16,3 +16,6 @@ def __init__(self,
def __setitem__(self, keys: Union[str, tuple, list], value: bool) -> None:
if value:
super().__setitem__(keys, value)

def penalty(self, privacy_penalty: float, *keys) -> float:
return 1 if self[keys] else privacy_penalty
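This new penalty helper folds the old public/private branch into a single multiplier, which is what the rewritten Trust2VotingRights.__call__ above relies on. A small illustration (the numbers are made up):

# With privacy_penalty = 0.5 and a user whose trust_score is 0.8:
#   public vote:  made_public.penalty(0.5, user, entity) -> 1.0, so voting right = 0.8
#   private vote: made_public.penalty(0.5, user, entity) -> 0.5, so voting right = 0.4
# i.e. voting_rights[user, entity, criterion] = penalty * trust_score replaces the explicit if/else.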
@@ -147,7 +147,7 @@ def reorder_keys(self, key_names: list[str]) -> "NestedDict":
new2self_index = { i: self.key_names.index(key_names[i]) for i in range(len(key_names)) }
result = type(self)(key_names=key_names)
for self_keys, value in self.iter(value_process=False, key_process=False):
result[ [self_keys[new2self_index[i]] for i in range(len(key_names))] ] = value
result.add_row([self_keys[new2self_index[i]] for i in range(len(key_names))], value)
return result

@classmethod
6 changes: 3 additions & 3 deletions solidago/src/solidago/primitives/optimize.py
@@ -22,7 +22,7 @@
_rtol = 4 * np.finfo(float).eps


# @njit
@njit
def _bisect_interval(a, b, fa, fb) -> Tuple[float, int]:
"""Conditional checks for intervals in methods involving bisection"""
if fa * fb > 0:
@@ -41,7 +41,7 @@ def _bisect_interval(a, b, fa, fb) -> Tuple[float, int]:
return root, status


# @njit
@njit
def njit_brentq(
f,
args=(),
@@ -289,7 +289,7 @@ def coordinate_function(
coordinate: int,
variable: np.ndarray[np.float64],
) -> Callable[[float, Tuple], float]:
# @njit
@njit
def f(value: np.float64, *partial_derivative_args) -> np.float64:
return partial_derivative(coordinate, np.array([
variable[i] if i != coordinate else value