
Commit

pass all tests
YannDubs committed Dec 30, 2023
1 parent 76f7057 commit e29f3cb
Showing 3 changed files with 10 additions and 10 deletions.
requirements.txt: 3 changes (2 additions, 1 deletion)
@@ -3,4 +3,5 @@ datasets
 openai>=1.0.0
 pandas
 tiktoken>=0.3.2
-fire
+fire
+scipy
src/alpaca_eval/analyze.py: 13 changes (6 additions, 7 deletions)
@@ -129,6 +129,7 @@ def agreement_of_annotations(
         >>> analyzer.agreement_of_annotations(df_crossannotations, annotations_2=None,
         ...                                   n_majority_vote_1=1, n_majority_vote_2=1)
         score             0.750000
+        error             0.250000
         sem_samples       0.250000
         counts            2.000000
         sem_annotators    0.075378
@@ -138,6 +139,7 @@
         >>> analyzer.agreement_of_annotations(df_crossannotations, annotations_2=None,
         ...                                   n_majority_vote_1=1, n_majority_vote_2=3)
         score             0.875
+        error             0.125
         sem_samples       0.125
         counts            2.000
         sem_annotators    0.125
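
The two doctest calls above differ only in n_majority_vote_2: comparing the held-out annotation against a single annotation gives 0.75 agreement, while majority-voting three annotations first gives 0.875, and the newly added error row is simply 1 - score in both outputs. A toy sketch of why majority voting helps (made-up data, not the library's internals):

```python
import pandas as pd

# Made-up cross-annotations: 4 annotators, 8 examples, preferences in {1, 2}.
df = pd.DataFrame({
    "a0": [1, 2, 1, 2, 1, 2, 1, 2],
    "a1": [1, 2, 2, 2, 1, 1, 1, 2],
    "a2": [1, 1, 1, 2, 2, 2, 1, 2],
    "a3": [2, 2, 1, 2, 1, 2, 1, 1],
})
held_out = df["a0"]

# n_majority_vote_2=1: compare the held-out annotation to one other annotation.
single = (df["a1"] == held_out).mean()

# n_majority_vote_2=3: majority-vote three annotations first (3 binary votes -> unique mode).
majority = df[["a1", "a2", "a3"]].mode(axis=1)[0]
voted = (majority == held_out).mean()

print(single, voted)  # the majority vote is a less noisy target, so agreement goes up
```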
@@ -544,11 +546,11 @@ class ZeroOneScoringRule(BaseScoringRule):
"""Scoring rule for binary predictions."""

def _score(self, prediction, target):
"""Score a single prediction."""
# accuracy
return (target == prediction).mean()

def _bayes_estimator(self, predictions):
"""Compute the bayes estimator of the predictions."""
# mode
return _random_mode(predictions)

def preprocess_predictions(self, predictions: npt.ArrayLike) -> pd.Series:
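
A quick hedged reading of the hunk above: the score is plain accuracy, and the Bayes estimator for 0/1 outcomes is the mode, with _random_mode presumably breaking ties at random. A toy illustration (not the library's code):

```python
import pandas as pd

# Toy label vectors; the real inputs are preference annotations in alpaca_eval.
predictions = pd.Series([1, 2, 2, 2])
target = pd.Series([1, 1, 2, 2])

# accuracy: fraction of matching labels
accuracy = (target == predictions).mean()  # 0.75

# mode of the predictions, with .sample(1) standing in for random tie-breaking
pooled = predictions.mode().sample(1, random_state=0).iloc[0]  # 2

print(accuracy, pooled)
```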
@@ -562,12 +564,10 @@ class AbsoluteScoringRule(BaseScoringRule):
"""Absolute loss scoring rule (i.e. MAE)."""

def _score(self, prediction, target):
"""Score a single prediction."""
# 1 - MAE
return 1 - (target - prediction).abs().mean()

def _bayes_estimator(self, predictions):
"""Compute the median in a backward compatible way."""

# if all the values are 0.0, 1.0, 2.0, nan, or 1.5 then for backward compatibility we return the random mode
# note that this doesn't change the expected value of the estimator, but increases the variance. The new version
# is thus better
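
The retained comment carries the key reasoning: on the legacy preference values a randomly tie-broken mode is returned instead of the median; both have the same expected value under a tie, but the random tie-break adds variance. A rough numeric check of that claim on toy data (the random-mode stand-in here is an assumption, not the library's _random_mode):

```python
import numpy as np
import pandas as pd

# Tied toy annotations drawn from the legacy preference values {1.0, 2.0}.
predictions = pd.Series([1.0, 1.0, 2.0, 2.0])

# New estimator: the median is deterministic here.
median_estimate = predictions.median()  # 1.5

# Legacy behaviour (stand-in for _random_mode): pick one of the tied modes at random.
rng = np.random.default_rng(0)
mode_draws = np.array([rng.choice(predictions.mode().to_numpy()) for _ in range(10_000)])

# Same expected value as the median, but extra variance from the random tie-break.
print(median_estimate)                      # 1.5
print(mode_draws.mean(), mode_draws.var())  # roughly 1.5 and 0.25
```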
@@ -581,11 +581,10 @@ class SquaredScoringRule(BaseScoringRule):
"""Squared loss scoring rule (i.e. MSE)."""

def _score(self, prediction, target):
"""Score a single prediction."""
# 1 - MSE
return 1 - ((target - prediction) ** 2).mean()

def _bayes_estimator(self, predictions):
"""Compute the bayes estimator of the predictions."""
return predictions.mean()


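
For the squared rule, 1 - MSE is paired with the mean because the mean is the constant that minimizes expected squared error over the predictions, just as the median minimizes expected absolute error above. A small worked check, not the library's code:

```python
import numpy as np

# Toy annotations for a single example.
predictions = np.array([0.0, 1.0, 2.0, 2.0])

def expected_squared_error(c):
    """Average squared error of a constant guess c against the predictions."""
    return np.mean((predictions - c) ** 2)

print(expected_squared_error(predictions.mean()))       # 0.6875, the minimum (mean = 1.25)
print(expected_squared_error(np.median(predictions)))   # 0.75 for the median (1.5)
```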
src/alpaca_eval/processors.py: 4 changes (2 additions, 2 deletions)
@@ -59,7 +59,7 @@ class RandomSwitchTwoColumnsProcessor(BaseProcessor):
     >>> df = pd.DataFrame([dict(instruction='2+2', output_1='10', output_2='4', preference=2),
     ...                    dict(instruction='2+3', output_1='5', output_2='7', preference=1)])
     >>> processor = RandomSwitchTwoColumnsProcessor(two_columns_to_switch=['output_1', 'output_2'],
-    ...                                             fn_replace_if_switch = lambda x: x.replace({"preference":{1: 2, 2: 1}})
+    ...                                             fn_replace_if_switch = lambda x: x.replace({"preference":{1: 2, 2: 1}}))
     >>> processor.preprocess(df)
       instruction output_1 output_2  preference  is_switch_output_1_output_2
     0         2+2        4       10           1                         True
@@ -116,7 +116,7 @@ def postprocess(self, df_annotated: pd.DataFrame) -> pd.DataFrame:
"""When postprocessing, we undo the switch and remove the switch column."""
df_annotated = df_annotated.copy()
df_annotated = self._switch_or_unswitch(df_annotated, is_switch=False)
# df_annotated = df_annotated.drop(columns=[self._switch_column])
df_annotated = df_annotated.drop(columns=[self._switch_column])
return df_annotated

@property
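
With this fix, the processor round-trips cleanly: preprocess may switch output_1/output_2 and flip the preference via fn_replace_if_switch, and postprocess un-switches and now actually drops the bookkeeping column. A minimal usage sketch, assuming the package is importable as alpaca_eval and that postprocess can be run directly on the preprocessed frame:

```python
import pandas as pd
from alpaca_eval.processors import RandomSwitchTwoColumnsProcessor

df = pd.DataFrame([
    dict(instruction="2+2", output_1="10", output_2="4", preference=2),
    dict(instruction="2+3", output_1="5", output_2="7", preference=1),
])

processor = RandomSwitchTwoColumnsProcessor(
    two_columns_to_switch=["output_1", "output_2"],
    fn_replace_if_switch=lambda x: x.replace({"preference": {1: 2, 2: 1}}),
)

df_pre = processor.preprocess(df)        # may switch outputs and adds is_switch_output_1_output_2
df_post = processor.postprocess(df_pre)  # undoes the switch and, with this fix, drops that column

assert "is_switch_output_1_output_2" not in df_post.columns
```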
