Merge pull request #67 from satzbeleg/dev
Add MinMax Scaling
ulf1 authored Jun 7, 2021
2 parents 1db41eb + 704c086 commit 54f3b74
Showing 8 changed files with 47 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -110,3 +110,4 @@ profile/data*
.theia
.DS_Store
tmp
README.rst
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -1,2 +1,3 @@
include README.md
include README.rst
recursive-include test *.py
4 changes: 4 additions & 0 deletions README.md
@@ -1,6 +1,10 @@
[![PyPI version](https://badge.fury.io/py/bwsample.svg)](https://badge.fury.io/py/bwsample)
[![DOI](https://zenodo.org/badge/335090754.svg)](https://zenodo.org/badge/latestdoi/335090754)
[![bwsample](https://snyk.io/advisor/python/bwsample/badge.svg)](https://snyk.io/advisor/python/bwsample)
[![Total alerts](https://img.shields.io/lgtm/alerts/g/satzbeleg/bwsample.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/satzbeleg/bwsample/alerts/)
[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/satzbeleg/bwsample.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/satzbeleg/bwsample/context:python)
[![deepcode](https://www.deepcode.ai/api/gh/badge?key=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJwbGF0Zm9ybTEiOiJnaCIsIm93bmVyMSI6InNhdHpiZWxlZyIsInJlcG8xIjoiYndzYW1wbGUiLCJpbmNsdWRlTGludCI6ZmFsc2UsImF1dGhvcklkIjoyOTQ1MiwiaWF0IjoxNjE5NTM1ODEyfQ.Vk73_cXxw-nf-ZmipCeFNm-zrgqy8Bkdt0oVd73ryhQ)](https://www.deepcode.ai/app/gh/satzbeleg/bwsample/_/dashboard?utm_content=gh%2Fsatzbeleg%2Fbwsample)


# bwsample: Sampling and Evaluation of Best-Worst Scaling sets
Sampling algorithm for best-worst scaling (BWS) sets, extracting pairs from evaluated BWS sets, count in dictionary of keys sparse matrix, and compute scores based on it.
2 changes: 1 addition & 1 deletion bwsample/__init__.py
@@ -1,4 +1,4 @@
__version__ = '0.6.5'
__version__ = '0.6.6'

from .sampling import sample
from .counting import count
8 changes: 1 addition & 7 deletions bwsample/ranking.py
@@ -1,6 +1,7 @@
from typing import List, Dict, Tuple, Optional
from .utils import to_scipy
from .utils import adjustscore
from .utils import minmax
import numpy as np
import scipy.sparse
import scipy.sparse.linalg
@@ -591,13 +592,6 @@ def mle_btl_sparse(cnt: scipy.sparse.csr_matrix,
return np.array(x1.flatten())[0], False


def minmax(arr: np.array) -> np.array:
data = np.array(arr)
xmin = data.min()
xmax = data.max()
return (data - xmin) / (xmax - xmin)


def bradley_terry_probability(cnt: scipy.sparse.csr_matrix,
indices: List[str],
prefit: Optional[bool] = True,
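With this hunk, minmax moves out of ranking.py and is pulled back in via the new `from .utils import minmax` import, next to the existing to_scipy and adjustscore imports. A minimal sketch of calling the relocated helper, assuming the package is installed from this revision; the sample values are illustrative:

from bwsample.utils import minmax

# linear rescaling to [0, 1]: (x - min) / (max - min)
print(minmax([2.0, 4.0, 6.0]).tolist())  # -> [0.0, 0.5, 1.0]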
14 changes: 13 additions & 1 deletion bwsample/utils.py
@@ -84,6 +84,13 @@ def add_dok(a: Dict[Tuple[ItemID, ItemID], int],
return out


def minmax(arr: np.array) -> np.array:
data = np.array(arr)
xmin = data.min()
xmax = data.max()
return (data - xmin) / (xmax - xmin)


def adjustscore(scores: np.array,
method: Optional[str] = 'quantile',
n_quantiles: Optional[int] = 10000,
@@ -100,6 +107,7 @@ def adjustscore(scores: np.array,
- 'quantile' -- sklearn's quantile transform
- 'sig3iqr' -- sigmoid 3x sklearn's robust scaler with (25%,75%)
- 'platt' -- calibrate scores with the binary labels (Platt, 1999)
- 'minmax' -- Min-Max scaling
n_quantiles: Optional[int] = 10000
Parameter for `method='quantile'`
@@ -119,7 +127,8 @@ comparisons to regularized likelihood methods.
comparisons to regularized likelihood methods.
"""
scores = np.array(scores)
labels = np.array(labels)
if labels:
labels = np.array(labels)

if method == 'quantile':
return sklearn.preprocessing.quantile_transform(
@@ -137,5 +146,8 @@ def adjustscore(scores: np.array,
cls.fit(X=scores.reshape(-1, 1), y=labels)
return cls.predict_proba(scores.reshape(-1, 1))[:, 1].reshape(-1)

elif method == 'minmax':
return minmax(scores)

else:
raise Exception(f"The method='{method}' is not implemented.")
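The new 'minmax' branch delegates to the relocated helper, rescaling scores linearly onto [0, 1] via (x - min) / (max - min); because the transform is monotone, the ordering of the scores is unchanged. A minimal sketch of the added code path, importing adjustscore directly from bwsample.utils as defined in this file (the sample values are illustrative):

import numpy as np
from bwsample.utils import adjustscore

scores = [.1, .3, .5, .7]
adjusted = adjustscore(scores, method='minmax')
# (scores - 0.1) / (0.7 - 0.1) -> approximately [0.0, 0.333, 0.667, 1.0]
# min-max scaling preserves the ranking of the inputs
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()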
9 changes: 8 additions & 1 deletion setup.py
@@ -1,4 +1,11 @@
from setuptools import setup
import os


def read(fname):
with open(os.path.join(os.path.dirname(__file__), fname)) as fp:
s = fp.read()
return s


def get_version(path):
@@ -15,7 +22,7 @@ def get_version(path):
name='bwsample',
version=get_version("bwsample/__init__.py"),
description='Sampling algorithm for best-worst scaling sets.',
long_description='README.rst',
long_description=read('README.rst'),
url='http://github.com/ulf1/bwsample',
author='Ulf Hamster',
author_email='554c46@gmail.com',
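The read() helper replaces the earlier literal string 'README.rst', so the long_description shipped to PyPI now contains the actual text of the file rather than its name. A minimal sketch of the same pattern outside this repository; the filename is only illustrative:

import os

def read(fname):
    # resolve fname relative to this file and return its contents
    with open(os.path.join(os.path.dirname(__file__), fname)) as fp:
        return fp.read()

# e.g. setup(..., long_description=read('README.rst'))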
24 changes: 18 additions & 6 deletions test/test_adjustscore.py
@@ -6,36 +6,48 @@
def test1():
scores = [random.random() for _ in range(1000)]
adjusted = bws.adjustscore(scores, method='quantile')
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test2():
scores = [.1, .3, .5, .7]
adjusted = bws.adjustscore(scores, method='quantile')
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test3():
scores = [random.random() for _ in range(1000)]
adjusted = bws.adjustscore(scores, method='sig3iqr')
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test4():
scores = [.1, .3, .5, .7]
adjusted = bws.adjustscore(scores, method='sig3iqr')
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test5():
scores = [random.random() for _ in range(1000)]
labels = [s > 0.5 for s in scores]
adjusted = bws.adjustscore(scores, method='platt', labels=labels)
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test6():
scores = [.1, .3, .5, .7]
labels = [s > 0.5 for s in scores]
adjusted = bws.adjustscore(scores, method='platt', labels=labels)
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test7():
scores = [random.random() for _ in range(1000)]
adjusted = bws.adjustscore(scores, method='minmax')
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test8():
scores = [.1, .3, .5, .7]
adjusted = bws.adjustscore(scores, method='minmax')
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()
