Merge pull request #67 from satzbeleg/dev
Add MinMax Scaling
ulf1 authored Jun 7, 2021
2 parents 1db41eb + 704c086 commit 54f3b74
Showing 8 changed files with 47 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -110,3 +110,4 @@ profile/data*
.theia
.DS_Store
tmp
README.rst
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -1,2 +1,3 @@
include README.md
include README.rst
recursive-include test *.py
4 changes: 4 additions & 0 deletions README.md
@@ -1,6 +1,10 @@
[![PyPI version](https://badge.fury.io/py/bwsample.svg)](https://badge.fury.io/py/bwsample)
[![DOI](https://zenodo.org/badge/335090754.svg)](https://zenodo.org/badge/latestdoi/335090754)
[![bwsample](https://snyk.io/advisor/python/bwsample/badge.svg)](https://snyk.io/advisor/python/bwsample)
[![Total alerts](https://img.shields.io/lgtm/alerts/g/satzbeleg/bwsample.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/satzbeleg/bwsample/alerts/)
[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/satzbeleg/bwsample.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/satzbeleg/bwsample/context:python)
[![deepcode](https://www.deepcode.ai/api/gh/badge?key=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJwbGF0Zm9ybTEiOiJnaCIsIm93bmVyMSI6InNhdHpiZWxlZyIsInJlcG8xIjoiYndzYW1wbGUiLCJpbmNsdWRlTGludCI6ZmFsc2UsImF1dGhvcklkIjoyOTQ1MiwiaWF0IjoxNjE5NTM1ODEyfQ.Vk73_cXxw-nf-ZmipCeFNm-zrgqy8Bkdt0oVd73ryhQ)](https://www.deepcode.ai/app/gh/satzbeleg/bwsample/_/dashboard?utm_content=gh%2Fsatzbeleg%2Fbwsample)


# bwsample: Sampling and Evaluation of Best-Worst Scaling sets
Sampling algorithm for best-worst scaling (BWS) sets, extracting pairs from evaluated BWS sets, count in dictionary of keys sparse matrix, and compute scores based on it.
2 changes: 1 addition & 1 deletion bwsample/__init__.py
@@ -1,4 +1,4 @@
__version__ = '0.6.5'
__version__ = '0.6.6'

from .sampling import sample
from .counting import count
8 changes: 1 addition & 7 deletions bwsample/ranking.py
@@ -1,6 +1,7 @@
from typing import List, Dict, Tuple, Optional
from .utils import to_scipy
from .utils import adjustscore
from .utils import minmax
import numpy as np
import scipy.sparse
import scipy.sparse.linalg
@@ -591,13 +592,6 @@ def mle_btl_sparse(cnt: scipy.sparse.csr_matrix,
return np.array(x1.flatten())[0], False


def minmax(arr: np.array) -> np.array:
data = np.array(arr)
xmin = data.min()
xmax = data.max()
return (data - xmin) / (xmax - xmin)


def bradley_terry_probability(cnt: scipy.sparse.csr_matrix,
indices: List[str],
prefit: Optional[bool] = True,
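With this hunk, minmax moves out of ranking.py and is pulled back in via the new `from .utils import minmax` import, next to the existing to_scipy and adjustscore imports. A minimal sketch of calling the relocated helper, assuming the package is installed from this revision; the sample values are illustrative:

from bwsample.utils import minmax

# linear rescaling to [0, 1]: (x - min) / (max - min)
print(minmax([2.0, 4.0, 6.0]).tolist())  # -> [0.0, 0.5, 1.0]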
14 changes: 13 additions & 1 deletion bwsample/utils.py
@@ -84,6 +84,13 @@ def add_dok(a: Dict[Tuple[ItemID, ItemID], int],
return out


def minmax(arr: np.array) -> np.array:
data = np.array(arr)
xmin = data.min()
xmax = data.max()
return (data - xmin) / (xmax - xmin)


def adjustscore(scores: np.array,
method: Optional[str] = 'quantile',
n_quantiles: Optional[int] = 10000,
@@ -100,6 +107,7 @@ def adjustscore(scores: np.array,
- 'quantile' -- sklearn's quantile transform
- 'sig3iqr' -- sigmoid 3x sklearn's robust scaler with (25%,75%)
- 'platt' -- calibrate scores with the binary labels (Platt, 1999)
- 'minmax' -- Min-Max scaling
n_quantiles: Optional[int] = 10000
Parameter for `method='quantile'`
@@ -119,7 +127,8 @@ comparisons to regularized likelihood methods.
comparisons to regularized likelihood methods.
"""
scores = np.array(scores)
labels = np.array(labels)
if labels:
labels = np.array(labels)

if method == 'quantile':
return sklearn.preprocessing.quantile_transform(
@@ -137,5 +146,8 @@ def adjustscore(scores: np.array,
cls.fit(X=scores.reshape(-1, 1), y=labels)
return cls.predict_proba(scores.reshape(-1, 1))[:, 1].reshape(-1)

elif method == 'minmax':
return minmax(scores)

else:
raise Exception(f"The method='{method}' is not implemented.")
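The new 'minmax' branch delegates to the relocated helper, rescaling scores linearly onto [0, 1] via (x - min) / (max - min); because the transform is monotone, the ordering of the scores is unchanged. A minimal sketch of the added code path, importing adjustscore directly from bwsample.utils as defined in this file (the sample values are illustrative):

import numpy as np
from bwsample.utils import adjustscore

scores = [.1, .3, .5, .7]
adjusted = adjustscore(scores, method='minmax')
# (scores - 0.1) / (0.7 - 0.1) -> approximately [0.0, 0.333, 0.667, 1.0]
# min-max scaling preserves the ranking of the inputs
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()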
9 changes: 8 additions & 1 deletion setup.py
@@ -1,4 +1,11 @@
from setuptools import setup
import os


def read(fname):
with open(os.path.join(os.path.dirname(__file__), fname)) as fp:
s = fp.read()
return s


def get_version(path):
@@ -15,7 +22,7 @@ def get_version(path):
name='bwsample',
version=get_version("bwsample/__init__.py"),
description='Sampling algorithm for best-worst scaling sets.',
long_description='README.rst',
long_description=read('README.rst'),
url='http://github.com/ulf1/bwsample',
author='Ulf Hamster',
author_email='554c46@gmail.com',
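The read() helper replaces the earlier literal string 'README.rst', so the long_description shipped to PyPI now contains the actual text of the file rather than its name. A minimal sketch of the same pattern outside this repository; the filename is only illustrative:

import os

def read(fname):
    # resolve fname relative to this file and return its contents
    with open(os.path.join(os.path.dirname(__file__), fname)) as fp:
        return fp.read()

# e.g. setup(..., long_description=read('README.rst'))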
24 changes: 18 additions & 6 deletions test/test_adjustscore.py
@@ -6,36 +6,48 @@
def test1():
scores = [random.random() for _ in range(1000)]
adjusted = bws.adjustscore(scores, method='quantile')
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test2():
scores = [.1, .3, .5, .7]
adjusted = bws.adjustscore(scores, method='quantile')
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test3():
scores = [random.random() for _ in range(1000)]
adjusted = bws.adjustscore(scores, method='sig3iqr')
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test4():
scores = [.1, .3, .5, .7]
adjusted = bws.adjustscore(scores, method='sig3iqr')
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test5():
scores = [random.random() for _ in range(1000)]
labels = [s > 0.5 for s in scores]
adjusted = bws.adjustscore(scores, method='platt', labels=labels)
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test6():
scores = [.1, .3, .5, .7]
labels = [s > 0.5 for s in scores]
adjusted = bws.adjustscore(scores, method='platt', labels=labels)
assert np.argsort(adjusted).tolist() == np.argsort(adjusted).tolist()
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test7():
scores = [random.random() for _ in range(1000)]
adjusted = bws.adjustscore(scores, method='minmax')
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()


def test8():
scores = [.1, .3, .5, .7]
adjusted = bws.adjustscore(scores, method='minmax')
assert np.argsort(scores).tolist() == np.argsort(adjusted).tolist()
