Skip to content

Commit

Permalink
Add Poisson regression support (WIP). (#46)
Browse files Browse the repository at this point in the history
* Add util and Poisson family.

* Add dataset generator for PR.

* Add PR estimator.

* Add PR test.

* Fix sign error.

* Remove dispatch for sigmoid.

* Add efficient slicing for PR hessian.

* Add PR to parametrized tests.

* Fix flake.

* Fix a typo.
  • Loading branch information
mpancia authored and cicdw committed May 3, 2017
1 parent d8c6251 commit b251bfc
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ wheels/
*.egg
dask_glm/.ropeproject/*
.ipynb_checkpoints/
.idea/*
13 changes: 13 additions & 0 deletions dask_glm/datasets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import dask.array as da
from dask_glm.utils import exp


def make_classification(n_samples=1000, n_features=100, n_informative=2, scale=1.0,
Expand All @@ -22,3 +23,15 @@ def make_regression(n_samples=1000, n_features=100, n_informative=2, scale=1.0,
z0 = X[:, informative_idx].dot(beta[informative_idx])
y = da.random.random(z0.shape, chunks=(chunksize,))
return X, y


def make_poisson(n_samples=1000, n_features=100, n_informative=2, scale=1.0,
chunksize=100):
X = da.random.normal(0, 1, size=(n_samples, n_features),
chunks=(chunksize, n_features))
informative_idx = np.random.choice(n_features, n_informative)
beta = (np.random.random(n_features) - 1) * scale
z0 = X[:, informative_idx].dot(beta[informative_idx])
rate = exp(z0)
y = da.random.poisson(rate, size=1, chunks=(chunksize,))
return X, y
54 changes: 52 additions & 2 deletions dask_glm/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from . import algorithms
from . import families
from .utils import (
sigmoid, dot, add_intercept, mean_squared_error, accuracy_score
sigmoid, dot, add_intercept, mean_squared_error, accuracy_score, exp, poisson_deviance
)


Expand Down Expand Up @@ -120,8 +120,9 @@ def score(self, X, y):

class LinearRegression(_GLM):
"""
Ordinary Lest Square regression
Ordinary Least Squares regression
"""

@property
def family(self):
return families.Normal
Expand All @@ -132,3 +133,52 @@ def predict(self, X):

def score(self, X, y):
return mean_squared_error(y, self.predict(X))


class PoissonRegression(_GLM):
"""
Parameters
----------
fit_intercept : bool, default True
Specifies if a constant (a.k.a. bias or intercept) should be
added to the decision function.
solver : {'admm', 'gradient_descent', 'newton', 'bfgs', 'proximal_grad'}
Solver to use. See :ref:`algorithms` for details
regularizer : {'l1', 'l2'}
Regularizer to use. See :ref:`regularizers` for details.
Only used with ``admm`` and ``proximal_grad`` solvers.
max_iter : int, default 100
Maximum number of iterations taken for the solvers to converge
tol : float, default 1e-4
Tolerance for stopping criteria. Ignored for ``admm`` solver
lambduh : float, default 1.0
Only used with ``admm`` and ``proximal_grad`` solvers
rho, over_relax, abstol, reltol : float
Only used with the ``admm`` solver. See :ref:`algorithms.admm`
for details
Attributes
----------
coef_ : array, shape (n_classes, n_features)
intercept_ : float
Examples
--------
>>> from dask_glm.datasets import make_poisson
>>> X, y = make_poisson()
>>> pr = PoissonRegression()
>>> pr.fit(X, y)
>>> pr.predict(X)
>>> pr.get_deviance(X, y)
"""

@property
def family(self):
return families.Poisson

def predict(self, X):
X_ = self._maybe_add_intercept(X)
return exp(dot(X_, self._coef))

def get_deviance(self, X, y):
return poisson_deviance(y, self.predict(X))
39 changes: 37 additions & 2 deletions dask_glm/families.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@


class Logistic(object):

@staticmethod
def loglike(Xbeta, y):
eXbeta = exp(Xbeta)
Expand Down Expand Up @@ -38,7 +37,7 @@ def hessian(Xbeta, X):
class Normal(object):
@staticmethod
def loglike(Xbeta, y):
return ((y - Xbeta)**2).sum()
return ((y - Xbeta) ** 2).sum()

@staticmethod
def pointwise_loss(beta, X, y):
Expand All @@ -59,3 +58,39 @@ def gradient(Xbeta, X, y):
@staticmethod
def hessian(Xbeta, X):
return 2 * dot(X.T, X)


class Poisson(object):
"""
This implements Poisson regression for count data.
See https://en.wikipedia.org/wiki/Poisson_regression.
"""

@staticmethod
def loglike(Xbeta, y):
eXbeta = exp(Xbeta)
yXbeta = y * Xbeta
return (eXbeta - yXbeta).sum()

@staticmethod
def pointwise_loss(beta, X, y):
beta, y = beta.ravel(), y.ravel()
Xbeta = X.dot(beta)
return Poisson.loglike(Xbeta, y)

@staticmethod
def pointwise_gradient(beta, X, y):
beta, y = beta.ravel(), y.ravel()
Xbeta = X.dot(beta)
return Poisson.gradient(Xbeta, X, y)

@staticmethod
def gradient(Xbeta, X, y):
eXbeta = exp(Xbeta)
return dot(X.T, eXbeta - y)

@staticmethod
def hessian(Xbeta, X):
eXbeta = exp(Xbeta)
x_diag_eXbeta = eXbeta[:, None] * X
return dot(X.T, x_diag_eXbeta)
6 changes: 3 additions & 3 deletions dask_glm/tests/test_algos_families.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from dask_glm.algorithms import (newton, bfgs, proximal_grad,
gradient_descent, admm)
from dask_glm.families import Logistic, Normal
from dask_glm.families import Logistic, Normal, Poisson
from dask_glm.regularizers import L1, L2
from dask_glm.utils import sigmoid, make_y

Expand Down Expand Up @@ -63,7 +63,7 @@ def test_methods(N, p, seed, opt):
])
@pytest.mark.parametrize('N', [1000])
@pytest.mark.parametrize('nchunks', [1, 10])
@pytest.mark.parametrize('family', [Logistic, Normal])
@pytest.mark.parametrize('family', [Logistic, Normal, Poisson])
def test_basic_unreg_descent(func, kwargs, N, nchunks, family):
beta = np.random.normal(size=2)
M = len(beta)
Expand All @@ -87,7 +87,7 @@ def test_basic_unreg_descent(func, kwargs, N, nchunks, family):
])
@pytest.mark.parametrize('N', [1000])
@pytest.mark.parametrize('nchunks', [1, 10])
@pytest.mark.parametrize('family', [Logistic, Normal])
@pytest.mark.parametrize('family', [Logistic, Normal, Poisson])
@pytest.mark.parametrize('lam', [0.01, 1.2, 4.05])
@pytest.mark.parametrize('reg', [L1, L2])
def test_basic_reg_descent(func, kwargs, N, nchunks, family, lam, reg):
Expand Down
21 changes: 19 additions & 2 deletions dask_glm/tests/test_estimators.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from dask_glm.estimators import LogisticRegression, LinearRegression
from dask_glm.datasets import make_classification, make_regression
from dask_glm.estimators import LogisticRegression, LinearRegression, PoissonRegression
from dask_glm.datasets import make_classification, make_regression, make_poisson
from dask_glm.algorithms import _solvers
from dask_glm.regularizers import _regularizers

Expand Down Expand Up @@ -39,6 +39,10 @@ def test_lr_init(solver):
LogisticRegression(solver=solver)


def test_pr_init(solver):
PoissonRegression(solver=solver)


@pytest.mark.parametrize('fit_intercept', [True, False])
def test_fit(fit_intercept):
X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
Expand Down Expand Up @@ -71,6 +75,19 @@ def test_big(fit_intercept):
assert lr.intercept_ is not None


@pytest.mark.parametrize('fit_intercept', [True, False])
def test_poisson_fit(fit_intercept):
import dask
dask.set_options(get=dask.get)
X, y = make_poisson()
pr = PoissonRegression(fit_intercept=fit_intercept)
pr.fit(X, y)
pr.predict(X)
pr.get_deviance(X, y)
if fit_intercept:
assert pr.intercept_ is not None


def test_in_pipeline():
from sklearn.pipeline import make_pipeline
X, y = make_classification(n_samples=100, n_features=5, chunksize=10)
Expand Down
4 changes: 4 additions & 0 deletions dask_glm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ def accuracy_score(y_true, y_pred):
return (y_true == y_pred).mean()


def poisson_deviance(y_true, y_pred):
return 2 * (y_true * log1p(y_true / y_pred) - (y_true - y_pred)).sum()


try:
import sparse
except ImportError:
Expand Down

0 comments on commit b251bfc

Please sign in to comment.