Skip to content

Commit

Permalink
Merge pull request #101 from milesgranger/add-pre-commit
Browse files Browse the repository at this point in the history
Add pre-commit: black, flake8, isort, codespell
  • Loading branch information
milesgranger authored Sep 21, 2023
2 parents 56d40f6 + 297c9d7 commit 5194d6f
Show file tree
Hide file tree
Showing 17 changed files with 609 additions and 400 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
name: Linting

on: [push, pull_request]

jobs:
checks:
name: "pre-commit hooks"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: pre-commit/action@v3.0.0
28 changes: 28 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
repos:
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black
language_version: python3

- repo: https://github.com/pycqa/flake8
rev: 5.0.4
hooks:
- id: flake8
language_version: python3

- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
language_version: python3
args: ["--profile", "black"]

- repo: https://github.com/codespell-project/codespell
rev: v2.2.4
hooks:
- id: codespell
additional_dependencies:
- tomli
types_or: [rst, markdown]
files: docs
3 changes: 2 additions & 1 deletion dask_glm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pkg_resources import get_distribution, DistributionNotFound
from pkg_resources import DistributionNotFound, get_distribution

try:
__version__ = get_distribution(__name__).version
except DistributionNotFound:
Expand Down
175 changes: 118 additions & 57 deletions dask_glm/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,38 @@

from __future__ import absolute_import, division, print_function

import dask
from dask import delayed, persist, compute
import functools
import numpy as np

import dask
import dask.array as da
from scipy.optimize import fmin_l_bfgs_b
import numpy as np
from dask import compute, delayed, persist
from dask.array.utils import normalize_to_array
from scipy.optimize import fmin_l_bfgs_b

from dask_glm.utils import dot, normalize, scatter_array, get_distributed_client, maybe_to_cupy
from dask_glm.families import Logistic
from dask_glm.regularizers import Regularizer


def compute_stepsize_dask(beta, step, Xbeta, Xstep, y, curr_val,
family=Logistic, stepSize=1.0,
armijoMult=0.1, backtrackMult=0.1):
from dask_glm.utils import (
dot,
get_distributed_client,
maybe_to_cupy,
normalize,
scatter_array,
)


def compute_stepsize_dask(
beta,
step,
Xbeta,
Xstep,
y,
curr_val,
family=Logistic,
stepSize=1.0,
armijoMult=0.1,
backtrackMult=0.1,
):
"""Compute the optimal stepsize
Parameters
Expand All @@ -43,10 +59,12 @@ def compute_stepsize_dask(beta, step, Xbeta, Xstep, y, curr_val,
"""

loglike = family.loglike
beta, step, Xbeta, Xstep, y, curr_val = persist(beta, step, Xbeta, Xstep, y, curr_val)
beta, step, Xbeta, Xstep, y, curr_val = persist(
beta, step, Xbeta, Xstep, y, curr_val
)
obeta, oXbeta = beta, Xbeta
(step,) = compute(step)
steplen = (step ** 2).sum()
steplen = (step**2).sum()
lf = curr_val
func = 0
for ii in range(100):
Expand Down Expand Up @@ -110,19 +128,28 @@ def gradient_descent(X, y, max_iter=100, tol=1e-14, family=Logistic, **kwargs):

# backtracking line search
lf = func
stepSize, _, _, func = compute_stepsize_dask(beta, grad,
Xbeta, Xgradient,
y, func, family=family,
backtrackMult=backtrackMult,
armijoMult=armijoMult,
stepSize=stepSize)
stepSize, _, _, func = compute_stepsize_dask(
beta,
grad,
Xbeta,
Xgradient,
y,
func,
family=family,
backtrackMult=backtrackMult,
armijoMult=armijoMult,
stepSize=stepSize,
)

beta, stepSize, Xbeta, lf, func, grad, Xgradient = persist(
beta, stepSize, Xbeta, lf, func, grad, Xgradient)
beta, stepSize, Xbeta, lf, func, grad, Xgradient
)

stepSize, lf, func, grad = compute(stepSize, lf, func, grad)

beta = beta - stepSize * grad # tiny bit of repeat work here to avoid communication
beta = (
beta - stepSize * grad
) # tiny bit of repeat work here to avoid communication
Xbeta = Xbeta - stepSize * Xgradient

if stepSize == 0:
Expand Down Expand Up @@ -179,14 +206,13 @@ def newton(X, y, max_iter=50, tol=1e-8, family=Logistic, **kwargs):
# should this be dask or numpy?
# currently uses Python 3 specific syntax
step, _, _, _ = np.linalg.lstsq(hess, grad)
beta = (beta_old - step)
beta = beta_old - step

iter_count += 1

# should change this criterion
coef_change = np.absolute(beta_old - beta)
converged = (
(not np.any(coef_change > tol)) or (iter_count > max_iter))
converged = (not np.any(coef_change > tol)) or (iter_count > max_iter)

if not converged:
Xbeta = dot(X, beta) # numpy -> dask conversion of beta
Expand All @@ -195,8 +221,19 @@ def newton(X, y, max_iter=50, tol=1e-8, family=Logistic, **kwargs):


@normalize
def admm(X, y, regularizer='l1', lamduh=0.1, rho=1, over_relax=1,
max_iter=250, abstol=1e-4, reltol=1e-2, family=Logistic, **kwargs):
def admm(
X,
y,
regularizer="l1",
lamduh=0.1,
rho=1,
over_relax=1,
max_iter=250,
abstol=1e-4,
reltol=1e-2,
family=Logistic,
**kwargs
):
"""
Alternating Direction Method of Multipliers
Expand Down Expand Up @@ -230,6 +267,7 @@ def wrapped(beta, X, y, z, u, rho):
u = maybe_to_cupy(u, X)
res = func(beta, X, y) + rho * (beta - z + u)
return normalize_to_array(res)

return wrapped

def create_local_f(func):
Expand All @@ -238,15 +276,15 @@ def wrapped(beta, X, y, z, u, rho):
beta = maybe_to_cupy(beta, X)
z = maybe_to_cupy(z, X)
u = maybe_to_cupy(u, X)
res = func(beta, X, y) + (rho / 2) * np.dot(beta - z + u,
beta - z + u)
res = func(beta, X, y) + (rho / 2) * np.dot(beta - z + u, beta - z + u)
return normalize_to_array(res)

return wrapped

f = create_local_f(pointwise_loss)
fprime = create_local_gradient(pointwise_gradient)

nchunks = getattr(X, 'npartitions', 1)
nchunks = getattr(X, "npartitions", 1)
# nchunks = X.npartitions
(n, p) = X.shape
# XD = X.to_delayed().flatten().tolist()
Expand All @@ -265,11 +303,11 @@ def wrapped(beta, X, y, z, u, rho):
betas = np.array([np.ones(p) for i in range(nchunks)])

for k in range(max_iter):

# x-update step
new_betas = [delayed(local_update)(xx, yy, bb, z, uu, rho, f=f,
fprime=fprime) for
xx, yy, bb, uu in zip(XD, yD, betas, u)]
new_betas = [
delayed(local_update)(xx, yy, bb, z, uu, rho, f=f, fprime=fprime)
for xx, yy, bb, uu in zip(XD, yD, betas, u)
]
new_betas = np.array(da.compute(*new_betas))

beta_hat = over_relax * new_betas + (1 - over_relax) * z
Expand All @@ -287,9 +325,9 @@ def wrapped(beta, X, y, z, u, rho):
dual_res = np.linalg.norm(rho * (z - zold))

eps_pri = np.sqrt(p * nchunks) * abstol + reltol * np.maximum(
np.linalg.norm(new_betas), np.sqrt(nchunks) * np.linalg.norm(z))
eps_dual = np.sqrt(p * nchunks) * abstol + \
reltol * np.linalg.norm(rho * u)
np.linalg.norm(new_betas), np.sqrt(nchunks) * np.linalg.norm(z)
)
eps_dual = np.sqrt(p * nchunks) * abstol + reltol * np.linalg.norm(rho * u)

if primal_res < eps_pri and dual_res < eps_dual:
break
Expand All @@ -298,21 +336,29 @@ def wrapped(beta, X, y, z, u, rho):


def local_update(X, y, beta, z, u, rho, f, fprime, solver=fmin_l_bfgs_b):

beta = beta.ravel()
u = u.ravel()
z = z.ravel()
solver_args = (X, y, z, u, rho)
beta, f, d = solver(f, beta, fprime=fprime, args=solver_args,
maxiter=200,
maxfun=250)
beta, f, d = solver(
f, beta, fprime=fprime, args=solver_args, maxiter=200, maxfun=250
)

return beta


@normalize
def lbfgs(X, y, regularizer=None, lamduh=1.0, max_iter=100, tol=1e-4,
family=Logistic, verbose=False, **kwargs):
def lbfgs(
X,
y,
regularizer=None,
lamduh=1.0,
max_iter=100,
tol=1e-4,
family=Logistic,
verbose=False,
**kwargs
):
"""L-BFGS solver using scipy.optimize implementation
Parameters
Expand Down Expand Up @@ -348,25 +394,41 @@ def lbfgs(X, y, regularizer=None, lamduh=1.0, max_iter=100, tol=1e-4,

def compute_loss_grad(beta, X, y):
beta = maybe_to_cupy(beta, X)
scatter_beta = scatter_array(
beta, dask_distributed_client) if dask_distributed_client else beta
scatter_beta = (
scatter_array(beta, dask_distributed_client)
if dask_distributed_client
else beta
)
loss_fn = pointwise_loss(scatter_beta, X, y)
gradient_fn = pointwise_gradient(scatter_beta, X, y)
loss, gradient = compute(loss_fn, gradient_fn)
return normalize_to_array(loss), normalize_to_array(gradient.copy())

with dask.config.set(fuse_ave_width=0): # optimizations slow this down
beta, loss, info = fmin_l_bfgs_b(
compute_loss_grad, beta0, fprime=None,
compute_loss_grad,
beta0,
fprime=None,
args=(X, y),
iprint=(verbose > 0) - 1, pgtol=tol, maxiter=max_iter)
iprint=(verbose > 0) - 1,
pgtol=tol,
maxiter=max_iter,
)
beta = maybe_to_cupy(beta, X)
return beta


@normalize
def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic,
max_iter=100, tol=1e-8, **kwargs):
def proximal_grad(
X,
y,
regularizer="l1",
lamduh=0.1,
family=Logistic,
max_iter=100,
tol=1e-8,
**kwargs
):
"""
Proximal Gradient Method
Expand All @@ -391,7 +453,6 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic,
n, p = X.shape
firstBacktrackMult = 0.1
nextBacktrackMult = 0.5
armijoMult = 0.1
stepGrowth = 1.25
stepSize = 1.0
recalcRate = 10
Expand All @@ -407,16 +468,16 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic,

gradient = family.gradient(Xbeta, X, y)

Xbeta, func, gradient = persist(
Xbeta, func, gradient)
Xbeta, func, gradient = persist(Xbeta, func, gradient)

obeta = beta

# Compute the step size
lf = func
for ii in range(100):
beta = regularizer.proximal_operator(- stepSize * gradient + obeta, stepSize * lamduh)
step = - beta + obeta
beta = regularizer.proximal_operator(
-stepSize * gradient + obeta, stepSize * lamduh
)
Xbeta = X.dot(beta)

Xbeta, beta = persist(Xbeta, beta)
Expand Down Expand Up @@ -444,9 +505,9 @@ def proximal_grad(X, y, regularizer='l1', lamduh=0.1, family=Logistic,


_solvers = {
'admm': admm,
'gradient_descent': gradient_descent,
'newton': newton,
'lbfgs': lbfgs,
'proximal_grad': proximal_grad
"admm": admm,
"gradient_descent": gradient_descent,
"newton": newton,
"lbfgs": lbfgs,
"proximal_grad": proximal_grad,
}
Loading

0 comments on commit 5194d6f

Please sign in to comment.