Pin asdl and vendor previous version in laplace.third_party

aleximmer · Aug 19, 2024 · fb2712c · fb2712c
1 parent ccf4a30
commit fb2712c
Show file tree

Hide file tree

Showing 25 changed files with 4,176 additions and 10 deletions.
diff --git a/laplace/curvature/asdfghjkl.py b/laplace/curvature/asdfghjkl.py
@@ -6,7 +6,11 @@
 
 import numpy as np
 import torch
-from asdfghjkl import (
+from torch import nn
+from torch.utils.data import DataLoader
+
+from laplace.curvature import CurvatureInterface, EFInterface, GGNInterface
+from laplace.third_party.asdfghjkl import (
     COV,
     FISHER_EXACT,
     FISHER_MC,
@@ -15,12 +19,8 @@
     SHAPE_KRON,
     fisher_for_cross_entropy,
 )
-from asdfghjkl.gradient import batch_gradient
-from asdfghjkl.hessian import hessian_eigenvalues, hessian_for_loss
-from torch import nn
-from torch.utils.data import DataLoader
-
-from laplace.curvature import CurvatureInterface, EFInterface, GGNInterface
+from laplace.third_party.asdfghjkl.gradient import batch_gradient
+from laplace.third_party.asdfghjkl.hessian import hessian_eigenvalues, hessian_for_loss
 from laplace.utils import Kron, _is_batchnorm
 from laplace.utils.enums import Likelihood
 

diff --git a/laplace/third_party/__init__.py b/laplace/third_party/__init__.py
diff --git a/laplace/third_party/asdfghjkl.LICENSE b/laplace/third_party/asdfghjkl.LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Kazuki Osawa
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/laplace/third_party/asdfghjkl/__init__.py b/laplace/third_party/asdfghjkl/__init__.py
@@ -0,0 +1,12 @@
+from .utils import *
+from .operations import *
+from .core import extend
+from .symmatrix import *
+from .matrices import *
+from .gradient import *
+from .mvp import *
+from .fisher import *
+from .hessian import *
+from .kernel import *
+from .precondition import *
+from .fr import *
diff --git a/laplace/third_party/asdfghjkl/core.py b/laplace/third_party/asdfghjkl/core.py
@@ -0,0 +1,118 @@
+from typing import List
+from contextlib import contextmanager
+
+import torch.nn as nn
+from .utils import im2col_2d, record_original_requires_grad
+from .operations import OP_ACCUMULATE_GRADS, get_op_class
+
+
+@contextmanager
+def extend(model, op_names):
+    if not isinstance(op_names, (list, tuple)):
+        op_names = [op_names]
+    accumulate_grads = False
+    if OP_ACCUMULATE_GRADS in op_names:
+        accumulate_grads = True
+        op_names = [name for name in op_names if name != OP_ACCUMULATE_GRADS]
+    handles = []
+
+    def forward_hook(module, in_data, out_data):
+        in_data = in_data[0].clone().detach()
+        in_data = _preprocess_in_data(module, in_data, out_data)
+        _call_operations_in_forward(module, in_data)
+
+        def backward_hook(out_grads):
+            out_grads = out_grads.clone().detach()
+            out_grads = _preprocess_out_grads(module, out_grads)
+            _call_operations_in_backward(module, in_data, out_grads)
+
+        if out_data.requires_grad:
+            handles.append(out_data.register_hook(backward_hook))
+
+    for module in model.modules():
+        requires_grad = False
+        for attr in ['weight', 'bias']:
+            param = getattr(module, attr, None)
+            if param is not None:
+                requires_grad = requires_grad or param.requires_grad
+                record_original_requires_grad(param)
+        if not requires_grad:
+            continue
+        # register hooks and operations in modules
+        handles.append(module.register_forward_hook(forward_hook))
+        _register_operations(model, module, op_names)
+
+    yield
+
+    # remove hooks and operations from modules
+    for handle in handles:
+        handle.remove()
+    for module in model.modules():
+        _remove_operations(module)
+
+    # accumulate param.grad to param.acc_grad
+    if accumulate_grads:
+        attr = OP_ACCUMULATE_GRADS
+        for param in model.parameters():
+            if param.grad is None:
+                continue
+            if not hasattr(param, attr):
+                setattr(param, attr, param.grad)
+            else:
+                acc_grad = getattr(param, attr)
+                acc_grad.add_(param.grad)
+
+
+def _preprocess_in_data(module, in_data, out_data):
+    if isinstance(module, nn.Conv2d):
+        in_data = im2col_2d(in_data, module)
+
+    if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
+        bnorm = module
+        f = bnorm.num_features
+        if isinstance(module, nn.BatchNorm1d):
+            shape = (1, f)
+        elif isinstance(module, nn.BatchNorm2d):
+            shape = (1, f, 1, 1)
+        else:
+            shape = (1, f, 1, 1, 1)
+        # restore normalized input
+        in_data_norm = (out_data -
+                        bnorm.bias.view(shape)).div(bnorm.weight.view(shape))
+        in_data = in_data_norm
+
+    if isinstance(module, nn.LayerNorm):
+        layernorm = module
+        # restore normalized input
+        in_data_norm = (out_data - layernorm.bias).div(layernorm.weight)
+        in_data = in_data_norm
+
+    return in_data
+
+
+def _preprocess_out_grads(module, out_grads):
+    if isinstance(module, nn.Conv2d):
+        out_grads = out_grads.flatten(start_dim=2)
+
+    return out_grads
+
+
+def _register_operations(model: nn.Module, module: nn.Module, op_names: List):
+    op_class = get_op_class(module)
+    if op_class is not None:
+        setattr(module, 'operation', op_class(module, model, op_names))
+
+
+def _call_operations_in_forward(module, in_data):
+    if hasattr(module, 'operation'):
+        module.operation.forward_post_process(in_data)
+
+
+def _call_operations_in_backward(module, in_data, out_grads):
+    if hasattr(module, 'operation'):
+        module.operation.backward_pre_process(in_data, out_grads)
+
+
+def _remove_operations(module):
+    if hasattr(module, 'operation'):
+        delattr(module, 'operation')