-
Notifications
You must be signed in to change notification settings - Fork 2
/
tensors.py
159 lines (127 loc) · 5.47 KB
/
tensors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import numpy as np
import numpy.linalg as la
import scipy
import matplotlib.pyplot as plt
import time
import json
import pickle
import h5py
from common import *
import cppimport.import_hook
from cpp_ext.als_module import Tensor, LowRankTensor, SparseTensor, ALS
from cpp_ext.als_module import DenseTensor_float, DenseTensor_double
class PyLowRank:
    '''
    Python-side handle for a rank-R CP decomposition: holds the factor
    matrices as NumPy arrays (self.U) alongside the C++ LowRankTensor
    object (self.ten) used by the solver.
    '''
    def __init__(self, dims, R, allow_rhs_mttkrp=False, init_method="gaussian", seed=None):
        '''
        dims: iterable of mode sizes.
        R: CP rank (columns of each factor matrix).
        allow_rhs_mttkrp: when True, construct the C++ tensor with a
            sample-count argument (10000) — presumably enabling it to serve
            as the right-hand side of sampled MTTKRPs; TODO confirm.
        init_method: only "gaussian" is supported.
        seed: optional RNG seed for reproducible initialization.

        Raises ValueError for an unsupported init_method (previously a
        bare assert(False), which is stripped under `python -O`).
        '''
        if init_method != "gaussian":
            raise ValueError(
                f"Unknown init_method '{init_method}'; only 'gaussian' is supported.")

        # default_rng(None) draws fresh OS entropy, so one call covers
        # both the seeded and unseeded cases.
        rng = np.random.default_rng(seed)

        self.dims = list(dims)
        self.N = len(self.dims)
        self.R = R
        self.U = [rng.normal(size=(dim, R)) for dim in self.dims]
        if allow_rhs_mttkrp:
            self.ten = LowRankTensor(R, 10000, self.U)
        else:
            self.ten = LowRankTensor(R, self.U)

    def set_sigma(self, new_sigma):
        '''Forward a new core scaling vector sigma to the C++ tensor.'''
        self.ten.set_sigma(new_sigma)

    def compute_diff_resid(self, rhs):
        '''
        Computes the residual norm of the difference between this
        low-rank tensor and rhs (another wrapped tensor).
        '''
        sigma_lhs = np.zeros(self.R, dtype=np.double)
        self.ten.get_sigma(sigma_lhs, -1)
        normsq = rhs.ten.compute_residual_normsq(sigma_lhs, self.U)
        return np.sqrt(normsq)

    def compute_exact_fit(self, rhs):
        '''Return the exact fit 1 - ||self - rhs|| / ||rhs||.'''
        diff_norm = self.compute_diff_resid(rhs)
        rhs_norm = np.sqrt(rhs.ten.get_normsq())
        return 1.0 - diff_norm / rhs_norm

    def compute_approx_diff_norm(self, rhs):
        '''
        Randomized estimate of ||self - rhs||. Only implemented when rhs
        is a PySparseTensor (the estimator lives on the C++ sparse tensor).
        '''
        if not isinstance(rhs, PySparseTensor):
            raise NotImplementedError("This function is only implemented for sparse tensors!")
        return np.sqrt(rhs.ten.compute_residual_normsq_estimated(self.ten))

    def compute_approx_fit(self, rhs):
        '''
        Randomized estimate of the fit 1 - ||self - rhs|| / ||rhs||;
        rhs must be a PySparseTensor.
        '''
        if not isinstance(rhs, PySparseTensor):
            raise NotImplementedError("This function is only implemented for sparse tensors!")
        diff_norm = self.compute_approx_diff_norm(rhs)
        rhs_norm = np.sqrt(rhs.ten.get_normsq())
        return 1.0 - diff_norm / rhs_norm

    def compute_integral(self, dx):
        '''
        Computes the integral of the CP decomposition over the grid:
        Simpson's rule along each mode's factor matrix (axis 0), then a
        sigma-weighted combination across modes.

        dx: per-mode grid spacing (sequence of length N).
        '''
        sigma = np.zeros(self.R, dtype=np.double)
        self.ten.get_sigma(sigma, -1)
        buffers = [sigma]
        for i in range(self.N):
            buffers.append(scipy.integrate.simpson(self.U[i], axis=0, dx=dx[i]))
        # chain_had_prod comes from `common` via star import — presumably a
        # chained Hadamard (elementwise) product of the buffers; TODO confirm.
        return np.sum(chain_had_prod(buffers))
class PySparseTensor:
    '''
    Sparse tensor loaded from an HDF5 file; wraps the C++ SparseTensor.

    Expects datasets MAX_MODE_SET, MIN_MODE_SET, MODE_0..MODE_{N-1} and
    VALUES in the file (one MODE_i index array per mode).
    '''
    def __init__(self, filename, lookup, preprocessing=None):
        '''
        filename: path to the HDF5 file described above.
        lookup: lookup structure forwarded verbatim to the C++ SparseTensor.
        preprocessing: optional value transform; only "log_count"
            (values -> log(1 + values)) is recognized — anything else
            prints a warning and keeps the raw values (best-effort).
        '''
        print("Loading sparse tensor...")
        # Context manager closes the HDF5 handle even on error
        # (the original opened the file and never closed it).
        with h5py.File(filename, 'r') as f:
            self.max_idxs = f['MAX_MODE_SET'][:]
            self.min_idxs = f['MIN_MODE_SET'][:]
            self.N = len(self.max_idxs)
            self.dims = self.max_idxs[:]

            # The tensor must have at least one mode
            self.nnz = len(f['MODE_0'])

            self.tensor_idxs = np.zeros((self.nnz, self.N), dtype=np.uint32)
            for i in range(self.N):
                # Shift each mode's indices so they start at zero.
                self.tensor_idxs[:, i] = f[f'MODE_{i}'][:] - self.min_idxs[i]
            self.values = f['VALUES'][:]
        print("Loaded tensor values from disk...")

        if preprocessing is not None:
            if preprocessing == "log_count":
                self.values = np.log(self.values + 1.0)
            else:
                print(f"Unknown preprocessing option '{preprocessing}' specified!")

        self.ten = SparseTensor(self.tensor_idxs, self.values, lookup)
        print("Finished loading sparse tensor...")

    def initialize_randomized_accuracy_estimation(self, nz_samples, zero_samples=None, fp_tol=0.01):
        '''
        Set up randomized fit estimation on the C++ tensor.

        nz_samples: number of nonzero entries to sample.
        zero_samples: number of zero entries to sample (defaults to nz_samples).
        fp_tol: tolerance forwarded to the C++ estimator. Bug fix: the
            original ignored this parameter and passed a hard-coded 0.01.
        '''
        if zero_samples is None:
            zero_samples = nz_samples
        dims_np = np.array(self.dims, dtype=np.uint32)
        self.ten.initialize_randomized_accuracy_estimation(fp_tol, nz_samples, zero_samples, dims_np)
class PyDenseTensor:
    '''
    Thin wrapper selecting the correctly-typed C++ dense tensor for a
    NumPy array of dtype float32 or float64.
    '''
    def __init__(self, data):
        '''
        data: NumPy array; dtype must be float32 or float64.

        Raises ValueError for any other dtype. Previously an unsupported
        dtype fell through silently, leaving self.ten unset and deferring
        failure to the first attribute access.
        '''
        # 10000 matches the sample-count argument used for other C++
        # tensor constructors in this file — TODO confirm its meaning.
        if np.issubdtype(data.dtype, np.float32):
            self.ten = DenseTensor_float(data, 10000)
        elif np.issubdtype(data.dtype, np.float64):
            self.ten = DenseTensor_double(data, 10000)
        else:
            raise ValueError(
                f"Unsupported dtype {data.dtype}; expected float32 or float64.")
#from numba import cfunc, types, carray, void, uint32, float64, uint64, jit
#@jit(void(float64[:, :],uint64[:, :],uint32,uint32,uint32,uint32), nopython=True)
#def test_function(out_buffer, samples, j, row_pos, M, Ij):
# delta_X = 0.01
#
# for i in range(row_pos, row_pos + M):
# samples[i, j] = 0
# temp_sum = np.sum(samples[i, :])
#
# for k in range(Ij):
#out_buffer[i-row_pos, k] = np.sin((temp_sum + k) * delta_X)
#out_buffer[i-row_pos, k] = (temp_sum + k) * delta_X
# out_buffer[i-row_pos, k] = samples[i, 0]
#def test_wrapper(out_buffer_, samples_, j, row_pos, M, Ij, tensor_dim):
# out_ptr = ctypes.c_void_p(out_buffer_)
# samples_ptr = ctypes.c_void_p(samples_)
# out_buffer = carray(out_ptr, (M, Ij), dtype=np.double)
# samples = carray(samples_ptr, (M, tensor_dim), dtype=np.uint64)
# test_function(out_buffer, samples, j, row_pos, M, Ij)
class FunctionTensor:
    '''
    Wrapper around a function-defined tensor sampled on a grid.

    NOTE(review): PyFunctionTensor is not imported in the visible portion
    of this file — presumably defined or imported elsewhere; confirm.
    '''
    def __init__(self, dims, J, dx, func=None, func_batch=None):
        # dims: mode sizes; J: sample count; dx: grid spacing forwarded
        # to PyFunctionTensor. func / func_batch are accepted but unused
        # in this visible part of __init__ — TODO confirm where they are consumed.
        self.N = len(dims)
        self.J = J
        #self.bounds = bounds
        #self.subdivisions = subdivisions
        self.dims = np.array(dims, dtype=np.uint64)
        # 10000 matches the sample-count argument used by the other
        # tensor constructors in this file — TODO confirm its meaning.
        self.ten = PyFunctionTensor(self.dims, J, 10000, dx)