Skip to content

Commit

Permalink
Start updating RipsPersistence
Browse files Browse the repository at this point in the history
  • Loading branch information
mglisse committed Jul 30, 2024
1 parent 381869f commit e5596ea
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 48 deletions.
1 change: 1 addition & 0 deletions src/Ripser/include/gudhi/ripser.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ struct Sparse_distance_matrix {

value_t operator()(const vertex_t i, const vertex_t j) const {
#ifdef USE_HASHMAP_FOR_SPARSE_DIST_MAT
// We could insert in both orders to save the minmax for each query.
return m.at(std::minmax(i,j));
//// We never hit the infinity case?
// auto it = m.find(std::minmax(i,j));
Expand Down
48 changes: 29 additions & 19 deletions src/python/gudhi/_ripser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,24 @@ typedef std::vector<std::array<double, 2>> Vd;
PYBIND11_MAKE_OPAQUE(Vf);
PYBIND11_MAKE_OPAQUE(Vd);

template<class T>struct Numpy_euclidean {
typedef Tag_other Category;
typedef int vertex_t;
typedef T value_t;

decltype(std::declval<py::array_t<T>&>().template unchecked<2>()) data;

int size() const { return data.shape(0); }

T operator()(int i, int j) const {
T dist = 0;
for (int k=0; k<data.shape(1); ++k) {
T diff = data(i, k) - data(j, k);
dist += diff * diff;
}
return std::sqrt(dist);
}
};
//template<class T>struct Numpy_euclidean {
// typedef Tag_other Category;
// typedef int vertex_t;
// typedef T value_t;
//
// decltype(std::declval<py::array_t<T>&>().template unchecked<2>()) data;
//
// int size() const { return data.shape(0); }
//
// T operator()(int i, int j) const {
// T dist = 0;
// for (int k=0; k<data.shape(1); ++k) {
// T diff = data(i, k) - data(j, k);
// dist += diff * diff;
// }
// return std::sqrt(dist);
// }
//};

template<class T>struct Full {
typedef Tag_dense Category;
Expand Down Expand Up @@ -108,6 +108,7 @@ py::list full(py::array_t<T> matrix, int max_dimension, T max_edge_length, unsig
}

py::list lower(py::object low_mat, int max_dimension, double max_edge_length, unsigned homology_coeff_field) {
using Dist = Compressed_distance_matrix<DParams<int, double>, LOWER_TRIANGULAR>;
std::vector<double> distances;
int rowi = 0;
for (auto&& row : low_mat) {
Expand All @@ -122,7 +123,16 @@ py::list lower(py::object low_mat, int max_dimension, double max_edge_length, un
};

std::optional<py::gil_scoped_release> release_local(std::in_place);
Compressed_distance_matrix<DParams<int, double>, LOWER_TRIANGULAR> dist(std::move(distances));
Dist dist(std::move(distances));

// Compute the radius and possibly use it as threshold
for (int i = 0; i < dist.size(); ++i) {
double r_i = -std::numeric_limits<double>::infinity();
for (int j = 0; j < dist.size(); ++j)
r_i = std::max(r_i, dist(i, j));
max_edge_length = std::min(max_edge_length, r_i);
}

release_local.reset();

return doit(std::move(dist), max_dimension, max_edge_length, homology_coeff_field);
Expand Down
69 changes: 40 additions & 29 deletions src/python/gudhi/sklearn/rips_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
# Modification(s):
# - YYYY/MM Author: Description of the modification

from .. import RipsComplex
from .._ripser import _lower, _full, _sparse
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

# joblib is required by scikit-learn
Expand Down Expand Up @@ -73,36 +74,46 @@ def fit(self, X, Y=None):
"""
return self

def __transform(self, inputs):
max_dimension = max(self.dim_list_) + 1

if self.input_type == 'point cloud':
rips = RipsComplex(points=inputs, max_edge_length = self.threshold)
elif self.input_type == 'lower distance matrix':
rips = RipsComplex(distance_matrix=inputs, max_edge_length = self.threshold)
def __transform(self, inp):
# TODO: give the user the option to force the use of one particular strategy (including SimplexTree)
max_dimension = max(self.dim_list_)
input_type = self.input_type

if input_type == 'point cloud':
if self.threshold < float('inf'):
# Hope that the user gave a useful threshold
from scipy.spatial import cKDTree
from scipy.sparse import coo_matrix
tree = cKDTree(inp)
# Or tree.query_pairs(r=self.threshold, output_type='ndarray') and recompute the distances?
inp = tree.sparse_distance_matrix(tree, max_distance=self.threshold, output_type="coo_matrix")
input_type = 'coo_matrix' # call it 'sparse distance matrix'? 'distance coo_array'?

else:
from scipy.spatial.distance import pdist, squareform
inp = squareform(pdist(inp))
input_type = 'full distance matrix'

##TODO edge collapse!!!
#if max_dimension > 1:
# i, j = np.triu_indices_from(inp, k=1)

if input_type == 'full distance matrix':
#TODO: compute cone_radius before edge collapse
#TODO: possibly transpose for performance
inp = np.asarray(inp)
cone_radius = inp.max(-1).min()
r = min(self.threshold, cone_radius)
dgm = _full(inp, max_dimension=max_dimension, max_edge_length=self.threshold, homology_coeff_field=self.homology_coeff_field)
elif input_type == 'lower distance matrix':
# minmax threshold is computed inside _lower
dgm = _lower(inp, max_dimension=max_dimension, max_edge_length=self.threshold, homology_coeff_field=self.homology_coeff_field)
elif input_type == 'coo_matrix':
dgm = _sparse(inp, max_dimension=max_dimension, max_edge_length=self.threshold, homology_coeff_field=self.homology_coeff_field)
else:
raise ValueError("Only 'point cloud' and 'lower distance matrix' are valid input_type")
raise ValueError("Only 'point cloud', 'lower distance matrix', 'full distance matrix' and 'coo_matrix' are valid input_type") # move to __init__?

if max_dimension > 1:
stree = rips.create_simplex_tree(max_dimension=1)
stree.collapse_edges(nb_iterations = self.num_collapses)
stree.expansion(max_dimension)
else:
stree = rips.create_simplex_tree(max_dimension=max_dimension)

persistence_dim_max = False
# Specific case where, despite expansion(max_dimension), stree has a lower dimension
if max_dimension > stree.dimension():
persistence_dim_max = True

stree.compute_persistence(
homology_coeff_field=self.homology_coeff_field,
persistence_dim_max=persistence_dim_max
)

return [
stree.persistence_intervals_in_dimension(dim) for dim in self.dim_list_
]
return [dgm[dim] for dim in self.dim_list_]

def transform(self, X, Y=None):
"""Compute all the Vietoris-Rips complexes and their associated persistence diagrams.
Expand Down

0 comments on commit e5596ea

Please sign in to comment.