Skip to content

Commit

Permalink
improve python itf
Browse files Browse the repository at this point in the history
  • Loading branch information
mglisse committed Aug 3, 2024
1 parent 90a966b commit 6ac1c9f
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 30 deletions.
23 changes: 23 additions & 0 deletions src/python/gudhi/_ripser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,15 @@ py::list lower(py::object low_mat, int max_dimension, double max_edge_length, un
std::optional<py::gil_scoped_release> release_local(std::in_place);
Dist dist(std::move(distances));

#if 0
// Compute the radius and possibly use it as threshold
for (int i = 0; i < dist.size(); ++i) {
double r_i = -std::numeric_limits<double>::infinity();
for (int j = 0; j < dist.size(); ++j)
r_i = std::max(r_i, dist(i, j));
max_edge_length = std::min(max_edge_length, r_i);
}
#endif

release_local.reset();

Expand Down Expand Up @@ -199,6 +201,26 @@ py::list lower_to_coo(py::object low_mat, double max_edge_length) {
py::array(py::cast(std::move(fs))));
}

double lower_cone_radius(py::object low_mat) {
// It would be more efficient to read the matrix only once
auto n = py::len(low_mat);
std::vector<double> maxs(n, -std::numeric_limits<double>::infinity());
int rowi = 0;
for (auto&& row : low_mat) {
if (rowi == 0) { ++rowi; continue; }
int coli = 0;
for (auto&& elem : row) {
double d = elem.cast<double>();
maxs[rowi] = std::max(maxs[rowi], d);
maxs[coli] = std::max(maxs[coli], d);
if (++coli == rowi) break;
}
if (coli < rowi) throw std::invalid_argument("Not enough elements for a lower triangular matrix");
++rowi;
};
return *std::max_element(maxs.begin(), maxs.end());
}

PYBIND11_MODULE(_ripser, m) {
py::bind_vector<Vi >(m, "VectorInt" , py::buffer_protocol());
py::bind_vector<Vd >(m, "VectorDouble" , py::buffer_protocol());
Expand All @@ -216,6 +238,7 @@ PYBIND11_MODULE(_ripser, m) {
m.def("_sparse", sparse<int, double>, py::arg("row"), py::arg("col"), py::arg("data"), py::arg("num_vertices"), py::arg("max_dimension") = std::numeric_limits<int>::max(), py::arg("max_edge_length") = std::numeric_limits<double>::infinity(), py::arg("homology_coeff_field") = 2);
// Not directly an interface to Ripser...
m.def("_lower_to_coo", lower_to_coo, py::arg("matrix"), py::arg("max_edge_length"));
m.def("_lower_cone_radius", lower_cone_radius, py::arg("matrix"));
}

// We could also create a RipsComplex class, that allows looking at a simplex, querying its (co)boundary, etc. But I am not convinced it is worth the effort.
Expand Down
67 changes: 37 additions & 30 deletions src/python/gudhi/sklearn/rips_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@
# Modification(s):
# - YYYY/MM Author: Description of the modification

from .._ripser import _lower, _full, _sparse, _lower_to_coo
from .._ripser import _lower, _full, _sparse, _lower_to_coo, _lower_cone_radius
from ..flag_filtration.edge_collapse import reduce_graph
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from scipy.sparse import coo_matrix
from scipy.spatial import cKDTree
from scipy.spatial.distance import pdist, squareform

# joblib is required by scikit-learn
from joblib import Parallel, delayed
Expand Down Expand Up @@ -79,52 +82,56 @@ def fit(self, X, Y=None):
def __transform(self, inp):
# TODO: give the user the option to force the use of one particular strategy (including SimplexTree)
max_dimension = max(self.dim_list_)
num_collapses = self.num_collapses
input_type = self.input_type
threshold = self.threshold
if num_collapses == 'auto':
num_collapses = 1 if max_dimension > 1 else 0
# or num_collapses=max_dimension-1 maybe?
elif max_dimension == 0:
num_collapses = 0

if input_type == 'point cloud':
if self.threshold < float('inf'):
if threshold < float('inf'):
# Hope that the user gave a useful threshold
from scipy.spatial import cKDTree
from scipy.sparse import coo_matrix
tree = cKDTree(inp)
# Or tree.query_pairs(r=self.threshold, output_type='ndarray') and recompute the distances?
inp = tree.sparse_distance_matrix(tree, max_distance=self.threshold, output_type="coo_matrix")
input_type = 'coo_matrix' # call it 'sparse distance matrix'? 'distance coo_matrix'?
# Or tree.query_pairs(r=threshold, output_type='ndarray') and recompute the distances?
inp = tree.sparse_distance_matrix(tree, max_distance=threshold, output_type="coo_matrix")
input_type = 'coo_matrix' # FIXME: call it 'sparse distance matrix'? 'distance coo_matrix'?
else:
from scipy.spatial.distance import pdist, squareform
inp = squareform(pdist(inp))
input_type = 'full distance matrix'

# Edge collapse always goes through the sparse format
num_collapses = self.num_collapses
if num_collapses == 'auto':
num_collapses = 1 if max_dimension > 1 else 0
# or num_collapses=max_dimension-1 maybe?
elif max_dimension == 0:
num_collapses = 0
if num_collapses > 0:
if input_type in ('full distance matrix', 'lower distance matrix'):
# Dense -> sparse
if input_type in ('full distance matrix', 'lower distance matrix'):
# After this filtration value, all complexes are cones, nothing happens
if input_type == 'full distance matrix':
inp = np.asarray(inp)
cone_radius = inp.max(-1).min()
else:
cone_radius = _lower_cone_radius(inp)
sparsify = num_collapses > 0 or threshold < cone_radius # heuristic
threshold = min(threshold, cone_radius)
if sparsify:
# For 'full' we could use i, j = np.triu_indices_from(inp, k=1), etc
i, j, f = _lower_to_coo(inp, self.threshold)
# TODO move this, or use directly _collapse_edges
from scipy.sparse import coo_matrix
i, j, f = _lower_to_coo(inp, threshold)
inp = coo_matrix((f, (i, j)), shape=(len(inp),) * 2)
input_type = 'coo_matrix'

if num_collapses > 0:
assert input_type == 'coo_matrix'
inp = reduce_graph(inp, num_collapses)

if input_type == 'full distance matrix':
#TODO: compute cone_radius before edge collapse
#TODO: possibly transpose for performance
inp = np.asarray(inp)
cone_radius = inp.max(-1).min()
r = min(self.threshold, cone_radius)
dgm = _full(inp, max_dimension=max_dimension, max_edge_length=self.threshold, homology_coeff_field=self.homology_coeff_field)
##TODO: possibly transpose for performance
#if inp.strides[0] > inp.strides[1]:
# inp = inp.T
dgm = _full(inp, max_dimension=max_dimension, max_edge_length=threshold, homology_coeff_field=self.homology_coeff_field)
elif input_type == 'lower distance matrix':
# minmax threshold is computed inside _lower
dgm = _lower(inp, max_dimension=max_dimension, max_edge_length=self.threshold, homology_coeff_field=self.homology_coeff_field)
dgm = _lower(inp, max_dimension=max_dimension, max_edge_length=threshold, homology_coeff_field=self.homology_coeff_field)
elif input_type == 'coo_matrix':
#TODO: switch to coo_array (the interface is different)?
dgm = _sparse(inp.row, inp.col, inp.data, inp.shape[0], max_dimension=max_dimension, max_edge_length=self.threshold, homology_coeff_field=self.homology_coeff_field)
#TODO: switch to coo_array (danger: row/col seem deprecated)?
dgm = _sparse(inp.row, inp.col, inp.data, inp.shape[0], max_dimension=max_dimension, max_edge_length=threshold, homology_coeff_field=self.homology_coeff_field)
else:
raise ValueError("Only 'point cloud', 'lower distance matrix', 'full distance matrix' and 'coo_matrix' are valid input_type") # move to __init__?

Expand Down

0 comments on commit 6ac1c9f

Please sign in to comment.