Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#187887894]: Median measure #399

Merged
merged 12 commits into from
Jul 4, 2024
48 changes: 48 additions & 0 deletions src/cr/cube/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,15 @@ def means(self) -> Optional[np.ndarray]:
return None
return self._measures.means.raw_cube_array[self._valid_idxs].astype(np.float64)

@lazyproperty
def medians(self) -> Optional[np.ndarray]:
    """Optional float64 ndarray of cube medians, None when there is no such measure.

    The values are the median measure's `raw_cube_array`, indexed by
    `self._valid_idxs` and cast to np.float64.
    """
    medians_measure = self._measures.medians
    if medians_measure is None:
        return None
    return medians_measure.raw_cube_array[self._valid_idxs].astype(np.float64)

@lazyproperty
def missing(self) -> int:
"""Get missing count of a cube."""
Expand Down Expand Up @@ -781,6 +790,14 @@ def means(self) -> 'Optional["_MeanMeasure"]':
mean = _MeanMeasure(self._cube_dict, self._all_dimensions, self._cube_idx_arg)
return None if mean.raw_cube_array is None else mean

@lazyproperty
def medians(self) -> 'Optional["_MediansMeasure"]':
    """Optional `_MediansMeasure` object providing access to median values.

    None when the measure object's `.raw_cube_array` is None.
    """
    measure = _MediansMeasure(
        self._cube_dict, self._all_dimensions, self._cube_idx_arg
    )
    if measure.raw_cube_array is None:
        return None
    return measure

@lazyproperty
def missing_count(self) -> int:
"""numeric representing count of missing rows in cube response."""
Expand All @@ -790,6 +807,10 @@ def missing_count(self) -> int:
# fixtures that don't have valid_counts.
if self.means is not None:
return self.means.missing_count
# The check on the median measure is needed for retro-compatibility with the old
# fixtures that don't have valid_counts.
if self.medians is not None:
return self.medians.missing_count
return self._cube_dict["result"].get("missing", 0)

@lazyproperty
Expand Down Expand Up @@ -1047,6 +1068,33 @@ def _flat_values(self) -> Optional[np.ndarray]:
).flatten()


class _MediansMeasure(_BaseMeasure):
    """Median values read from the "median" measure of a cube-response."""

    @lazyproperty
    def missing_count(self) -> int:
        """Count of missing rows reported by the median measure.

        Read from the measure's "n_missing" item; 0 when that item is absent.
        """
        median_payload = self._cube_dict["result"]["measures"]["median"]
        return median_payload.get("n_missing", 0)

    @lazyproperty
    def _flat_values(self) -> Optional[np.ndarray]:
        """Optional 1D np.float64 ndarray of the median values in the cube response.

        None when the response has no "median" measure. A data item that is a
        dict (a missing value, e.g. {'?': -1}) becomes np.nan in the result.
        """
        measures = self._cube_dict["result"].get("measures", {})
        payload = measures.get("median")
        if payload is None:
            return None
        # A dict item marks a missing value; everything else is used as found.
        values = [np.nan if isinstance(v, dict) else v for v in payload["data"]]
        return np.array(values, dtype=np.float64).flatten()


class _OverlapMeasure(_BaseMeasure):
"""Overlap values from a cube-response."""

Expand Down
31 changes: 31 additions & 0 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,23 @@ def means(self):
"`.means` is undefined for a cube-result without a mean measure"
)

@lazyproperty
def medians(self):
    """2D optional np.float64 ndarray of median values for each table cell.

    Cell value is `np.nan` for each cell corresponding to an inserted subtotal
    (medians of addend cells cannot simply be combined to get the median of the
    subtotal).

    Raises `ValueError` if the cube-result does not include a median cube-measure.
    """
    try:
        return self._assemble_matrix(self._measures.medians.blocks)
    except ValueError as err:
        # Chain explicitly so the underlying error is reported as the direct
        # cause of this one rather than as an incidental context.
        raise ValueError(
            "`.medians` is undefined for a cube-result without a median measure"
        ) from err

@lazyproperty
def min_base_size_mask(self):
return MinBaseSizeMask(self, self._mask_size)
Expand Down Expand Up @@ -2005,6 +2022,20 @@ def means(self):
"`.means` is undefined for a cube-result without a mean measure"
)

@lazyproperty
def medians(self):
    """1D np.float64 ndarray of medians for each row of strand.

    Raises `ValueError` when accessed on a cube-result that does not contain a
    median cube-measure.
    """
    try:
        return self._assemble_vector(self._measures.medians.blocks)
    except ValueError as err:
        # Chain explicitly so the underlying error is reported as the direct
        # cause of this one rather than as an incidental context.
        raise ValueError(
            "`.medians` is undefined for a cube-result without a median measure"
        ) from err

@lazyproperty
def min_base_size_mask(self):
"""1D bool ndarray of True for each row that fails to meet min-base spec.
Expand Down
1 change: 1 addition & 0 deletions src/cr/cube/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ class CUBE_MEASURE(enum.Enum):
COVARIANCE = "covariance"
COUNT = "count"
MEAN = "mean"
MEDIAN = "median"
OVERLAP = "overlap"
STDDEV = "stddev"
SUM = "sum"
Expand Down
91 changes: 91 additions & 0 deletions src/cr/cube/matrix/cubemeasure.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ def cube_means(self):
"""_BaseCubeMeans subclass object for this cube-result."""
return _BaseCubeMeans.factory(self._cube, self._dimensions, self._slice_idx)

@lazyproperty
def cube_medians(self):
    """`_BaseCubeMedians` subclass object for this cube-result."""
    cube, dimensions, slice_idx = self._cube, self._dimensions, self._slice_idx
    return _BaseCubeMedians.factory(cube, dimensions, slice_idx)

@lazyproperty
def cube_overlaps(self):
"""_BaseCubeOverlaps subclass object for this cube-result."""
Expand Down Expand Up @@ -877,6 +882,92 @@ def means(self):
return self._means[:, 0, :, 0]


# === MEDIANs ===


class _BaseCubeMedians(_BaseCubeMeasure):
    """Base class for medians cube-measure variants."""

    def __init__(self, dimensions, medians):
        super().__init__(dimensions)
        self._medians = medians

    @classmethod
    def factory(cls, cube, dimensions, slice_idx):
        """Return `_BaseCubeMedians` subclass instance appropriate to `cube`.

        The subclass is chosen from the last two entries of
        `cube.dimension_types`, according to which of them (if any) is MR.

        Raises `ValueError` if the cube-result does not include a cube-median
        measure.
        """
        if cube.medians is None:
            raise ValueError("cube-result does not contain cube-median measure")

        dimension_types = cube.dimension_types[-2:]
        if dimension_types == (DT.MR, DT.MR):
            CubeMedianCls = _MrXMrCubeMedians
        elif dimension_types[0] == DT.MR:
            CubeMedianCls = _MrXCatCubeMedians
        elif dimension_types[1] == DT.MR:
            CubeMedianCls = _CatXMrCubeMedians
        else:
            CubeMedianCls = _CatXCatCubeMedians

        slice_medians = cube.medians[cls._slice_idx_expr(cube, slice_idx)]
        return CubeMedianCls(dimensions, slice_medians)

    @lazyproperty
    def medians(self):
        """2D np.float64 ndarray of cube medians; each subclass must provide it."""
        raise NotImplementedError(  # pragma: no cover
            f"`{type(self).__name__}` must implement `.medians`"
        )


class _CatXCatCubeMedians(_BaseCubeMedians):
    """Medians cube-measure for a slice with no MR dimensions."""

    @lazyproperty
    def medians(self):
        """2D np.float64 ndarray of medians, one for each valid matrix cell.

        The medians array given at construction is returned unchanged.
        """
        cube_medians = self._medians
        return cube_medians


class _CatXMrCubeMedians(_BaseCubeMedians):
    """Medians cube-measure for a NOT_MR_X_MR slice.

    Note that the rows-dimensions need not actually be CAT.
    """

    @lazyproperty
    def medians(self):
        """2D np.float64 ndarray of medians, one for each valid matrix cell.

        Keeps only index 0 of the third axis of the constructor array.
        NOTE(review): presumably the "selected" item of the MR selection axis --
        confirm against the cube-response layout.
        """
        cube_medians = self._medians
        return cube_medians[:, :, 0]


class _MrXCatCubeMedians(_BaseCubeMedians):
    """Medians cube-measure for an MR_X_NOT_MR slice.

    Note that the columns-dimension need not actually be CAT.
    """

    @lazyproperty
    def medians(self):
        """2D np.float64 ndarray of medians, one for each valid matrix cell.

        Keeps only index 0 of the second axis of the constructor array.
        NOTE(review): presumably the "selected" item of the MR selection axis --
        confirm against the cube-response layout.
        """
        cube_medians = self._medians
        return cube_medians[:, 0, :]


class _MrXMrCubeMedians(_BaseCubeMedians):
    """Medians cube-measure for an MR_X_MR slice."""

    @lazyproperty
    def medians(self):
        """2D np.float64 ndarray of medians, one for each valid matrix cell.

        Keeps only index 0 of both the second and the fourth axis of the
        constructor array.
        NOTE(review): presumably the "selected" item of each MR selection axis --
        confirm against the cube-response layout.
        """
        cube_medians = self._medians
        return cube_medians[:, 0, :, 0]


# === OVERLAPS ===


Expand Down
16 changes: 16 additions & 0 deletions src/cr/cube/matrix/measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,11 @@ def means(self):
"""_Means measure object for this cube-result"""
return _Means(self._dimensions, self, self._cube_measures)

@lazyproperty
def medians(self):
    """`_Medians` measure object for this cube-result."""
    dimensions, cube_measures = self._dimensions, self._cube_measures
    return _Medians(dimensions, self, cube_measures)

def pairwise_p_vals_for_subvar(self, subvar_idx):
"""_PairwiseSigPValsForSubvar measure object for this cube-result"""
return _PairwiseSigPValsForSubvar(
Expand Down Expand Up @@ -1177,6 +1182,17 @@ def blocks(self):
)


class _Medians(_BaseSecondOrderMeasure):
    """Provides the medians measure for a matrix."""

    @lazyproperty
    def blocks(self):
        """2D array of the four 2D "blocks" making up this measure.

        Built by `NanSubtotals.blocks()` from the medians of the medians
        cube-measure and this measure's dimensions.
        """
        cube_medians = self._cube_measures.cube_medians.medians
        return NanSubtotals.blocks(cube_medians, self._dimensions)


class _MeansSmoothed(_Means, _SmoothedMeasure):
"""Provides the smoothed mean measure for a matrix."""

Expand Down
56 changes: 56 additions & 0 deletions src/cr/cube/stripe/cubemeasure.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ def cube_means(self):
"""_BaseCubeMeans subclass object for this stripe."""
return _BaseCubeMeans.factory(self._cube, self._rows_dimension)

@lazyproperty
def cube_medians(self):
    """`_BaseCubeMedians` subclass object for this stripe."""
    cube, rows_dimension = self._cube, self._rows_dimension
    return _BaseCubeMedians.factory(cube, rows_dimension)

@lazyproperty
def cube_stddev(self):
"""_BaseCubeStdDev subclass object for this stripe."""
Expand Down Expand Up @@ -253,6 +258,57 @@ def means(self):
return self._means[:, 0]


# === MEDIANs ===


class _BaseCubeMedians(_BaseCubeMeasure):
    """Base class for medians cube-measure variants."""

    def __init__(self, rows_dimension, medians):
        super().__init__(rows_dimension)
        self._medians = medians

    @classmethod
    def factory(cls, cube, rows_dimension):
        """Return `_BaseCubeMedians` subclass instance appropriate to `cube`.

        The MR variant is used when `rows_dimension` is of MR dimension-type,
        the CAT variant otherwise.

        Raises `ValueError` if the cube-result does not include a cube-median
        measure.
        """
        if cube.medians is None:
            raise ValueError("cube-result does not contain cube-median measure")

        if rows_dimension.dimension_type == DT.MR:
            MedianCls = _MrCubeMedians
        else:
            MedianCls = _CatCubeMedians
        return MedianCls(rows_dimension, cube.medians)

    @lazyproperty
    def medians(self):
        """1D np.float64 ndarray of medians; each subclass must provide it."""
        raise NotImplementedError(
            f"`{type(self).__name__}` must implement `.medians`"
        )  # pragma: no cover


class _CatCubeMedians(_BaseCubeMedians):
    """Medians cube-measure for a non-MR stripe."""

    @lazyproperty
    def medians(self):
        """1D np.float64 ndarray of medians, one for each stripe row.

        The medians array given at construction is returned unchanged.
        """
        cube_medians = self._medians
        return cube_medians


class _MrCubeMedians(_BaseCubeMedians):
    """Medians cube-measure for an MR stripe.

    The medians array it is constructed with is a 2D ndarray with axes
    (rows, sel/not).
    """

    @lazyproperty
    def medians(self):
        """1D np.float64 ndarray of medians, one for each stripe row.

        Only index 0 of the second (sel/not) axis is kept.
        """
        cube_medians = self._medians
        return cube_medians[:, 0]


# === STD DEV ===


Expand Down
26 changes: 26 additions & 0 deletions src/cr/cube/stripe/measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ def means(self):
"""_Means measure object for this stripe."""
return _Means(self._rows_dimension, self, self._cube_measures)

@lazyproperty
def medians(self):
    """`_Medians` measure object for this stripe."""
    rows_dimension, cube_measures = self._rows_dimension, self._cube_measures
    return _Medians(rows_dimension, self, cube_measures)

@lazyproperty
def population_proportions(self):
"""_PopulationPrortion measure object for this stripe."""
Expand Down Expand Up @@ -231,6 +236,27 @@ def subtotal_values(self):
return NanSubtotals.subtotal_values(self.base_values, self._rows_dimension)


class _Medians(_BaseSecondOrderMeasure):
    """Provides the medians measure for a stripe.

    Depends on a medians cube-measure being present in the cube-result.
    """

    @lazyproperty
    def base_values(self):
        """1D np.float64 ndarray of the median for each row."""
        cube_medians = self._cube_measures.cube_medians
        return cube_medians.medians

    @lazyproperty
    def subtotal_values(self):
        """1D ndarray of np.nan, one for each row-subtotal.

        A median cannot be subtotaled, so every subtotal value is
        unconditionally np.nan.
        """
        base_values, rows_dimension = self.base_values, self._rows_dimension
        return NanSubtotals.subtotal_values(base_values, rows_dimension)


class _MeansSmoothed(_Means, _SmoothedMeasure):
"""Provides the smoothed means measure for a stripe.

Expand Down
Loading
Loading