From e9de2635c785a06387995b5f23c68b8cc74c46a3 Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 10:49:42 +0200 Subject: [PATCH 01/12] median measure in cube --- src/cr/cube/cube.py | 42 +++++++++++ src/cr/cube/enums.py | 1 + ...multi-numeric-measures-grouped-by-cat.json | 73 +++++++++++++++++++ tests/integration/test_cube.py | 9 +++ 4 files changed, 125 insertions(+) diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index e9f6c3f26..b03c4d484 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -427,6 +427,13 @@ def means(self) -> Optional[np.ndarray]: return None return self._measures.means.raw_cube_array[self._valid_idxs].astype(np.float64) + @lazyproperty + def median(self) -> Optional[np.ndarray]: + """Optional float64 ndarray of the cube_median if the measure exists.""" + if self._measures.median is None: + return None + return self._measures.median.raw_cube_array[self._valid_idxs].astype(np.float64) + @lazyproperty def missing(self) -> int: """Get missing count of a cube.""" @@ -781,6 +788,14 @@ def means(self) -> 'Optional["_MeanMeasure"]': mean = _MeanMeasure(self._cube_dict, self._all_dimensions, self._cube_idx_arg) return None if mean.raw_cube_array is None else mean + @lazyproperty + def median(self) -> 'Optional["_MedianMeasure"]': + """Optional _MedianMeasure object providing access to median values.""" + median = _MedianMeasure( + self._cube_dict, self._all_dimensions, self._cube_idx_arg + ) + return None if median.raw_cube_array is None else median + @lazyproperty def missing_count(self) -> int: """numeric representing count of missing rows in cube response.""" @@ -1047,6 +1062,33 @@ def _flat_values(self) -> Optional[np.ndarray]: ).flatten() + +class _MedianMeasure(_BaseMeasure): + """Statistical median values from a cube-response.""" + + @lazyproperty + def missing_count(self) -> int: + """Numeric value representing count of missing rows in response.""" + return
self._cube_dict["result"]["measures"]["median"].get("n_missing", 0) + + @lazyproperty + def _flat_values(self) -> Optional[np.ndarray]: + """Optional 1D np.ndarray of np.float64 median values as found in cube response. + + Median data may include missing items represented by a dict like + {'?': -1} in the cube response. These are replaced by np.nan in the + returned value. + """ + measure_payload = self._cube_dict["result"].get("measures", {}).get("median") + if measure_payload is None: + return None + return np.array( + tuple( + np.nan if isinstance(x, dict) else x for x in measure_payload["data"] + ), + dtype=np.float64, + ).flatten() + + class _OverlapMeasure(_BaseMeasure): """Overlap values from a cube-response.""" diff --git a/src/cr/cube/enums.py b/src/cr/cube/enums.py index 287467631..5d97e908c 100644 --- a/src/cr/cube/enums.py +++ b/src/cr/cube/enums.py @@ -157,6 +157,7 @@ class CUBE_MEASURE(enum.Enum): COVARIANCE = "covariance" COUNT = "count" MEAN = "mean" + MEDIAN = "median" OVERLAP = "overlap" STDDEV = "stddev" SUM = "sum" diff --git a/tests/fixtures/numeric_arrays/num-arr-multi-numeric-measures-grouped-by-cat.json b/tests/fixtures/numeric_arrays/num-arr-multi-numeric-measures-grouped-by-cat.json index 6e0600c5f..30cc4b379 100644 --- a/tests/fixtures/numeric_arrays/num-arr-multi-numeric-measures-grouped-by-cat.json +++ b/tests/fixtures/numeric_arrays/num-arr-multi-numeric-measures-grouped-by-cat.json @@ -40,6 +40,17 @@ "variable": "9bf24ea26b364a0897d77471695f4c88" } ] + }, + "median": { + "function": "cube_median", + "args": [ + { + "variable": "9bf24ea26b364a0897d77471695f4c88" + }, + { + "value": 0.5 + } + ] } }, "dimensions": [ @@ -436,6 +447,68 @@ "missing_rules": {} } } + }, + "median": { + "data": [ + 71.0, + 16.6666666667, + 2.3333333333, + 42.5, + 25.5, + 25.0, + 55.3333333333, + 21.3333333333, + 4.3333333333, + 1.3333333333, + 43.0, + 25.0, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + } + ], + "n_missing": 0, + "metadata": { +
"derived": true, + "references": { + "alias": "Movies", + "uniform_basis": false, + "name": "Movies", + "subreferences": [ + { + "alias": "Dark Knight", + "name": "Dark Knight" + }, + { + "alias": "Fight Club", + "name": "Fight Club" + }, + { + "alias": "Meets the Par.", + "name": "Meets the Par." + } + ] + }, + "type": { + "integer": true, + "subvariables": [ + "S1", + "S2", + "S3" + ], + "class": "numeric", + "missing_reasons": { + "No Data": -1, + "NaN": -8 + }, + "missing_rules": {} + } + } } }, "n": 11, diff --git a/tests/integration/test_cube.py b/tests/integration/test_cube.py index eae86275a..62cb218d4 100644 --- a/tests/integration/test_cube.py +++ b/tests/integration/test_cube.py @@ -158,6 +158,15 @@ def it_provides_multiple_measures_for_NUM_ARRAY_GROUPED_BY_CAT(self): ] ) ) + assert cube.median == pytest.approx( + np.array( + [ + [71.0, 42.5, 55.33333333, 1.33333333], + [16.66666667, 25.5, 21.33333333, 43.0], + [2.33333333, 25.0, 4.33333333, 25.0], + ] + ) + ) assert cube.covariance == pytest.approx( np.array( [ From 16b7a627a77a76670d19cc5e0ddbd527839017d4 Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 11:20:54 +0200 Subject: [PATCH 02/12] median measure in strand --- src/cr/cube/cubepart.py | 14 + src/cr/cube/stripe/cubemeasure.py | 54 ++++ src/cr/cube/stripe/measure.py | 26 ++ tests/fixtures/cat-median.json | 165 ++++++++++ tests/fixtures/mr-median.json | 297 ++++++++++++++++++ ...multi-numeric-measures-grouped-by-cat.json | 2 +- tests/integration/test_cubepart.py | 17 + 7 files changed, 574 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/cat-median.json create mode 100644 tests/fixtures/mr-median.json diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index 35dcc0a06..d9b23367c 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -2005,6 +2005,20 @@ def means(self): "`.means` is undefined for a cube-result without a mean measure" ) + @lazyproperty + def median(self): + """1D 
np.float64 ndarray of median for each row of strand. + + Raises ValueError when accessed on a cube-result that does not contain a median + cube-measure. + """ + try: + return self._assemble_vector(self._measures.median.blocks) + except ValueError: + raise ValueError( + "`.median` is undefined for a cube-result without a median measure" + ) + @lazyproperty def min_base_size_mask(self): """1D bool ndarray of True for each row that fails to meet min-base spec. diff --git a/src/cr/cube/stripe/cubemeasure.py b/src/cr/cube/stripe/cubemeasure.py index 2b4d9eb77..8c42fd60e 100644 --- a/src/cr/cube/stripe/cubemeasure.py +++ b/src/cr/cube/stripe/cubemeasure.py @@ -26,6 +26,11 @@ def cube_means(self): """_BaseCubeMeans subclass object for this stripe.""" return _BaseCubeMeans.factory(self._cube, self._rows_dimension) + @lazyproperty + def cube_median(self): + """_BaseCubeMedian subclass object for this stripe.""" + return _BaseCubeMedian.factory(self._cube, self._rows_dimension) + @lazyproperty def cube_stddev(self): """_BaseCubeStdDev subclass object for this stripe.""" @@ -253,6 +258,55 @@ def means(self): return self._means[:, 0] +# === MEDIAN === + + +class _BaseCubeMedian(_BaseCubeMeasure): + """Base class for median cube-measure variants.""" + + def __init__(self, rows_dimension, median): + super(_BaseCubeMedian, self).__init__(rows_dimension) + self._median = median + + @classmethod + def factory(cls, cube, rows_dimension): + """Return _BaseCubeMedian subclass instance appropriate to `cube`.""" + if cube.median is None: + raise ValueError("cube-result does not contain cube-median measure") + MedianCls = ( + _MrCubeMedian if rows_dimension.dimension_type == DT.MR else _CatCubeMedian + ) + return MedianCls(rows_dimension, cube.median) + + @lazyproperty + def median(self): + """1D np.float64 ndarray of median for each stripe row.""" + raise NotImplementedError( + f"`{type(self).__name__}` must implement `.median`" + ) # pragma: no cover + + +class 
_CatCubeMedian(_BaseCubeMedian): + """Median cube-measure for a non-MR stripe.""" + + @lazyproperty + def median(self): + """1D np.float64 ndarray of median for each stripe row.""" + return self._median + + +class _MrCubeMedian(_BaseCubeMedian): + """Median cube-measure for an MR stripe. + + Its `.median` is a 2D ndarray with axes (rows, sel/not). + """ + + @lazyproperty + def median(self): + """1D np.float64 ndarray of median for each stripe row.""" + return self._median[:, 0] + + # === STD DEV === diff --git a/src/cr/cube/stripe/measure.py b/src/cr/cube/stripe/measure.py index 152733bee..3148ed7b5 100644 --- a/src/cr/cube/stripe/measure.py +++ b/src/cr/cube/stripe/measure.py @@ -38,6 +38,11 @@ def means(self): """_Means measure object for this stripe.""" return _Means(self._rows_dimension, self, self._cube_measures) + @lazyproperty + def median(self): + """_Median measure object for this stripe.""" + return _Median(self._rows_dimension, self, self._cube_measures) + @lazyproperty def population_proportions(self): """_PopulationPrortion measure object for this stripe.""" @@ -231,6 +236,27 @@ def subtotal_values(self): return NanSubtotals.subtotal_values(self.base_values, self._rows_dimension) +class _Median(_BaseSecondOrderMeasure): + """Provides the median measure for a stripe. + + Relies on the presence of a median cube-measure in the cube-result. + """ + + @lazyproperty + def base_values(self): + """1D np.float64 ndarray of median for each row.""" + return self._cube_measures.cube_median.median + + @lazyproperty + def subtotal_values(self): + """1D ndarray of np.nan for each row-subtotal. + + Median values cannot be subtotaled and each subtotal value is unconditionally + np.nan. + """ + return NanSubtotals.subtotal_values(self.base_values, self._rows_dimension) + + class _MeansSmoothed(_Means, _SmoothedMeasure): """Provides the smoothed means measure for a stripe. 
diff --git a/tests/fixtures/cat-median.json b/tests/fixtures/cat-median.json new file mode 100644 index 000000000..4d6c4a483 --- /dev/null +++ b/tests/fixtures/cat-median.json @@ -0,0 +1,165 @@ +{ + "query": { + "measures": { + "valid_count_unweighted": { + "function": "cube_valid_count", + "args": [ + { + "variable": "3a7cfcaf0d5349fe854057603f63b2f3" + } + ] + }, + "median": { + "function": "cube_quantile", + "args": [ + { + "variable": "3a7cfcaf0d5349fe854057603f63b2f3" + }, + { + "value": 0.5 + } + ] + } + }, + "dimensions": [ + { + "variable": "03694f029f9a408cb56f7cadfe9e9b48" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": false, + "references": { + "alias": "Gender", + "name": "Gender" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "id": 1, + "name": "Male", + "missing": false + }, + { + "numeric_value": null, + "id": 2, + "name": "Female", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + } + ], + "missing": 0, + "measures": { + "valid_count_unweighted": { + "data": [ + 3, + 2, + 0 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "integer": false, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + }, + "median": { + "data": [ + 8.8, + 7.445, + 0.11 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "median" + } + }, + "type": { + "integer": null, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + } + }, + "n": 5, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 5, + "other": 0, + "missing": 0 + }, + 
"weighted": { + "selected": 5, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 5, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 5, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 5, + "weighted_n": 5 + }, + "filtered": { + "unweighted_n": 5, + "weighted_n": 5 + }, + "counts": [ + 3, + 2, + 0 + ], + "element": "crunch:cube" + } +} diff --git a/tests/fixtures/mr-median.json b/tests/fixtures/mr-median.json new file mode 100644 index 000000000..9cdf9b5fb --- /dev/null +++ b/tests/fixtures/mr-median.json @@ -0,0 +1,297 @@ +{ + "element": "shoji:view", + "value": { + "query": { + "dimensions": [ + { + "args": [ + { + "args": [ + { + "variable": "3ceb4ea17cd34c6295c26e3d43017757" + } + ], + "function": "as_selected" + }, + { + "value": "subvariables" + } + ], + "function": "dimension" + }, + { + "args": [ + { + "variable": "3ceb4ea17cd34c6295c26e3d43017757" + } + ], + "function": "as_selected" + } + ], + "measures": { + "median": { + "args": [ + { + "variable": "38bb586090ec488fb58177f687572db6" + }, + { + "value": 0.5 + } + ], + "function": "cube_quantile" + }, + "valid_count_unweighted": { + "args": [ + { + "variable": "38bb586090ec488fb58177f687572db6" + } + ], + "function": "cube_valid_count" + } + }, + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "counts": [ + 2, + 1, + 1, + 1, + 2, + 1, + 1, + 2, + 1 + ], + "dimensions": [ + { + "derived": true, + "references": { + "alias": "M", + "name": "M", + "subreferences": [ + { + "alias": "X", + "description": null, + "name": "X" + }, + { + "alias": "Y", + "description": null, + "name": "Y" + }, + { + "alias": "Z", + "description": null, + "name": "Z" + } + ], + "uniform_basis": false + }, + "type": { + "class": "enum", + "elements": [ + { + "id": 1, + "missing": false, + "value": { + "derived": false, + "id": "72OcG2j95yZHLX7YUk7Xkg000000", + "references": { + "alias": "X", + "description": null, + 
"name": "X" + } + } + }, + { + "id": 2, + "missing": false, + "value": { + "derived": false, + "id": "72OcG2j95yZHLX7YUk7Xkg000001", + "references": { + "alias": "Y", + "description": null, + "name": "Y" + } + } + }, + { + "id": 3, + "missing": false, + "value": { + "derived": false, + "id": "72OcG2j95yZHLX7YUk7Xkg000002", + "references": { + "alias": "Z", + "description": null, + "name": "Z" + } + } + } + ], + "subtype": { + "class": "variable" + } + } + }, + { + "derived": true, + "references": { + "alias": "M", + "name": "M", + "subreferences": [ + { + "alias": "X", + "description": null, + "name": "X" + }, + { + "alias": "Y", + "description": null, + "name": "Y" + }, + { + "alias": "Z", + "description": null, + "name": "Z" + } + ], + "uniform_basis": false + }, + "type": { + "categories": [ + { + "id": 1, + "missing": false, + "name": "Selected", + "numeric_value": 1, + "selected": true + }, + { + "id": 0, + "missing": false, + "name": "Other", + "numeric_value": 0 + }, + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false, + "subvariables": [ + "72OcG2j95yZHLX7YUk7Xkg000000", + "72OcG2j95yZHLX7YUk7Xkg000001", + "72OcG2j95yZHLX7YUk7Xkg000002" + ] + } + } + ], + "element": "crunch:cube", + "filter_stats": { + "filtered": { + "unweighted": { + "missing": 0, + "other": 0, + "selected": 4 + }, + "weighted": { + "missing": 0, + "other": 0, + "selected": 4 + } + }, + "filtered_complete": { + "unweighted": { + "missing": 0, + "other": 0, + "selected": 4 + }, + "weighted": { + "missing": 0, + "other": 0, + "selected": 4 + } + } + }, + "filtered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "measures": { + "median": { + "data": [ + 2.22398, + 3.12176, + 1.23254, + 0.23444, + 4.23464, + 6.53464, + 7.23452, + 8.12341, + 2.22122 + ], + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies" + }, + "type": { + "class": "numeric", + "integer": null, + 
"missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 1 + }, + "valid_count_unweighted": { + "data": [ + 2, + 1, + 1, + 1, + 2, + 1, + 1, + 2, + 1 + ], + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies" + }, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 1 + } + }, + "missing": 1, + "n": 4, + "unfiltered": { + "unweighted_n": 4, + "weighted_n": 4 + } + } + } +} diff --git a/tests/fixtures/numeric_arrays/num-arr-multi-numeric-measures-grouped-by-cat.json b/tests/fixtures/numeric_arrays/num-arr-multi-numeric-measures-grouped-by-cat.json index 30cc4b379..9d9eeec2d 100644 --- a/tests/fixtures/numeric_arrays/num-arr-multi-numeric-measures-grouped-by-cat.json +++ b/tests/fixtures/numeric_arrays/num-arr-multi-numeric-measures-grouped-by-cat.json @@ -42,7 +42,7 @@ ] }, "median": { - "function": "cube_median", + "function": "cube_quantile", "args": [ { "variable": "9bf24ea26b364a0897d77471695f4c88" diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index 43f15c6f1..207c3a6f1 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -1990,6 +1990,11 @@ def it_provides_values_for_univariate_cat(self): assert str(e.value) == ( "`.sums` is undefined for a cube-result without a sum measure" ) + with pytest.raises(ValueError) as e: + strand.median + assert str(e.value) == ( + "`.median` is undefined for a cube-result without a median measure" + ) with pytest.raises(ValueError) as e: strand.stddev assert str(e.value) == ( @@ -2296,6 +2301,18 @@ def it_provides_sum_measure_for_CAT(self): assert strand.sums == pytest.approx([88.0, 77.0]) assert strand.table_base_range.tolist() == [5, 5] + def it_provides_median_measure_for_CAT(self): + strand = Cube(CR.CAT_MEDIAN).partitions[0] + + assert strand.median == pytest.approx([8.8, 7.445]) + assert 
strand.table_base_range.tolist() == [5, 5] + + def it_provides_median_measure_for_MR(self): + strand = Cube(CR.MR_MEDIAN).partitions[0] + + assert strand.median == pytest.approx([2.22398, 0.23444, 7.23452]) + assert strand.table_base_range.tolist() == [3, 3] + def it_provides_sum_measure_for_CAT_HS(self): transforms = { "rows_dimension": { From c58b20d998fb5bffe5616cff8b6e18371b56b5be Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 15:50:27 +0200 Subject: [PATCH 03/12] test for num array with no grouping --- .../num-arr-median-no-grouping.json | 161 ++++++++++++++++++ tests/integration/test_numeric_array.py | 9 + 2 files changed, 170 insertions(+) create mode 100644 tests/fixtures/numeric_arrays/num-arr-median-no-grouping.json diff --git a/tests/fixtures/numeric_arrays/num-arr-median-no-grouping.json b/tests/fixtures/numeric_arrays/num-arr-median-no-grouping.json new file mode 100644 index 000000000..8fc567355 --- /dev/null +++ b/tests/fixtures/numeric_arrays/num-arr-median-no-grouping.json @@ -0,0 +1,161 @@ +{ + "query": { + "measures": { + "valid_count_unweighted": { + "function": "cube_valid_count", + "args": [ + { + "variable": "80b47530e227436aba034cac8a411fea" + } + ] + }, + "stddev": { + "function": "cube_quantile", + "args": [ + { + "variable": "80b47530e227436aba034cac8a411fea" + }, + { + "value": 0.5 + } + ] + } + }, + "dimensions": [], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [], + "missing": 0, + "measures": { + "valid_count_unweighted": { + "data": [ + 4, + 3, + 2 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "uniform_basis": false, + "name": "Movies", + "subreferences": [ + { + "alias": "Dark Knight", + "name": "Dark Knight" + }, + { + "alias": "Fight Club", + "name": "Fight Club" + }, + { + "alias": "Meets the Par.", + "name": "Meets the P." 
+ } + ] + }, + "type": { + "integer": false, + "subvariables": [ + "S1", + "S2", + "S3" + ], + "class": "numeric", + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + } + }, + "median": { + "data": [ + 3.5819888975, + 1.5118845843, + 0.1213203436 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "uniform_basis": false, + "name": "Movies", + "subreferences": [ + { + "alias": "Dark Knight", + "name": "Dark Knight" + }, + { + "alias": "Fight Club", + "name": "Fight Club" + }, + { + "alias": "Meets the Par.", + "name": "Meets the P." + } + ] + }, + "type": { + "integer": null, + "subvariables": [ + "S1", + "S2", + "S3" + ], + "class": "numeric", + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + } + } + }, + "n": 4, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 4, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 4, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 4, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 4, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "filtered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "counts": [ + 4 + ], + "element": "crunch:cube" + } +} diff --git a/tests/integration/test_numeric_array.py b/tests/integration/test_numeric_array.py index 195cda5b8..025dd4125 100644 --- a/tests/integration/test_numeric_array.py +++ b/tests/integration/test_numeric_array.py @@ -186,6 +186,15 @@ def it_provides_stddev_for_numeric_array_with_no_grouping(self): assert strand.unweighted_bases.tolist() == [4, 3, 2] assert strand.table_base_range.tolist() == [2, 4] + def it_provides_median_for_numeric_array_with_no_grouping(self): + """Test median on no-dimensions measure of numeric array.""" + strand = Cube(NA.NUM_ARR_MEDIAN_NO_GROUPING).partitions[0] + + assert strand.median == pytest.approx([3.5819889,
1.51188458, 0.12132034]) + assert strand.unweighted_counts.tolist() == [4, 3, 2] + assert strand.unweighted_bases.tolist() == [4, 3, 2] + assert strand.table_base_range.tolist() == [2, 4] + def it_provides_stddev_for_num_array_grouped_by_cat(self): """Test stddev on numeric array, grouped by single categorical dimension.""" slice_ = Cube(NA.NUM_ARR_STDDEV_GROUPED_BY_CAT).partitions[0] From 95c6170600d86fc3035d6dd7c10f66508853fa09 Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 16:13:50 +0200 Subject: [PATCH 04/12] support median for 2d partitions --- src/cr/cube/cubepart.py | 16 + src/cr/cube/matrix/cubemeasure.py | 95 +++- src/cr/cube/matrix/measure.py | 16 + tests/fixtures/median-cat-x-cat-hs.json | 174 ++++++++ .../num-arr-median-grouped-by-cat.json | 224 ++++++++++ .../numeric_arrays/num-arr-median-x-mr.json | 415 ++++++++++++++++++ tests/integration/test_cubepart.py | 19 + tests/integration/test_numeric_array.py | 36 ++ 8 files changed, 994 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/median-cat-x-cat-hs.json create mode 100644 tests/fixtures/numeric_arrays/num-arr-median-grouped-by-cat.json create mode 100644 tests/fixtures/numeric_arrays/num-arr-median-x-mr.json diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index d9b23367c..f3a764c82 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -711,6 +711,22 @@ def means(self): "`.means` is undefined for a cube-result without a mean measure" ) + @lazyproperty + def median(self): + """2D optional np.float64 ndarray of median value for each table cell. + + Cell value is `np.nan` for each cell corresponding to an inserted subtotal + (medians of addend cells cannot be combined to get the median of the subtotal). + + Raises `ValueError` if the cube-result does not include a median cube-measure.
+ """ + try: + return self._assemble_matrix(self._measures.median.blocks) + except ValueError: + raise ValueError( + "`.median` is undefined for a cube-result without a median measure" + ) + @lazyproperty def min_base_size_mask(self): return MinBaseSizeMask(self, self._mask_size) diff --git a/src/cr/cube/matrix/cubemeasure.py b/src/cr/cube/matrix/cubemeasure.py index c9269a91c..d5accbe2a 100644 --- a/src/cr/cube/matrix/cubemeasure.py +++ b/src/cr/cube/matrix/cubemeasure.py @@ -25,6 +25,11 @@ def cube_means(self): """_BaseCubeMeans subclass object for this cube-result.""" return _BaseCubeMeans.factory(self._cube, self._dimensions, self._slice_idx) + @lazyproperty + def cube_median(self): + """_BaseCubeMedian subclass object for this cube-result.""" + return _BaseCubeMedian.factory(self._cube, self._dimensions, self._slice_idx) + @lazyproperty def cube_overlaps(self): """_BaseCubeOverlaps subclass object for this cube-result.""" @@ -134,7 +139,9 @@ def factory(cls, counts, diff_nans, cube, dimensions, slice_idx): ( "MR" if dim_type == DT.MR - else "ARR" if dim_type in DT.ARRAY_TYPES else "CAT" + else "ARR" + if dim_type in DT.ARRAY_TYPES + else "CAT" ) for dim_type in cube.dimension_types[-2:] ) @@ -877,6 +884,92 @@ def means(self): return self._means[:, 0, :, 0] +# === MEDIAN === + + +class _BaseCubeMedian(_BaseCubeMeasure): + """Base class for median cube-measure variants.""" + + def __init__(self, dimensions, median): + super(_BaseCubeMedian, self).__init__(dimensions) + self._median = median + + @classmethod + def factory(cls, cube, dimensions, slice_idx): + """Return _BaseCubeMedian subclass instance appropriate to `cube`. + + Raises `ValueError` if the cube-result does not include a cube-median measure. 
+ """ + if cube.median is None: + raise ValueError("cube-result does not contain cube-median measure") + dimension_types = cube.dimension_types[-2:] + CubeMedianCls = ( + _MrXMrCubeMedian + if dimension_types == (DT.MR, DT.MR) + else ( + _MrXCatCubeMedian + if dimension_types[0] == DT.MR + else ( + _CatXMrCubeMedian + if dimension_types[1] == DT.MR + else _CatXCatCubeMedian + ) + ) + ) + return CubeMedianCls( + dimensions, cube.median[cls._slice_idx_expr(cube, slice_idx)] + ) + + @lazyproperty + def median(self): + """2D np.float64 ndarray of cube median.""" + raise NotImplementedError( # pragma: no cover + f"`{type(self).__name__}` must implement `.median`" + ) + + +class _CatXCatCubeMedian(_BaseCubeMedian): + """Median cube-measure for a slice with no MR dimensions.""" + + @lazyproperty + def median(self): + """2D np.float64 ndarray of median for each valid matrix cell.""" + return self._median + + +class _CatXMrCubeMedian(_BaseCubeMedian): + """Median cube-measure for a NOT_MR_X_MR slice. + + Note that the rows-dimensions need not actually be CAT. + """ + + @lazyproperty + def median(self): + """2D np.float64 ndarray of median for each valid matrix cell.""" + return self._median[:, :, 0] + + +class _MrXCatCubeMedian(_BaseCubeMedian): + """Median cube-measure for an MR_X_NOT_MR slice. + + Note that the columns-dimension need not actually be CAT. 
+ """ + + @lazyproperty + def median(self): + """2D np.float64 ndarray of median for each valid matrix cell.""" + return self._median[:, 0, :] + + +class _MrXMrCubeMedian(_BaseCubeMedian): + """Median cube-measure for an MR_X_MR slice.""" + + @lazyproperty + def median(self): + """2D np.float64 ndarray of median for each valid matrix cell.""" + return self._median[:, 0, :, 0] + + # === OVERLAPS === diff --git a/src/cr/cube/matrix/measure.py b/src/cr/cube/matrix/measure.py index d6b3653e6..6506ceb26 100644 --- a/src/cr/cube/matrix/measure.py +++ b/src/cr/cube/matrix/measure.py @@ -206,6 +206,11 @@ def means(self): """_Means measure object for this cube-result""" return _Means(self._dimensions, self, self._cube_measures) + @lazyproperty + def median(self): + """_Median measure object for this cube-result""" + return _Median(self._dimensions, self, self._cube_measures) + def pairwise_p_vals_for_subvar(self, subvar_idx): """_PairwiseSigPValsForSubvar measure object for this cube-result""" return _PairwiseSigPValsForSubvar( @@ -1177,6 +1182,17 @@ def blocks(self): ) +class _Median(_BaseSecondOrderMeasure): + """Provides the median measure for a matrix.""" + + @lazyproperty + def blocks(self): + """2D array of the four 2D "blocks" making up this measure.""" + return NanSubtotals.blocks( + self._cube_measures.cube_median.median, self._dimensions + ) + + class _MeansSmoothed(_Means, _SmoothedMeasure): """Provides the smoothed mean measure for a matrix.""" diff --git a/tests/fixtures/median-cat-x-cat-hs.json b/tests/fixtures/median-cat-x-cat-hs.json new file mode 100644 index 000000000..f6327b9c1 --- /dev/null +++ b/tests/fixtures/median-cat-x-cat-hs.json @@ -0,0 +1,174 @@ +{ + "result": { + "dimensions": [ + { + "type": { + "class": "categorical", + "categories": [ + { + "id": 1, + "name": "Median", + "missing": false + } + ] + }, + "references": { + "alias": "median", + "name": "median" + } + }, + { + "references": { + "alias": "age4", + "view": { + "show_counts": false, 
+ "show_numeric_values": false, + "transform": { + "insertions": [ + { + "function": "subtotal", + "args": [ + 2, + 1 + ], + "name": "<44", + "anchor": 2 + } + ] + }, + "include_missing": false, + "column_width": null + }, + "description": "Respondent age by category", + "name": "age4" + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "missing": false, + "id": 1, + "name": "Under 30" + }, + { + "numeric_value": null, + "missing": false, + "id": 2, + "name": "30-44" + }, + { + "numeric_value": null, + "missing": false, + "id": 3, + "name": "45-64" + }, + { + "numeric_value": null, + "missing": false, + "id": 4, + "name": "65+" + }, + { + "numeric_value": 32766, + "missing": true, + "id": 32766, + "name": "skipped" + }, + { + "numeric_value": 32767, + "missing": true, + "id": 32767, + "name": "not asked" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "measures": { + "median": { + "data": [ + 14.4393575687, + 37.3212274591, + 25.4857195647, + 23.0242765864, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + } + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": {}, + "type": { + "integer": false, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1, + "NaN": -8 + } + } + } + } + }, + "n": 1500, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 1500, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 1500.0, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 1500, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 1500.0, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 1500, + "weighted_n": 1500.0 + }, + "filtered": { + "unweighted_n": 1500, + "weighted_n": 1500.0 + }, + "counts": [ + 189, + 395, + 606, + 310, + 0, + 0, + 0 + ] + } +} diff --git 
a/tests/fixtures/numeric_arrays/num-arr-median-grouped-by-cat.json b/tests/fixtures/numeric_arrays/num-arr-median-grouped-by-cat.json new file mode 100644 index 000000000..71eb1365d --- /dev/null +++ b/tests/fixtures/numeric_arrays/num-arr-median-grouped-by-cat.json @@ -0,0 +1,224 @@ +{ + "query": { + "measures": { + "valid_count_unweighted": { + "function": "cube_valid_count", + "args": [ + { + "variable": "c3eac288a696476680a82afaead2b516" + } + ] + }, + "median": { + "function": "cube_quantile", + "args": [ + { + "variable": "c3eac288a696476680a82afaead2b516" + }, + { + "value": 0.5 + } + ] + } + }, + "dimensions": [ + { + "variable": "f46d917e75f643f4904b1769b9cc611b" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": false, + "references": { + "alias": "Gender", + "name": "Gender" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "id": 1, + "name": "Male", + "missing": false + }, + { + "numeric_value": null, + "id": 2, + "name": "Female", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + } + ], + "missing": 0, + "measures": { + "valid_count_unweighted": { + "data": [ + 3, + 3, + 1, + 2, + 1, + 1, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "uniform_basis": false, + "name": "Movies", + "subreferences": [ + { + "alias": "Dark Knight", + "name": "Dark Knight" + }, + { + "alias": "Fight Club", + "name": "Fight Club" + }, + { + "alias": "Meets the Par.", + "name": "Meets the Par." 
+ } + ] + }, + "type": { + "integer": false, + "subvariables": [ + "S1", + "S2", + "S3" + ], + "class": "numeric", + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + } + }, + "median": { + "data": [ + 3.7368949481, + 1.8867513459, + { + "?": -8 + }, + 17.6776695297, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + } + ], + "n_missing": 0, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "uniform_basis": false, + "name": "Movies", + "subreferences": [ + { + "alias": "Dark Knight", + "name": "Dark Knight" + }, + { + "alias": "Fight Club", + "name": "Fight Club" + }, + { + "alias": "Meets the Par.", + "name": "Meets the Par." + } + ] + }, + "type": { + "integer": null, + "subvariables": [ + "S1", + "S2", + "S3" + ], + "class": "numeric", + "missing_reasons": { + "No Data": -1, + "NaN": -8 + }, + "missing_rules": {} + } + } + } + }, + "n": 5, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 5, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 5, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 5, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 5, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 5, + "weighted_n": 5 + }, + "filtered": { + "unweighted_n": 5, + "weighted_n": 5 + }, + "counts": [ + 3, + 2, + 0 + ], + "element": "crunch:cube" + } +} diff --git a/tests/fixtures/numeric_arrays/num-arr-median-x-mr.json b/tests/fixtures/numeric_arrays/num-arr-median-x-mr.json new file mode 100644 index 000000000..b49cc9f9b --- /dev/null +++ b/tests/fixtures/numeric_arrays/num-arr-median-x-mr.json @@ -0,0 +1,415 @@ +{ + "query": { + "measures": { + "valid_count_unweighted": { + "function": "cube_valid_count", + "args": [ + { + "variable": "dcae56175f4c4d95bd51dad94728aaf9" + } + ] + }, + "median": { + "function": "cube_quantile", + "args": [ + { + "variable": 
"dcae56175f4c4d95bd51dad94728aaf9" + }, + { + "value": 0.5 + } + ] + } + }, + "dimensions": [ + { + "function": "dimension", + "args": [ + { + "function": "as_selected", + "args": [ + { + "variable": "2929df830f2f44e9b2df6ca808a6d619" + } + ] + }, + { + "value": "subvariables" + } + ] + }, + { + "function": "as_selected", + "args": [ + { + "variable": "2929df830f2f44e9b2df6ca808a6d619" + } + ] + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "X", + "name": "X", + "description": null + }, + { + "alias": "Y", + "name": "Y", + "description": null + }, + { + "alias": "Z", + "name": "Z", + "description": null + } + ], + "uniform_basis": false, + "name": "M", + "alias": "M" + }, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "references": { + "alias": "X", + "name": "X", + "description": null + }, + "derived": false, + "id": "4QUIV3Vk8ugFondcoPjPUM000000" + }, + "missing": false + }, + { + "id": 2, + "value": { + "references": { + "alias": "Y", + "name": "Y", + "description": null + }, + "derived": false, + "id": "4QUIV3Vk8ugFondcoPjPUM000001" + }, + "missing": false + }, + { + "id": 3, + "value": { + "references": { + "alias": "Z", + "name": "Z", + "description": null + }, + "derived": false, + "id": "4QUIV3Vk8ugFondcoPjPUM000002" + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "references": { + "subreferences": [ + { + "alias": "X", + "name": "X", + "description": null + }, + { + "alias": "Y", + "name": "Y", + "description": null + }, + { + "alias": "Z", + "name": "Z", + "description": null + } + ], + "uniform_basis": false, + "name": "M", + "alias": "M" + }, + "derived": true, + "type": { + "ordinal": false, + "subvariables": [ + "4QUIV3Vk8ugFondcoPjPUM000000", + "4QUIV3Vk8ugFondcoPjPUM000001", + "4QUIV3Vk8ugFondcoPjPUM000002" + ], + "class": "categorical", + 
"categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "name": "Selected", + "missing": false + }, + { + "numeric_value": 0, + "id": 0, + "name": "Other", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + } + ], + "missing": 1, + "measures": { + "valid_count_unweighted": { + "data": [ + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 1, + 0, + 1, + 2, + 2, + 1, + 1, + 1, + 0, + 1, + 0, + 1, + 2, + 2, + 1, + 1, + 1, + 0 + ], + "n_missing": 1, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "uniform_basis": false, + "name": "Movies", + "subreferences": [ + { + "alias": "Dark Knight", + "name": "Dark Knight" + }, + { + "alias": "Fight Club", + "name": "Fight Club" + }, + { + "alias": "Meets the Par.", + "name": "Meets the P." + } + ] + }, + "type": { + "integer": false, + "subvariables": [ + "S1", + "S2", + "S3" + ], + "class": "numeric", + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + } + } + }, + "stddev": { + "data": [ + 2.4142135624, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 1.2426406871, + 3.8284271247, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 2.2426406871, + 1.8284271247, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + } + ], + "n_missing": 1, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "uniform_basis": false, + "name": "Movies", + "subreferences": [ + { + "alias": "Dark Knight", + "name": "Dark Knight" + }, + { + "alias": "Fight Club", + "name": "Fight Club" + }, + { + "alias": "Meets the Par.", + "name": "Meets the P." 
+ } + ] + }, + "type": { + "integer": null, + "subvariables": [ + "S1", + "S2", + "S3" + ], + "class": "numeric", + "missing_reasons": { + "No Data": -1, + "NaN": -8 + }, + "missing_rules": {} + } + } + } + }, + "n": 4, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 4, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 4, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 4, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 4, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "filtered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "counts": [ + 2, + 1, + 1, + 1, + 2, + 1, + 1, + 2, + 1 + ], + "element": "crunch:cube" + } +} diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index 207c3a6f1..83dd7e251 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -57,6 +57,12 @@ def it_provides_values_for_cat_x_cat(self): str(e.value) == "`.means` is undefined for a cube-result without a mean measure" ) + with pytest.raises(ValueError) as e: + slice_.median + assert ( + str(e.value) + == "`.median` is undefined for a cube-result without a median measure" + ) with pytest.raises(ValueError) as e: slice_.pairwise_means_indices assert ( @@ -441,6 +447,19 @@ def it_provides_values_for_mean_cat_x_cat_hs(self): ) assert slice_.rows_margin.tolist() == [1500.0] + def it_provides_values_for_median_cat_x_cat_hs(self): + slice_ = Cube(CR.MEDIAN_CAT_X_CAT_HS).partitions[0] + + # This fixture has both cube_counts and cube_median measures, for this reason + # both measures are available at cubepart level. 
+ assert slice_.columns_margin.tolist() == [189, 395, 584, 606, 310] + assert slice_.counts == pytest.approx(np.array([[189, 395, 584, 606, 310]])) + assert slice_.median == pytest.approx( + np.array([[14.4393575, 37.3212274, np.nan, 25.4857195, 23.0242765]]), + nan_ok=True, + ) + assert slice_.rows_margin.tolist() == [1500.0] + def it_provides_values_for_mr_x_mr_means(self): slice_ = Cube(CR.MR_X_MR_MEANS).partitions[0] diff --git a/tests/integration/test_numeric_array.py b/tests/integration/test_numeric_array.py index 025dd4125..676130742 100644 --- a/tests/integration/test_numeric_array.py +++ b/tests/integration/test_numeric_array.py @@ -213,6 +213,23 @@ def it_provides_stddev_for_num_array_grouped_by_cat(self): ) assert slice_.columns_base == pytest.approx(np.array([[3, 2], [3, 1], [1, 1]])) + def it_provides_median_for_num_array_grouped_by_cat(self): + slice_ = Cube(NA.NUM_ARR_MEDIAN_GROUPED_BY_CAT).partitions[0] + + assert slice_.median == pytest.approx( + np.array( + [ + # --------Gender------------ + # M F + [3.7368949, 17.6776695], # S1 (Ticket Sold) + [1.8867513, np.nan], # S2 (Ticket Sold) + [np.nan, np.nan], # S3 (Ticket Sold) + ], + ), + nan_ok=True, + ) + assert slice_.columns_base == pytest.approx(np.array([[3, 2], [3, 1], [1, 1]])) + def it_provides_stddev_for_num_array_x_mr(self): slice_ = Cube(NA.NUM_ARR_STDDEV_X_MR).partitions[0] @@ -232,6 +249,25 @@ def it_provides_stddev_for_num_array_x_mr(self): np.array([[2, 1, 1], [1, 0, 0], [1, 1, 1]]) ) + def it_provides_median_for_num_array_x_mr(self): + slice_ = Cube(NA.NUM_ARR_MEDIAN_X_MR).partitions[0] + + assert slice_.stddev == pytest.approx( + np.array( + [ + # -------------------------MR---------------- + # S1 S2 S3 + [2.4142136, np.nan, np.nan], # S1 (num arr) + [np.nan, np.nan, np.nan], # S2 (num arr) + [np.nan, np.nan, np.nan], # S3 (num arr) + ], + ), + nan_ok=True, + ) + assert slice_.columns_base == pytest.approx( + np.array([[2, 1, 1], [1, 0, 0], [1, 1, 1]]) + ) + def 
it_provides_share_of_sum_for_numeric_array_with_no_grouping(self): strand = Cube(NA.NUM_ARR_SUM_NO_GROUPING).partitions[0] From 434097539180cdcf886aea497deb91482f381c9e Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 16:21:40 +0200 Subject: [PATCH 05/12] black --- src/cr/cube/matrix/cubemeasure.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cr/cube/matrix/cubemeasure.py b/src/cr/cube/matrix/cubemeasure.py index d5accbe2a..2da54e67b 100644 --- a/src/cr/cube/matrix/cubemeasure.py +++ b/src/cr/cube/matrix/cubemeasure.py @@ -139,9 +139,7 @@ def factory(cls, counts, diff_nans, cube, dimensions, slice_idx): ( "MR" if dim_type == DT.MR - else "ARR" - if dim_type in DT.ARRAY_TYPES - else "CAT" + else "ARR" if dim_type in DT.ARRAY_TYPES else "CAT" ) for dim_type in cube.dimension_types[-2:] ) From 0dfbb798fdf4d381819f9f765b9184eb6906cdce Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 16:46:18 +0200 Subject: [PATCH 06/12] fix coverage --- src/cr/cube/cube.py | 6 +- tests/fixtures/median-cat-x-mr.json | 454 ++++++++++++++++ tests/fixtures/median-mr-x-cat.json | 389 ++++++++++++++ tests/fixtures/mr-x-mr-median.json | 798 ++++++++++++++++++++++++++++ tests/integration/test_cube.py | 9 + tests/integration/test_matrix.py | 37 ++ 6 files changed, 1692 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/median-cat-x-mr.json create mode 100644 tests/fixtures/median-mr-x-cat.json create mode 100644 tests/fixtures/mr-x-mr-median.json diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index b03c4d484..d73947697 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -805,6 +805,10 @@ def missing_count(self) -> int: # fixtures that don't have valid_counts. if self.means is not None: return self.means.missing_count + # The check on the median measure is needed for retro-compatibility with the old + # fixtures that don't have valid_counts. 
+ if self.median is not None: + return self.median.missing_count return self._cube_dict["result"].get("missing", 0) @lazyproperty @@ -1068,7 +1072,7 @@ class _MedianMeasure(_BaseMeasure): @lazyproperty def missing_count(self) -> int: """Numeric value representing count of missing rows in response.""" - return self._cube_dict["result"]["measures"]["mean"].get("n_missing", 0) + return self._cube_dict["result"]["measures"]["median"].get("n_missing", 0) @lazyproperty def _flat_values(self) -> Optional[np.ndarray]: diff --git a/tests/fixtures/median-cat-x-mr.json b/tests/fixtures/median-cat-x-mr.json new file mode 100644 index 000000000..11732f7d9 --- /dev/null +++ b/tests/fixtures/median-cat-x-mr.json @@ -0,0 +1,454 @@ +{ + "query": { + "dimensions": [ + {"variable": "https://app.crunch.io/api/datasets/7af9b5a206054cc38c84b73ab700e201/variables/000026/"}, + {"each": "https://app.crunch.io/api/datasets/7af9b5a206054cc38c84b73ab700e201/variables/0000dc/"}, + { + "args": [ + {"variable": "https://app.crunch.io/api/datasets/7af9b5a206054cc38c84b73ab700e201/variables/0000dc/"} + ], + "function": "as_selected" + } + ], + "measures": { + "count": { + "args": [], + "function": "cube_count" + }, + "median": { + "args": [ + {"variable": "https://app.crunch.io/api/datasets/7af9b5a206054cc38c84b73ab700e201/variables/0000a1/"}, + {"value": 0.5} + ], + "function": "cube_quantile" + } + }, + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "counts": [ + 8, 7, 40, + 7, 8, 40, + 4, 9, 42, + 13, 7, 35, + 27, 5, 23, + + 7, 17, 102, + 16, 18, 92, + 21, 16, 89, + 36, 14, 76, + 58, 11, 57, + + 5, 68, 543, + 56, 65, 495, + 71, 64, 481, + 230, 43, 343, + 307, 39, 270, + + 6, 51, 253, + 26, 49, 235, + 39, 42, 229, + 130, 29, 151, + 134, 33, 143, + + 5, 64, 332, + 27, 59, 315, + 54, 57, 290, + 190, 31, 180, + 166, 42, 193, + + 2, 15, 133, + 11, 12, 127, + 11, 14, 125, + 36, 10, 104, + 101, 8, 41, + + 0, 0, 4, + 0, 0, 4, + 0, 0, 4, + 2, 0, 2, + 2, 0, 2, + + 0, 0, 
0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 0 + ], + "dimensions": [ + { + "derived": false, + "references": { + "alias": "pasta", + "description": "The geometry of pasta", + "name": "Shapes of pasta", + "notes": "A categorical variable", + "view": { + "transform": { + "insertions": [] + } + } + }, + "type": { + "categories": [ + {"id": 1, "name": "Bucatini", "numeric_value": 1}, + {"id": 2, "name": "Chitarra", "numeric_value": 2}, + {"id": 0, "name": "Boccoli", "numeric_value": 0}, + {"id": 4, "name": "Orecchiette", "numeric_value": 4}, + {"id": 5, "name": "Quadrefiore", "numeric_value": 5}, + {"id": 6, "name": "Fileja", "numeric_value": 6}, + {"id": 32766, "missing": true, "name": "Skipped", "numeric_value": 32766}, + {"id": 32767, "missing": true, "name": "Not asked", "numeric_value": 32767}, + {"id": -1, "missing": true, "name": "No Data"} + ], + "class": "categorical", + "ordinal": false + } + }, + { + "derived": true, + "references": { + "alias": "nordics", + "description": "Which of the following Nordic countries have you visited? 
(select all that apply)", + "is_dichotomous": true, + "name": "Nordic countries", + "notes": "A multiple response variable", + "subreferences": [ + {"alias": "dk", "description": "milstat_1", "name": "Denmark"}, + {"alias": "fi", "description": "milstat_2", "name": "Finland"}, + {"alias": "is", "description": "milstat_3", "name": "Iceland"}, + {"alias": "no", "description": "milstat_4", "name": "Norway"}, + {"alias": "se", "description": "milstat_5", "name": "Sweden"} + ], + "uniform_basis": false + }, + "type": { + "class": "enum", + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "id": "00c0", + "references": {"alias": "dk", "description": "milstat_1", "name": "Denmark"} + } + }, + { + "id": 2, + "value": { + "derived": false, + "id": "00c1", + "references": {"alias": "fi", "description": "milstat_2", "name": "Finland"} + } + }, + { + "id": 3, + "value": { + "derived": false, + "id": "00c2", + "references": {"alias": "is", "description": "milstat_3", "name": "Iceland"} + } + }, + { + "id": 4, + "value": { + "derived": false, + "id": "00c3", + "references": {"alias": "no", "description": "milstat_4", "name": "Norway"} + } + }, + { + "id": 5, + "value": { + "derived": false, + "id": "00c4", + "references": {"alias": "se", "description": "milstat_5", "name": "Sweden"} + } + } + ], + "subtype": { + "class": "variable" + } + } + }, + { + "derived": true, + "references": { + "alias": "nordics", + "description": "Which of the following Nordic countries have you visited? 
(select all that apply)", + "is_dichotomous": true, + "name": "Nordic countries", + "notes": "A multiple response variable", + "subreferences": [ + {"alias": "dk", "description": "milstat_1", "name": "Denmark"}, + {"alias": "fi", "description": "milstat_2", "name": "Finland"}, + {"alias": "is", "description": "milstat_3", "name": "Iceland"}, + {"alias": "no", "description": "milstat_4", "name": "Norway"}, + {"alias": "se", "description": "milstat_5", "name": "Sweden"} + ], + "uniform_basis": false + }, + "type": { + "categories": [ + {"id": 1, "name": "Selected", "numeric_value": 1, "selected": true}, + {"id": 0, "name": "Other", "numeric_value": 0}, + {"id": -1, "missing": true, "name": "No Data"} + ], + "class": "categorical", + "ordinal": false, + "subvariables": ["00c0", "00c1", "00c2", "00c3", "00c4"] + } + } + ], + "element": "crunch:cube", + "filter_stats": { + "filtered": { + "unweighted": {"missing": 0, "other": 0, "selected": 1662}, + "weighted": {"missing": 0, "other": 0, "selected": 1662} + }, + "filtered_complete": { + "unweighted": {"missing": 0, "other": 0, "selected": 1662}, + "weighted": {"missing": 0, "other": 0, "selected": 1662} + } + }, + "filtered": {"unweighted_n": 1662, "weighted_n": 1662}, + "measures": { + "count": { + "data": [ + 8, + 7, + 40, + 7, + 8, + 40, + 4, + 9, + 42, + 13, + 7, + 35, + 27, + 5, + 23, + 7, + 17, + 102, + 16, + 18, + 92, + 21, + 16, + 89, + 36, + 14, + 76, + 58, + 11, + 57, + 5, + 68, + 543, + 56, + 65, + 495, + 71, + 64, + 481, + 230, + 43, + 343, + 307, + 39, + 270, + 6, + 51, + 253, + 26, + 49, + 235, + 39, + 42, + 229, + 130, + 29, + 151, + 134, + 33, + 143, + 5, + 64, + 332, + 27, + 59, + 315, + 54, + 57, + 290, + 190, + 31, + 180, + 166, + 42, + 193, + 2, + 15, + 133, + 11, + 12, + 127, + 11, + 14, + 125, + 36, + 10, + 104, + 101, + 8, + 41, + 0, + 0, + 4, + 0, + 0, + 4, + 0, + 0, + 4, + 2, + 0, + 2, + 2, + 0, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": true, + "missing_reasons": {"No Data": -1}, + "missing_rules": {} + } + }, + "n_missing": 4 + }, + "median": { + "data": [ + 19.0, 39.42857142857143, 11.575, + 30.714285714285715, 21.625, 10.575, + 18.0, 39.333333333333336, 38.69047619047619, + 49.76923076923077, 36.285714285714285, 36.285714285714285, + 37.2962962962963, 39.8, 41.95652173913044, + + 35.57142857142857, 51.294117647058826, 48.14705882352941, + 45.0, 49.666666666666664, 48.02173913043478, + 61.23809523809524, 47.8125, 44.73033707865169, + 48.0, 50.0, 47.421052631578945, + 45.741379310344826, 49.72727272727273, 49.68421052631579, + + 32.0, 49.1764705882353, 46.54696132596685, + 42.517857142857146, 49.646153846153844, 46.81010101010101, + 59.42253521126761, 46.796875, 44.83367983367983, + 51.95217391304348, 43.97674418604651, 43.55393586005831, + 41.57654723127036, 52.48717948717949, 51.733333333333334, + + 8.166666666666668, 9.431372549019606, 5.22924901185771, + 0.46153846153846, 7.285714285714285, 5.08085106382979, + 5.282051282051285, 6.714285714285715, 4.25764192139738, + 4.73076923076923, 4.310344827586206, 4.96026490066225, + 4.16417910447761, 9.90909090909091, 5.77622377622377, + + 2.6, 51.0625, 50.93072289156626, + 7.148148148148145, 50.152542372881356, 51.13650793650794, + 2.388888888888886, 48.40350877192982, 49.00689655172414, + 3.90526315789474, 44.645161290322584, 48.41111111111111, + 5.4277108433735, 55.785714285714285, 54.17616580310881, + + 4.5, 42.93333333333333, 38.30827067669173, + 9.09090909090909, 43.333333333333336, 38.09448818897638, + 1.18181818181818, 40.857142857142854, 37.224, + 4.19444444444444, 39.7, 36.53846153846154, + 5.960396039603964, 46.375, 43.53658536585366, + + {"?": -8}, {"?": -8}, 7.75, + {"?": -8}, {"?": -8}, 7.75, + {"?": -8}, {"?": -8}, 7.75, + 5.0, {"?": -8}, 7.5, + 7.5, {"?": -8}, 8.0, + + {"?": -8}, 
{"?": -8}, {"?": -8}, + {"?": -8}, {"?": -8}, {"?": -8}, + {"?": -8}, {"?": -8}, {"?": -8}, + {"?": -8}, {"?": -8}, {"?": -8}, + {"?": -8}, {"?": -8}, {"?": -8}, + + {"?": -8}, {"?": -8}, {"?": -8}, + {"?": -8}, {"?": -8}, {"?": -8}, + {"?": -8}, {"?": -8}, {"?": -8}, + {"?": -8}, {"?": -8}, {"?": -8}, + {"?": -8}, {"?": -8}, {"?": -8} + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": true, + "missing_reasons": { + "NaN": -8, + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 4 + } + }, + "missing": 4, + "n": 1662, + "unfiltered": { + "unweighted_n": 1662, + "weighted_n": 1662 + } + } +} diff --git a/tests/fixtures/median-mr-x-cat.json b/tests/fixtures/median-mr-x-cat.json new file mode 100644 index 000000000..2cde59edd --- /dev/null +++ b/tests/fixtures/median-mr-x-cat.json @@ -0,0 +1,389 @@ +{ + "query": { + "measures": { + "valid_count_unweighted": { + "function": "cube_valid_count", + "args": [ + { + "variable": "4d2dbd48338b45e3abb7f0f64975f0c9" + } + ] + }, + "median": { + "function": "cube_quantile", + "args": [ + { + "variable": "4d2dbd48338b45e3abb7f0f64975f0c9" + }, + { + "value": 0.5 + } + ] + } + }, + "dimensions": [ + { + "function": "dimension", + "args": [ + { + "function": "as_selected", + "args": [ + { + "variable": "0d85b19293704f48bf991358f1fb3a7f" + } + ] + }, + { + "value": "subvariables" + } + ] + }, + { + "function": "as_selected", + "args": [ + { + "variable": "0d85b19293704f48bf991358f1fb3a7f" + } + ] + }, + { + "variable": "6zVdSaG9ijnwgEIxTO6lo1000006" + } + ], + "weight": null + }, + "query_environment": { + "filter": [] + }, + "result": { + "dimensions": [ + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "X", + "name": "X", + "description": null + }, + { + "alias": "Y", + "name": "Y", + "description": null + }, + { + "alias": "Z", + "name": "Z", + "description": null + } + ], + "uniform_basis": false, + "name": "M", + "alias": "M" + }, + 
"type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "references": { + "alias": "X", + "name": "X", + "description": null + }, + "derived": false, + "id": "6zVdSaG9ijnwgEIxTO6lo1000000" + }, + "missing": false + }, + { + "id": 2, + "value": { + "references": { + "alias": "Y", + "name": "Y", + "description": null + }, + "derived": false, + "id": "6zVdSaG9ijnwgEIxTO6lo1000001" + }, + "missing": false + }, + { + "id": 3, + "value": { + "references": { + "alias": "Z", + "name": "Z", + "description": null + }, + "derived": false, + "id": "6zVdSaG9ijnwgEIxTO6lo1000002" + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "references": { + "subreferences": [ + { + "alias": "X", + "name": "X", + "description": null + }, + { + "alias": "Y", + "name": "Y", + "description": null + }, + { + "alias": "Z", + "name": "Z", + "description": null + } + ], + "uniform_basis": false, + "name": "M", + "alias": "M" + }, + "derived": true, + "type": { + "ordinal": false, + "subvariables": [ + "6zVdSaG9ijnwgEIxTO6lo1000000", + "6zVdSaG9ijnwgEIxTO6lo1000001", + "6zVdSaG9ijnwgEIxTO6lo1000002" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "name": "Selected", + "missing": false + }, + { + "numeric_value": 0, + "id": 0, + "name": "Other", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + }, + { + "references": { + "alias": "cat", + "name": "cat", + "description": null + }, + "derived": false, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": null, + "id": 1, + "name": "a", + "missing": false + }, + { + "numeric_value": null, + "id": 2, + "name": "b", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + } + ], + "missing": 1, + "measures": { + "valid_count_unweighted": { + "data": [ + 2, + 0, + 0, + 
0, + 1, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 0 + ], + "n_missing": 1, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "sum" + } + }, + "type": { + "integer": false, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + }, + "median": { + "data": [ + 3.5123, + 0.1523, + 0.1263, + 0.7123, + 2.3123, + 0.3123, + 0.1123, + 3.4123, + 0.5123, + 2.9123, + 0.1123, + 0.1123, + 1.1123, + 2.3123, + 0.4123, + 0.95123, + 3.9123, + 0.8123, + 2.7123, + 0.6123, + 0.5123, + 1.4123, + 2.3123, + 0.2123, + 0.1123, + 3.1123, + 0.0123 + ], + "n_missing": 1, + "metadata": { + "derived": true, + "references": { + "alias": "Movies", + "name": "Movies", + "view": { + "summary_statistic": "median" + } + }, + "type": { + "integer": null, + "class": "numeric", + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + } + } + } + } + }, + "n": 4, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 4, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 4, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 4, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 4, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "filtered": { + "unweighted_n": 4, + "weighted_n": 4 + }, + "counts": [ + 2, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 1, + 0 + ], + "element": "crunch:cube" + } +} diff --git a/tests/fixtures/mr-x-mr-median.json b/tests/fixtures/mr-x-mr-median.json new file mode 100644 index 000000000..bb60a5303 --- /dev/null +++ b/tests/fixtures/mr-x-mr-median.json @@ -0,0 +1,798 @@ +{ + "query": { + "measures": { + "count": { + "function": "cube_count", + "args": [] + }, + "median": { + 
"function": "cube_quantile", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM000002/" + }, + { + "value": 0.5 + } + ] + } + }, + "dimensions": [ + { + "function": "dimension", + "args": [ + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + }, + { + "value": "subvariables" + } + ] + }, + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + }, + { + "function": "dimension", + "args": [ + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + }, + { + "value": "subvariables" + } + ] + }, + { + "function": "as_selected", + "args": [ + { + "variable": "https://alpha.crunch.io/api/datasets/582e54b9691b4f1893f2830a076f2adc/variables/4mSyJJJr7OiwwqO2VPuXfM00000d/" + } + ] + } + ] + }, + "result": { + "dimensions": [ + { + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "description": "My multiple response set", + "name": "mymrset", + "alias": "mymrset" + }, + "derived": true, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + "id": "0004" + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "bool2", + "name": "Response #2", + "description": 
"bool2" + }, + "id": "0005" + }, + "missing": false + }, + { + "id": 3, + "value": { + "derived": false, + "references": { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + }, + "id": "0006" + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "alias": "mymrset", + "description": "My multiple response set", + "name": "mymrset" + }, + "type": { + "ordinal": false, + "subvariables": [ + "0004", + "0005", + "0006" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + }, + { + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "description": "My multiple response set", + "name": "mymrset", + "alias": "mymrset" + }, + "derived": true, + "type": { + "subtype": { + "class": "variable" + }, + "elements": [ + { + "id": 1, + "value": { + "derived": false, + "references": { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + "id": "0004" + }, + "missing": false + }, + { + "id": 2, + "value": { + "derived": false, + "references": { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + "id": "0005" + }, + "missing": false + }, + { + "id": 3, + "value": { + 
"derived": false, + "references": { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + }, + "id": "0006" + }, + "missing": false + } + ], + "class": "enum" + } + }, + { + "derived": true, + "references": { + "subreferences": [ + { + "alias": "bool1", + "name": "Response #1", + "description": "bool1" + }, + { + "alias": "bool2", + "name": "Response #2", + "description": "bool2" + }, + { + "alias": "bool3", + "name": "Response #3", + "description": "bool3" + } + ], + "uniform_basis": false, + "alias": "mymrset", + "description": "My multiple response set", + "name": "mymrset" + }, + "type": { + "ordinal": false, + "subvariables": [ + "0004", + "0005", + "0006" + ], + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "selected": true, + "id": 1, + "missing": false, + "name": "Selected" + }, + { + "numeric_value": 0, + "missing": false, + "id": 0, + "name": "Other" + }, + { + "numeric_value": null, + "missing": true, + "id": -1, + "name": "No Data" + } + ] + } + } + ], + "missing": 0, + "measures": { + "count": { + "data": [ + 3, + 0, + 0, + 2, + 1, + 0, + 0, + 3, + 0, + 0, + 2, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 1, + 0, + 2, + 1, + 1, + 4, + 0, + 0, + 0, + 4, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 2, + 1, + 4, + 2, + 0, + 0, + 6, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0, + "metadata": { + "references": {}, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1 + }, + "class": "numeric" + } + } + }, + "median": { + "data": [ + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 2.234, + { + "?": -8 + }, + 1.334, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + 
}, + 4.234, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 3.14159, + 3.14159, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 3.14159, + { + "?": -8 + }, + { + "?": -8 + }, + 8.234, + 3.14159, + 2.187795, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 0.187795, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 1.234, + 3.14159, + 0.187795, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + 2.187795, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + }, + { + "?": -8 + } + ], + "n_missing": 4, + "metadata": { + "references": { + "alias": "z", + "format": { + "data": { + "digits": 2 + }, + "summary": { + "digits": 2 + } + }, + "description": "Numeric variable with missing value range", + "name": "z" + }, + "derived": true, + "type": { + "integer": true, + "missing_rules": {}, + "missing_reasons": { + "No Data": -1, + "NaN": -8 + }, + "class": "numeric" + } + } + } + }, + "n": 6, + "filter_stats": { + "filtered_complete": { + "unweighted": { + "selected": 6, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 6, + "other": 0, + "missing": 0 + } + }, + "filtered": { + "unweighted": { + "selected": 6, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 6, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 6, + "weighted_n": 6 + }, + "filtered": { + "unweighted_n": 6, + 
"weighted_n": 6 + }, + "counts": [ + 3, + 0, + 0, + 2, + 1, + 0, + 0, + 3, + 0, + 0, + 2, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 1, + 0, + 2, + 1, + 1, + 4, + 0, + 0, + 0, + 4, + 0, + 1, + 1, + 0, + 0, + 2, + 0, + 0, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 2, + 1, + 4, + 2, + 0, + 0, + 6, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "element": "crunch:cube" + } +} diff --git a/tests/integration/test_cube.py b/tests/integration/test_cube.py index 62cb218d4..2efc8dc37 100644 --- a/tests/integration/test_cube.py +++ b/tests/integration/test_cube.py @@ -272,6 +272,15 @@ def it_provides_the_means_missing_count_when_means_are_available(self): missing_count = measures.missing_count assert missing_count == 3 + def it_provides_the_median_missing_count_when_median_is_available(self): + cube_dict = CR.MEDIAN_CAT_X_CAT_HS + measures = _Measures( + cube_dict, + Dimensions.from_dicts(cube_dict["result"]["dimensions"]), + ) + missing_count = measures.missing_count + assert missing_count == 0 + def it_provides_the_means_missing_count_when_sum_are_available(self): cube_dict = CR.SUM_CAT_X_MR measures = _Measures( diff --git a/tests/integration/test_matrix.py b/tests/integration/test_matrix.py index 180982dc8..dc642ea12 100644 --- a/tests/integration/test_matrix.py +++ b/tests/integration/test_matrix.py @@ -1268,6 +1268,36 @@ def it_computes_means_cat_x_mr(self): ), ) + def it_computes_median_cat_x_mr(self): + slice_ = Cube(CR.MEDIAN_CAT_X_MR).partitions[0] + np.testing.assert_almost_equal( + slice_.median, + np.array( + [ + [19.0, 30.71428571, 18.0, 49.76923077, 37.2962963], + [35.57142857, 45.0, 61.23809524, 48.0, 45.74137931], + [32.0, 42.51785714, 59.42253521, 51.95217391, 41.57654723], + [8.16666667, 0.46153846, 5.28205128, 4.73076923, 4.1641791], + [2.6, 7.14814815, 2.38888889, 3.90526316, 5.42771084], + [4.5, 9.09090909, 1.18181818, 4.19444444, 5.96039604], + ] + ), 
+ ) + + def it_computes_median_for_mr_x_mr(self): + slice_ = Cube(CR.MR_X_MR_MEDIAN).partitions[0] + + assert slice_.median == pytest.approx( + np.array( + [ + [np.nan, np.nan, np.nan], + [np.nan, 2.187795, np.nan], + [np.nan, np.nan, np.nan], + ] + ), + nan_ok=True, + ) + def it_computes_means_mr_x_cat(self): slice_ = Cube(CR.MEANS_MR_X_CAT).partitions[0] np.testing.assert_almost_equal( @@ -1506,6 +1536,13 @@ def it_computes_sum_mr_x_cat(self): np.array([[3.0, 0.0], [2.0, 0.0], [2.0, 0.0]]) ) + def it_computes_median_mr_x_cat(self): + slice_ = Cube(CR.MEDIAN_MR_X_CAT).partitions[0] + + assert slice_.median == pytest.approx( + np.array([[3.5123, 0.1523], [2.9123, 0.1123], [2.7123, 0.6123]]) + ) + class DescribeCubemeasure: def it_provides_overlaps_for_cat_x_mr_sub_x_mr_sel(self): From 80146e5ac3b5926f523da7f672e2a194d49a747e Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 17:02:12 +0200 Subject: [PATCH 07/12] unit tests for 1d objs --- tests/unit/stripe/test_cubemeasure.py | 89 +++++++++++++++++++++++++++ tests/unit/stripe/test_measure.py | 30 +++++++++ 2 files changed, 119 insertions(+) diff --git a/tests/unit/stripe/test_cubemeasure.py b/tests/unit/stripe/test_cubemeasure.py index 60a9fe727..e30ad23f4 100644 --- a/tests/unit/stripe/test_cubemeasure.py +++ b/tests/unit/stripe/test_cubemeasure.py @@ -10,11 +10,14 @@ from cr.cube.enums import DIMENSION_TYPE as DT from cr.cube.stripe.cubemeasure import ( _BaseCubeMeans, + _BaseCubeMedian, _BaseCubeSums, _BaseCubeCounts, _CatCubeCounts, _CatCubeMeans, + _CatCubeMedian, _CatCubeSums, + _MrCubeMedian, CubeMeasures, _MrCubeCounts, _MrCubeMeans, @@ -43,6 +46,21 @@ def it_provides_access_to_the_cube_means_object( _BaseCubeMeans_.factory.assert_called_once_with(cube_, rows_dimension_) assert cube_means is cube_means_ + def it_provides_access_to_the_cube_median_object( + self, request, cube_, rows_dimension_ + ): + cube_medians_ = instance_mock(request, _BaseCubeMedian) + _BaseCubeMedian_ = class_mock( 
+ request, "cr.cube.stripe.cubemeasure._BaseCubeMedian" + ) + _BaseCubeMedian_.factory.return_value = cube_medians_ + cube_measures = CubeMeasures(cube_, rows_dimension_, None, None) + + cube_medians = cube_measures.cube_median + + _BaseCubeMedian_.factory.assert_called_once_with(cube_, rows_dimension_) + assert cube_medians is cube_medians_ + def it_provides_access_to_the_cube_sum_object( self, request, cube_, rows_dimension_ ): @@ -349,6 +367,77 @@ def but_it_raises_value_error_when_the_cube_result_does_not_contain_means( assert str(e.value) == "cube-result does not contain cube-means measure" +# === MEDIANS === + + +class Describe_BaseCubeMedian: + """Unit test suite for `cr.cube.matrix.cubemeasure._BaseCubeMedian`.""" + + @pytest.mark.parametrize( + "rows_dimension_type, CubeMedianCls, medians", + ( + (DT.CAT, _CatCubeMedian, [1, 2, 3]), + (DT.MR, _MrCubeMedian, [[1, 6], [2, 5], [3, 4]]), + ), + ) + def it_provides_a_factory_for_constructing_cube_medians_objects( + self, request, rows_dimension_type, CubeMedianCls, medians + ): + cube_ = instance_mock(request, Cube, median=medians) + rows_dimension_ = instance_mock( + request, Dimension, dimension_type=rows_dimension_type + ) + cube_medians_ = instance_mock(request, CubeMedianCls) + CubeMedianCls_ = class_mock( + request, + "cr.cube.stripe.cubemeasure.%s" % CubeMedianCls.__name__, + return_value=cube_medians_, + ) + + cube_medians = _BaseCubeMedian.factory(cube_, rows_dimension_) + + CubeMedianCls_.assert_called_once_with(rows_dimension_, medians) + assert cube_medians is cube_medians_ + + +class Describe_CatCubeMedians: + """Unit-test suite for `cr.cube.stripe.cubemeasure._CatCubeMedians`.""" + + def it_knows_its_medians(self): + cube_medians = _CatCubeMedian(None, np.array([1.1, 2.2, 3.3])) + assert cube_medians.median == pytest.approx([1.1, 2.2, 3.3]) + + def but_it_raises_value_error_when_the_cube_result_does_not_contain_medians( + self, request + ): + cube_ = instance_mock(request, Cube) + cube_.median = 
None + with pytest.raises(ValueError) as e: + _CatCubeMedian(None, None).factory(cube_, None) + + assert str(e.value) == "cube-result does not contain cube-median measure" + + +class Describe_MrCubeMedians: + """Unit-test suite for `cr.cube.stripe.cubemeasure._MrCubeMedians`.""" + + def it_knows_its_medians(self): + cube_medians = _MrCubeMedian( + None, np.array([[1.1, 2.2], [3.3, 4.4], [5.5, 6.6]]) + ) + assert cube_medians.median == pytest.approx([1.1, 3.3, 5.5]) + + def but_it_raises_value_error_when_the_cube_result_does_not_contain_medians( + self, request + ): + cube_ = instance_mock(request, Cube) + cube_.median = None + with pytest.raises(ValueError) as e: + _CatCubeMedian(None, None).factory(cube_, None) + + assert str(e.value) == "cube-result does not contain cube-median measure" + + # === SUM === diff --git a/tests/unit/stripe/test_measure.py b/tests/unit/stripe/test_measure.py index d68ae97f4..98ee75a99 100644 --- a/tests/unit/stripe/test_measure.py +++ b/tests/unit/stripe/test_measure.py @@ -11,6 +11,7 @@ from cr.cube.stripe.cubemeasure import ( _BaseCubeMeans, _BaseCubeCounts, + _BaseCubeMedian, _CatCubeCounts, CubeMeasures, ) @@ -19,6 +20,7 @@ _BaseSecondOrderMeasure, _Means, _MeansSmoothed, + _Median, _PopulationProportions, _PopulationProportionStderrs, _ScaledCounts, @@ -43,6 +45,7 @@ class DescribeStripeMeasures: "measure_prop_name, MeasureCls", ( ("means", _Means), + ("median", _Median), ("population_proportions", _PopulationProportions), ("population_proportion_stderrs", _PopulationProportionStderrs), ("scaled_counts", _ScaledCounts), @@ -194,6 +197,33 @@ def it_computes_its_subtotal_values_to_help(self, request): assert subtotal_values == pytest.approx([np.nan, np.nan], nan_ok=True) +class Describe_Median: + """Unit test suite for `cr.cube.stripe.measure._Median` object.""" + + def it_computes_its_base_values_to_help(self, request): + cube_medians_ = instance_mock( + request, _BaseCubeMedian, median=np.array([1.1, 2.2, 3.3]) + ) + 
cube_measures_ = instance_mock(request, CubeMeasures, cube_median=cube_medians_) + median = _Median(None, None, cube_measures_) + + assert median.base_values == pytest.approx([1.1, 2.2, 3.3]) + + def it_computes_its_subtotal_values_to_help(self, request): + property_mock(request, _Median, "base_values", return_value=[1.1, 2.2, 3.3]) + rows_dimension_ = instance_mock(request, Dimension) + NanSubtotals_ = class_mock(request, "cr.cube.stripe.measure.NanSubtotals") + NanSubtotals_.subtotal_values.return_value = np.array([np.nan, np.nan]) + medians = _Median(rows_dimension_, None, None) + + subtotal_values = medians.subtotal_values + + NanSubtotals_.subtotal_values.assert_called_once_with( + [1.1, 2.2, 3.3], rows_dimension_ + ) + assert subtotal_values == pytest.approx([np.nan, np.nan], nan_ok=True) + + class Describe_PopulationProportions: """Unit test suite for `cr.cube.stripe.measure._PopulationProportions` object.""" From cfc275ec2f0127178b743ffecd95a7941c7158b0 Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 17:19:36 +0200 Subject: [PATCH 08/12] make measure name plural --- src/cr/cube/cube.py | 28 ++++----- src/cr/cube/cubepart.py | 18 +++--- src/cr/cube/matrix/cubemeasure.py | 76 +++++++++++++------------ src/cr/cube/matrix/measure.py | 12 ++-- src/cr/cube/stripe/cubemeasure.py | 54 +++++++++--------- src/cr/cube/stripe/measure.py | 18 +++--- tests/integration/test_cube.py | 2 +- tests/integration/test_cubepart.py | 14 ++--- tests/integration/test_matrix.py | 6 +- tests/integration/test_numeric_array.py | 4 +- tests/unit/stripe/test_cubemeasure.py | 36 ++++++------ tests/unit/stripe/test_measure.py | 20 ++++--- 12 files changed, 148 insertions(+), 140 deletions(-) diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index d73947697..167737840 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -428,11 +428,13 @@ def means(self) -> Optional[np.ndarray]: return 
self._measures.means.raw_cube_array[self._valid_idxs].astype(np.float64) @lazyproperty - def median(self) -> Optional[np.ndarray]: - """Optional float64 ndarray of the cube_median if the measure exists.""" - if self._measures.median is None: + def medians(self) -> Optional[np.ndarray]: + """Optional float64 ndarray of the cube_medians if the measure exists.""" + if self._measures.medians is None: return None - return self._measures.median.raw_cube_array[self._valid_idxs].astype(np.float64) + return self._measures.medians.raw_cube_array[self._valid_idxs].astype( + np.float64 + ) @lazyproperty def missing(self) -> int: @@ -789,12 +791,12 @@ def means(self) -> 'Optional["_MeanMeasure"]': return None if mean.raw_cube_array is None else mean @lazyproperty - def median(self) -> 'Optional["_MedianMeasure"]': + def medians(self) -> 'Optional["_MediansMeasure"]': """Optional _MedianMeasure object providing access to means values.""" - median = _MedianMeasure( + medians = _MediansMeasure( self._cube_dict, self._all_dimensions, self._cube_idx_arg ) - return None if median.raw_cube_array is None else median + return None if medians.raw_cube_array is None else medians @lazyproperty def missing_count(self) -> int: @@ -805,10 +807,10 @@ def missing_count(self) -> int: # fixtures that don't have valid_counts. if self.means is not None: return self.means.missing_count - # The check on the median measure is needed for retro-compatibility with the old + # The check on the medians measure is needed for retro-compatibility with the old # fixtures that don't have valid_counts. 
- if self.median is not None: - return self.median.missing_count + if self.medians is not None: + return self.medians.missing_count return self._cube_dict["result"].get("missing", 0) @lazyproperty @@ -1066,8 +1068,8 @@ def _flat_values(self) -> Optional[np.ndarray]: ).flatten() -class _MedianMeasure(_BaseMeasure): - """Statistical median values from a cube-response.""" +class _MediansMeasure(_BaseMeasure): + """Statistical medians values from a cube-response.""" @lazyproperty def missing_count(self) -> int: @@ -1078,7 +1080,7 @@ def missing_count(self) -> int: def _flat_values(self) -> Optional[np.ndarray]: """Optional 1D np.ndarray of np.float64 median values as found in cube response. - Median data may include missing items represented by a dict like + Medians data may include missing items represented by a dict like {'?': -1} in the cube response. These are replaced by np.nan in the returned value. """ diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index f3a764c82..d85337cf0 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -712,19 +712,19 @@ def means(self): ) @lazyproperty - def median(self): - """2D optional np.float64 ndarray of median value for each table cell. + def medians(self): + """2D optional np.float64 ndarray of median values for each table cell. Cell value is `np.nan` for each cell corresponding to an inserted subtotal - (median of addend cells cannot simply be added to get the mean of the subtotal). + (medians of addend cells cannot simply be added to get the mean of the subtotal). Raises `ValueError` if the cube-result does not include a median cube-measure. 
""" try: - return self._assemble_matrix(self._measures.median.blocks) + return self._assemble_matrix(self._measures.medians.blocks) except ValueError: raise ValueError( - "`.median` is undefined for a cube-result without a median measure" + "`.medians` is undefined for a cube-result without a median measure" ) @lazyproperty @@ -2022,17 +2022,17 @@ def means(self): ) @lazyproperty - def median(self): - """1D np.float64 ndarray of median for each row of strand. + def medians(self): + """1D np.float64 ndarray of medians for each row of strand. Raises ValueError when accessed on a cube-result that does not contain a median cube-measure. """ try: - return self._assemble_vector(self._measures.median.blocks) + return self._assemble_vector(self._measures.medians.blocks) except ValueError: raise ValueError( - "`.median` is undefined for a cube-result without a median measure" + "`.medians` is undefined for a cube-result without a median measure" ) @lazyproperty diff --git a/src/cr/cube/matrix/cubemeasure.py b/src/cr/cube/matrix/cubemeasure.py index 2da54e67b..71ba66e26 100644 --- a/src/cr/cube/matrix/cubemeasure.py +++ b/src/cr/cube/matrix/cubemeasure.py @@ -28,7 +28,7 @@ def cube_means(self): @lazyproperty def cube_median(self): """_BaseCubeMedian subclass object for this cube-result.""" - return _BaseCubeMedian.factory(self._cube, self._dimensions, self._slice_idx) + return _BaseCubeMedians.factory(self._cube, self._dimensions, self._slice_idx) @lazyproperty def cube_overlaps(self): @@ -139,7 +139,9 @@ def factory(cls, counts, diff_nans, cube, dimensions, slice_idx): ( "MR" if dim_type == DT.MR - else "ARR" if dim_type in DT.ARRAY_TYPES else "CAT" + else "ARR" + if dim_type in DT.ARRAY_TYPES + else "CAT" ) for dim_type in cube.dimension_types[-2:] ) @@ -882,15 +884,15 @@ def means(self): return self._means[:, 0, :, 0] -# === MEDIAN === +# === MEDIANs === -class _BaseCubeMedian(_BaseCubeMeasure): - """Base class for median cube-measure variants.""" +class 
_BaseCubeMedians(_BaseCubeMeasure): + """Base class for medians cube-measure variants.""" - def __init__(self, dimensions, median): - super(_BaseCubeMedian, self).__init__(dimensions) - self._median = median + def __init__(self, dimensions, medians): + super(_BaseCubeMedians, self).__init__(dimensions) + self._medians = medians @classmethod def factory(cls, cube, dimensions, slice_idx): @@ -898,74 +900,74 @@ def factory(cls, cube, dimensions, slice_idx): Raises `ValueError` if the cube-result does not include a cube-median measure. """ - if cube.median is None: + if cube.medians is None: raise ValueError("cube-result does not contain cube-median measure") dimension_types = cube.dimension_types[-2:] CubeMedianCls = ( - _MrXMrCubeMedian + _MrXMrCubeMedians if dimension_types == (DT.MR, DT.MR) else ( - _MrXCatCubeMedian + _MrXCatCubeMedians if dimension_types[0] == DT.MR else ( - _CatXMrCubeMedian + _CatXMrCubeMedians if dimension_types[1] == DT.MR - else _CatXCatCubeMedian + else _CatXCatCubeMedians ) ) ) return CubeMedianCls( - dimensions, cube.median[cls._slice_idx_expr(cube, slice_idx)] + dimensions, cube.medians[cls._slice_idx_expr(cube, slice_idx)] ) @lazyproperty - def median(self): - """2D np.float64 ndarray of cube median.""" + def medians(self): + """2D np.float64 ndarray of cube medians.""" raise NotImplementedError( # pragma: no cover - f"`{type(self).__name__}` must implement `.median`" + f"`{type(self).__name__}` must implement `.medians`" ) -class _CatXCatCubeMedian(_BaseCubeMedian): - """Median cube-measure for a slice with no MR dimensions.""" +class _CatXCatCubeMedians(_BaseCubeMedians): + """Medians cube-measure for a slice with no MR dimensions.""" @lazyproperty - def median(self): - """2D np.float64 ndarray of median for each valid matrix cell.""" - return self._median + def medians(self): + """2D np.float64 ndarray of medians for each valid matrix cell.""" + return self._medians -class _CatXMrCubeMedian(_BaseCubeMedian): - """Median cube-measure 
for a NOT_MR_X_MR slice. +class _CatXMrCubeMedians(_BaseCubeMedians): + """Medians cube-measure for a NOT_MR_X_MR slice. Note that the rows-dimensions need not actually be CAT. """ @lazyproperty - def median(self): - """2D np.float64 ndarray of median for each valid matrix cell.""" - return self._median[:, :, 0] + def medians(self): + """2D np.float64 ndarray of medians for each valid matrix cell.""" + return self._medians[:, :, 0] -class _MrXCatCubeMedian(_BaseCubeMedian): - """Median cube-measure for an MR_X_NOT_MR slice. +class _MrXCatCubeMedians(_BaseCubeMedians): + """Medians cube-measure for an MR_X_NOT_MR slice. Note that the columns-dimension need not actually be CAT. """ @lazyproperty - def median(self): - """2D np.float64 ndarray of median for each valid matrix cell.""" - return self._median[:, 0, :] + def medians(self): + """2D np.float64 ndarray of medians for each valid matrix cell.""" + return self._medians[:, 0, :] -class _MrXMrCubeMedian(_BaseCubeMedian): - """Median cube-measure for an MR_X_MR slice.""" +class _MrXMrCubeMedians(_BaseCubeMedians): + """Medians cube-measure for an MR_X_MR slice.""" @lazyproperty - def median(self): - """2D np.float64 ndarray of median for each valid matrix cell.""" - return self._median[:, 0, :, 0] + def medians(self): + """2D np.float64 ndarray of medians for each valid matrix cell.""" + return self._medians[:, 0, :, 0] # === OVERLAPS === diff --git a/src/cr/cube/matrix/measure.py b/src/cr/cube/matrix/measure.py index 6506ceb26..708a87c69 100644 --- a/src/cr/cube/matrix/measure.py +++ b/src/cr/cube/matrix/measure.py @@ -207,9 +207,9 @@ def means(self): return _Means(self._dimensions, self, self._cube_measures) @lazyproperty - def median(self): - """_Median measure object for this cube-result""" - return _Median(self._dimensions, self, self._cube_measures) + def medians(self): + """_Medians measure object for this cube-result""" + return _Medians(self._dimensions, self, self._cube_measures) def 
pairwise_p_vals_for_subvar(self, subvar_idx): """_PairwiseSigPValsForSubvar measure object for this cube-result""" @@ -1182,14 +1182,14 @@ def blocks(self): ) -class _Median(_BaseSecondOrderMeasure): - """Provides the median measure for a matrix.""" +class _Medians(_BaseSecondOrderMeasure): + """Provides the medians measure for a matrix.""" @lazyproperty def blocks(self): """2D array of the four 2D "blocks" making up this measure.""" return NanSubtotals.blocks( - self._cube_measures.cube_median.median, self._dimensions + self._cube_measures.cube_median.medians, self._dimensions ) diff --git a/src/cr/cube/stripe/cubemeasure.py b/src/cr/cube/stripe/cubemeasure.py index 8c42fd60e..16ae3ebe9 100644 --- a/src/cr/cube/stripe/cubemeasure.py +++ b/src/cr/cube/stripe/cubemeasure.py @@ -27,9 +27,9 @@ def cube_means(self): return _BaseCubeMeans.factory(self._cube, self._rows_dimension) @lazyproperty - def cube_median(self): - """_BaseCubeMedian subclass object for this stripe.""" - return _BaseCubeMedian.factory(self._cube, self._rows_dimension) + def cube_medians(self): + """_BaseCubeMedians subclass object for this stripe.""" + return _BaseCubeMedians.factory(self._cube, self._rows_dimension) @lazyproperty def cube_stddev(self): @@ -258,53 +258,55 @@ def means(self): return self._means[:, 0] -# === MEDIAN === +# === MEDIANs === -class _BaseCubeMedian(_BaseCubeMeasure): - """Base class for median cube-measure variants.""" +class _BaseCubeMedians(_BaseCubeMeasure): + """Base class for medians cube-measure variants.""" - def __init__(self, rows_dimension, median): - super(_BaseCubeMedian, self).__init__(rows_dimension) - self._median = median + def __init__(self, rows_dimension, medians): + super(_BaseCubeMedians, self).__init__(rows_dimension) + self._medians = medians @classmethod def factory(cls, cube, rows_dimension): """Return _BaseCubeMedian subclass instance appropriate to `cube`.""" - if cube.median is None: + if cube.medians is None: raise ValueError("cube-result does 
not contain cube-median measure") MedianCls = ( - _MrCubeMedian if rows_dimension.dimension_type == DT.MR else _CatCubeMedian + _MrCubeMedians + if rows_dimension.dimension_type == DT.MR + else _CatCubeMedians ) - return MedianCls(rows_dimension, cube.median) + return MedianCls(rows_dimension, cube.medians) @lazyproperty - def median(self): - """1D np.float64 ndarray of median for each stripe row.""" + def medians(self): + """1D np.float64 ndarray of medians for each stripe row.""" raise NotImplementedError( - f"`{type(self).__name__}` must implement `.median`" + f"`{type(self).__name__}` must implement `.medians`" ) # pragma: no cover -class _CatCubeMedian(_BaseCubeMedian): - """Median cube-measure for a non-MR stripe.""" +class _CatCubeMedians(_BaseCubeMedians): + """Medians cube-measure for a non-MR stripe.""" @lazyproperty - def median(self): - """1D np.float64 ndarray of median for each stripe row.""" - return self._median + def medians(self): + """1D np.float64 ndarray of medians for each stripe row.""" + return self._medians -class _MrCubeMedian(_BaseCubeMedian): - """Median cube-measure for an MR stripe. +class _MrCubeMedians(_BaseCubeMedians): + """Medians cube-measure for an MR stripe. - Its `.median` is a 2D ndarray with axes (rows, sel/not). + Its `.medians` is a 2D ndarray with axes (rows, sel/not). 
""" @lazyproperty - def median(self): - """1D np.float64 ndarray of median for each stripe row.""" - return self._median[:, 0] + def medians(self): + """1D np.float64 ndarray of medians for each stripe row.""" + return self._medians[:, 0] # === STD DEV === diff --git a/src/cr/cube/stripe/measure.py b/src/cr/cube/stripe/measure.py index 3148ed7b5..2e4e9a6e0 100644 --- a/src/cr/cube/stripe/measure.py +++ b/src/cr/cube/stripe/measure.py @@ -39,9 +39,9 @@ def means(self): return _Means(self._rows_dimension, self, self._cube_measures) @lazyproperty - def median(self): - """_Median measure object for this stripe.""" - return _Median(self._rows_dimension, self, self._cube_measures) + def medians(self): + """_Medians measure object for this stripe.""" + return _Medians(self._rows_dimension, self, self._cube_measures) @lazyproperty def population_proportions(self): @@ -236,22 +236,22 @@ def subtotal_values(self): return NanSubtotals.subtotal_values(self.base_values, self._rows_dimension) -class _Median(_BaseSecondOrderMeasure): - """Provides the median measure for a stripe. +class _Medians(_BaseSecondOrderMeasure): + """Provides the medians measure for a stripe. - Relies on the presence of a median cube-measure in the cube-result. + Relies on the presence of a medians cube-measure in the cube-result. """ @lazyproperty def base_values(self): - """1D np.float64 ndarray of median for each row.""" - return self._cube_measures.cube_median.median + """1D np.float64 ndarray of medians for each row.""" + return self._cube_measures.cube_medians.medians @lazyproperty def subtotal_values(self): """1D ndarray of np.nan for each row-subtotal. - Median values cannot be subtotaled and each subtotal value is unconditionally + Medians values cannot be subtotaled and each subtotal value is unconditionally np.nan. 
""" return NanSubtotals.subtotal_values(self.base_values, self._rows_dimension) diff --git a/tests/integration/test_cube.py b/tests/integration/test_cube.py index 2efc8dc37..b36899e87 100644 --- a/tests/integration/test_cube.py +++ b/tests/integration/test_cube.py @@ -158,7 +158,7 @@ def it_provides_multiple_measures_for_NUM_ARRAY_GROUPED_BY_CAT(self): ] ) ) - assert cube.median == pytest.approx( + assert cube.medians == pytest.approx( np.array( [ [71.0, 42.5, 55.33333333, 1.33333333], diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index 83dd7e251..8426ea809 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -58,10 +58,10 @@ def it_provides_values_for_cat_x_cat(self): == "`.means` is undefined for a cube-result without a mean measure" ) with pytest.raises(ValueError) as e: - slice_.median + slice_.medians assert ( str(e.value) - == "`.median` is undefined for a cube-result without a median measure" + == "`.medians` is undefined for a cube-result without a median measure" ) with pytest.raises(ValueError) as e: slice_.pairwise_means_indices @@ -454,7 +454,7 @@ def it_provides_values_for_median_cat_x_cat_hs(self): # both measures are available at cubepart level. 
assert slice_.columns_margin.tolist() == [189, 395, 584, 606, 310] assert slice_.counts == pytest.approx(np.array([[189, 395, 584, 606, 310]])) - assert slice_.median == pytest.approx( + assert slice_.medians == pytest.approx( np.array([[14.4393575, 37.3212274, np.nan, 25.4857195, 23.0242765]]), nan_ok=True, ) @@ -2010,9 +2010,9 @@ def it_provides_values_for_univariate_cat(self): "`.sums` is undefined for a cube-result without a sum measure" ) with pytest.raises(ValueError) as e: - strand.median + strand.medians assert str(e.value) == ( - "`.median` is undefined for a cube-result without a median measure" + "`.medians` is undefined for a cube-result without a median measure" ) with pytest.raises(ValueError) as e: strand.stddev @@ -2323,13 +2323,13 @@ def it_provides_sum_measure_for_CAT(self): def it_provides_median_measure_for_CAT(self): strand = Cube(CR.CAT_MEDIAN).partitions[0] - assert strand.median == pytest.approx([8.8, 7.445]) + assert strand.medians == pytest.approx([8.8, 7.445]) assert strand.table_base_range.tolist() == [5, 5] def it_provides_median_measure_for_MR(self): strand = Cube(CR.MR_MEDIAN).partitions[0] - assert strand.median == pytest.approx([2.22398, 0.23444, 7.23452]) + assert strand.medians == pytest.approx([2.22398, 0.23444, 7.23452]) assert strand.table_base_range.tolist() == [3, 3] def it_provides_sum_measure_for_CAT_HS(self): diff --git a/tests/integration/test_matrix.py b/tests/integration/test_matrix.py index dc642ea12..1fc2db503 100644 --- a/tests/integration/test_matrix.py +++ b/tests/integration/test_matrix.py @@ -1271,7 +1271,7 @@ def it_computes_means_cat_x_mr(self): def it_computes_median_cat_x_mr(self): slice_ = Cube(CR.MEDIAN_CAT_X_MR).partitions[0] np.testing.assert_almost_equal( - slice_.median, + slice_.medians, np.array( [ [19.0, 30.71428571, 18.0, 49.76923077, 37.2962963], @@ -1287,7 +1287,7 @@ def it_computes_median_cat_x_mr(self): def it_computes_median_for_mr_x_mr(self): slice_ = Cube(CR.MR_X_MR_MEDIAN).partitions[0] - 
assert slice_.median == pytest.approx( + assert slice_.medians == pytest.approx( np.array( [ [np.nan, np.nan, np.nan], @@ -1539,7 +1539,7 @@ def it_computes_sum_mr_x_cat(self): def it_computes_median_mr_x_cat(self): slice_ = Cube(CR.MEDIAN_MR_X_CAT).partitions[0] - assert slice_.median == pytest.approx( + assert slice_.medians == pytest.approx( np.array([[3.5123, 0.1523], [2.9123, 0.1123], [2.7123, 0.6123]]) ) diff --git a/tests/integration/test_numeric_array.py b/tests/integration/test_numeric_array.py index 676130742..febb92ef9 100644 --- a/tests/integration/test_numeric_array.py +++ b/tests/integration/test_numeric_array.py @@ -190,7 +190,7 @@ def it_provides_median_for_numeric_array_with_no_grouping(self): """Test stddev on no-dimensions measure of numeric array.""" strand = Cube(NA.NUM_ARR_MEDIAN_NO_GROUPING).partitions[0] - assert strand.median == pytest.approx([3.5819889, 1.51188458, 0.12132034]) + assert strand.medians == pytest.approx([3.5819889, 1.51188458, 0.12132034]) assert strand.unweighted_counts.tolist() == [4, 3, 2] assert strand.unweighted_bases.tolist() == [4, 3, 2] assert strand.table_base_range.tolist() == [2, 4] @@ -216,7 +216,7 @@ def it_provides_stddev_for_num_array_grouped_by_cat(self): def it_provides_median_for_num_array_grouped_by_cat(self): slice_ = Cube(NA.NUM_ARR_MEDIAN_GROUPED_BY_CAT).partitions[0] - assert slice_.median == pytest.approx( + assert slice_.medians == pytest.approx( np.array( [ # --------Gender------------ diff --git a/tests/unit/stripe/test_cubemeasure.py b/tests/unit/stripe/test_cubemeasure.py index e30ad23f4..4354d42f8 100644 --- a/tests/unit/stripe/test_cubemeasure.py +++ b/tests/unit/stripe/test_cubemeasure.py @@ -10,14 +10,14 @@ from cr.cube.enums import DIMENSION_TYPE as DT from cr.cube.stripe.cubemeasure import ( _BaseCubeMeans, - _BaseCubeMedian, + _BaseCubeMedians, _BaseCubeSums, _BaseCubeCounts, _CatCubeCounts, _CatCubeMeans, - _CatCubeMedian, + _CatCubeMedians, _CatCubeSums, - _MrCubeMedian, + 
_MrCubeMedians, CubeMeasures, _MrCubeCounts, _MrCubeMeans, @@ -49,14 +49,14 @@ def it_provides_access_to_the_cube_means_object( def it_provides_access_to_the_cube_median_object( self, request, cube_, rows_dimension_ ): - cube_medians_ = instance_mock(request, _BaseCubeMedian) + cube_medians_ = instance_mock(request, _BaseCubeMedians) _BaseCubeMedian_ = class_mock( - request, "cr.cube.stripe.cubemeasure._BaseCubeMedian" + request, "cr.cube.stripe.cubemeasure._BaseCubeMedians" ) _BaseCubeMedian_.factory.return_value = cube_medians_ cube_measures = CubeMeasures(cube_, rows_dimension_, None, None) - cube_medians = cube_measures.cube_median + cube_medians = cube_measures.cube_medians _BaseCubeMedian_.factory.assert_called_once_with(cube_, rows_dimension_) assert cube_medians is cube_medians_ @@ -376,14 +376,14 @@ class Describe_BaseCubeMedian: @pytest.mark.parametrize( "rows_dimension_type, CubeMedianCls, medians", ( - (DT.CAT, _CatCubeMedian, [1, 2, 3]), - (DT.MR, _MrCubeMedian, [[1, 6], [2, 5], [3, 4]]), + (DT.CAT, _CatCubeMedians, [1, 2, 3]), + (DT.MR, _MrCubeMedians, [[1, 6], [2, 5], [3, 4]]), ), ) def it_provides_a_factory_for_constructing_cube_medians_objects( self, request, rows_dimension_type, CubeMedianCls, medians ): - cube_ = instance_mock(request, Cube, median=medians) + cube_ = instance_mock(request, Cube, medians=medians) rows_dimension_ = instance_mock( request, Dimension, dimension_type=rows_dimension_type ) @@ -394,7 +394,7 @@ def it_provides_a_factory_for_constructing_cube_medians_objects( return_value=cube_medians_, ) - cube_medians = _BaseCubeMedian.factory(cube_, rows_dimension_) + cube_medians = _BaseCubeMedians.factory(cube_, rows_dimension_) CubeMedianCls_.assert_called_once_with(rows_dimension_, medians) assert cube_medians is cube_medians_ @@ -404,16 +404,16 @@ class Describe_CatCubeMedians: """Unit-test suite for `cr.cube.stripe.cubemeasure._CatCubeMedians`.""" def it_knows_its_medians(self): - cube_medians = _CatCubeMedian(None, 
np.array([1.1, 2.2, 3.3])) - assert cube_medians.median == pytest.approx([1.1, 2.2, 3.3]) + cube_medians = _CatCubeMedians(None, np.array([1.1, 2.2, 3.3])) + assert cube_medians.medians == pytest.approx([1.1, 2.2, 3.3]) def but_it_raises_value_error_when_the_cube_result_does_not_contain_medians( self, request ): cube_ = instance_mock(request, Cube) - cube_.median = None + cube_.medians = None with pytest.raises(ValueError) as e: - _CatCubeMedian(None, None).factory(cube_, None) + _CatCubeMedians(None, None).factory(cube_, None) assert str(e.value) == "cube-result does not contain cube-median measure" @@ -422,18 +422,18 @@ class Describe_MrCubeMedians: """Unit-test suite for `cr.cube.stripe.cubemeasure._MrCubeMedians`.""" def it_knows_its_medians(self): - cube_medians = _MrCubeMedian( + cube_medians = _MrCubeMedians( None, np.array([[1.1, 2.2], [3.3, 4.4], [5.5, 6.6]]) ) - assert cube_medians.median == pytest.approx([1.1, 3.3, 5.5]) + assert cube_medians.medians == pytest.approx([1.1, 3.3, 5.5]) def but_it_raises_value_error_when_the_cube_result_does_not_contain_medians( self, request ): cube_ = instance_mock(request, Cube) - cube_.median = None + cube_.medians = None with pytest.raises(ValueError) as e: - _CatCubeMedian(None, None).factory(cube_, None) + _CatCubeMedians(None, None).factory(cube_, None) assert str(e.value) == "cube-result does not contain cube-median measure" diff --git a/tests/unit/stripe/test_measure.py b/tests/unit/stripe/test_measure.py index 98ee75a99..e058fdc2d 100644 --- a/tests/unit/stripe/test_measure.py +++ b/tests/unit/stripe/test_measure.py @@ -11,7 +11,7 @@ from cr.cube.stripe.cubemeasure import ( _BaseCubeMeans, _BaseCubeCounts, - _BaseCubeMedian, + _BaseCubeMedians, _CatCubeCounts, CubeMeasures, ) @@ -20,7 +20,7 @@ _BaseSecondOrderMeasure, _Means, _MeansSmoothed, - _Median, + _Medians, _PopulationProportions, _PopulationProportionStderrs, _ScaledCounts, @@ -45,7 +45,7 @@ class DescribeStripeMeasures: "measure_prop_name, MeasureCls", ( 
("means", _Means), - ("median", _Median), + ("medians", _Medians), ("population_proportions", _PopulationProportions), ("population_proportion_stderrs", _PopulationProportionStderrs), ("scaled_counts", _ScaledCounts), @@ -202,19 +202,21 @@ class Describe_Median: def it_computes_its_base_values_to_help(self, request): cube_medians_ = instance_mock( - request, _BaseCubeMedian, median=np.array([1.1, 2.2, 3.3]) + request, _BaseCubeMedians, medians=np.array([1.1, 2.2, 3.3]) ) - cube_measures_ = instance_mock(request, CubeMeasures, cube_median=cube_medians_) - median = _Median(None, None, cube_measures_) + cube_measures_ = instance_mock( + request, CubeMeasures, cube_medians=cube_medians_ + ) + medians = _Medians(None, None, cube_measures_) - assert median.base_values == pytest.approx([1.1, 2.2, 3.3]) + assert medians.base_values == pytest.approx([1.1, 2.2, 3.3]) def it_computes_its_subtotal_values_to_help(self, request): - property_mock(request, _Median, "base_values", return_value=[1.1, 2.2, 3.3]) + property_mock(request, _Medians, "base_values", return_value=[1.1, 2.2, 3.3]) rows_dimension_ = instance_mock(request, Dimension) NanSubtotals_ = class_mock(request, "cr.cube.stripe.measure.NanSubtotals") NanSubtotals_.subtotal_values.return_value = np.array([np.nan, np.nan]) - medians = _Median(rows_dimension_, None, None) + medians = _Medians(rows_dimension_, None, None) subtotal_values = medians.subtotal_values From b475ccdad7cf64a13cb0e7a9037f3d6761db62c6 Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 17:23:05 +0200 Subject: [PATCH 09/12] ooops --- src/cr/cube/matrix/cubemeasure.py | 6 ++---- src/cr/cube/matrix/measure.py | 2 +- tests/unit/matrix/test_cubemeasure.py | 2 ++ 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cr/cube/matrix/cubemeasure.py b/src/cr/cube/matrix/cubemeasure.py index 71ba66e26..cafa9ce65 100644 --- a/src/cr/cube/matrix/cubemeasure.py +++ b/src/cr/cube/matrix/cubemeasure.py @@ -26,7 +26,7 @@ def 
cube_means(self): return _BaseCubeMeans.factory(self._cube, self._dimensions, self._slice_idx) @lazyproperty - def cube_median(self): + def cube_medians(self): """_BaseCubeMedian subclass object for this cube-result.""" return _BaseCubeMedians.factory(self._cube, self._dimensions, self._slice_idx) @@ -139,9 +139,7 @@ def factory(cls, counts, diff_nans, cube, dimensions, slice_idx): ( "MR" if dim_type == DT.MR - else "ARR" - if dim_type in DT.ARRAY_TYPES - else "CAT" + else "ARR" if dim_type in DT.ARRAY_TYPES else "CAT" ) for dim_type in cube.dimension_types[-2:] ) diff --git a/src/cr/cube/matrix/measure.py b/src/cr/cube/matrix/measure.py index 708a87c69..2fc7d0420 100644 --- a/src/cr/cube/matrix/measure.py +++ b/src/cr/cube/matrix/measure.py @@ -1189,7 +1189,7 @@ class _Medians(_BaseSecondOrderMeasure): def blocks(self): """2D array of the four 2D "blocks" making up this measure.""" return NanSubtotals.blocks( - self._cube_measures.cube_median.medians, self._dimensions + self._cube_measures.cube_medians.medians, self._dimensions ) diff --git a/tests/unit/matrix/test_cubemeasure.py b/tests/unit/matrix/test_cubemeasure.py index 2287d95b3..6e74db680 100644 --- a/tests/unit/matrix/test_cubemeasure.py +++ b/tests/unit/matrix/test_cubemeasure.py @@ -15,6 +15,7 @@ _BaseCubeCounts, _BaseCubeMeans, _BaseCubeMeasure, + _BaseCubeMedians, _BaseCubeOverlaps, _BaseCubeStdDev, _BaseCubeSums, @@ -54,6 +55,7 @@ class DescribeCubeMeasures: "cube_measure_, CubeMeasureCls", ( ("cube_means", _BaseCubeMeans), + ("cube_medians", _BaseCubeMedians), ("cube_overlaps", _BaseCubeOverlaps), ("cube_sum", _BaseCubeSums), ("cube_stddev", _BaseCubeStdDev), From 2a63c2993554e0bd3ff18139516c093410b8da66 Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 17:30:45 +0200 Subject: [PATCH 10/12] unit tests for 2d objs --- tests/unit/matrix/test_cubemeasure.py | 193 ++++++++++++++++++++++++++ tests/unit/matrix/test_measure.py | 2 + 2 files changed, 195 insertions(+) diff --git 
a/tests/unit/matrix/test_cubemeasure.py b/tests/unit/matrix/test_cubemeasure.py index 6e74db680..a37461d72 100644 --- a/tests/unit/matrix/test_cubemeasure.py +++ b/tests/unit/matrix/test_cubemeasure.py @@ -23,14 +23,18 @@ _CatXArrCubeCounts, _CatXCatCubeCounts, _CatXCatCubeMeans, + _CatXCatCubeMedians, _CatXCatCubeStdDev, _CatXCatCubeSums, _CatXCatUnconditionalCubeCounts, _CatXMrCubeCounts, _CatXMrCubeMeans, + _CatXMrCubeMedians, _CatXMrCubeStdDev, _CatXMrCubeSums, _CatXMrUnconditionalCubeCounts, + _MrXCatCubeMedians, + _MrXMrCubeMedians, CubeMeasures, _MrXArrCubeCounts, _MrXCatCubeCounts, @@ -1238,6 +1242,195 @@ def raw_means(self): ) +# === MEDIANS === + + +class Describe_BaseCubeMedians: + """Unit test suite for `cr.cube.matrix.cubemeasure._BaseCubeMedians`.""" + + @pytest.mark.parametrize( + "dimension_types, CubeMediansCls", + ( + ((DT.MR, DT.MR), _MrXMrCubeMedians), + ((DT.MR, DT.CAT), _MrXCatCubeMedians), + ((DT.CAT, DT.MR), _CatXMrCubeMedians), + ((DT.CAT, DT.CAT), _CatXCatCubeMedians), + ), + ) + def it_provides_a_factory_for_constructing_cube_medians_objects( + self, request, dimension_types, CubeMediansCls + ): + cube_ = instance_mock(request, Cube) + dimensions_ = ( + instance_mock(request, Dimension), + instance_mock(request, Dimension), + ) + cube_medians_ = instance_mock(request, CubeMediansCls) + CubeMediansCls_ = class_mock( + request, + "cr.cube.matrix.cubemeasure.%s" % CubeMediansCls.__name__, + return_value=cube_medians_, + ) + _slice_idx_expr_ = method_mock( + request, + _BaseCubeMedians, + "_slice_idx_expr", + return_value=1, + autospec=False, + ) + cube_.dimension_types = dimension_types + cube_.medians = [[1, 2], [3, 4]] + + cube_medians = _BaseCubeMedians.factory(cube_, dimensions_, slice_idx=7) + + _slice_idx_expr_.assert_called_once_with(cube_, 7) + CubeMediansCls_.assert_called_once_with(dimensions_, [3, 4]) + assert cube_medians is cube_medians_ + + def but_it_raises_a_value_error_when_cube_result_does_not_contain_mean_measure( + self, 
cube_ + ): + cube_.means = None + + with pytest.raises(ValueError) as e: + _BaseCubeMeans.factory(cube_, None, None) + + assert str(e.value) == "cube-result does not contain cube-means measure" + + # fixture components --------------------------------------------- + + @pytest.fixture + def cube_(self, request): + return instance_mock(request, Cube) + + +class Describe_CatXCatCubeMedians: + """Unit test suite for `cr.cube.matrix.cubemeasure._CatXCatCubeMedians`.""" + + def it_knows_its_medians(self): + raw_medians = np.array( + [ + [1.1, 2.3, 3.3], + [3.4, 1.5, 1.6], + ] + ) + cube_medians = _CatXCatCubeMedians(None, raw_medians) + + assert cube_medians.medians.tolist() == [ + [1.1, 2.3, 3.3], + [3.4, 1.5, 1.6], + ] + + +class Describe_CatXMrCubeMedians: + """Unit test suite for `cr.cube.matrix.cubemeasure._CatXMrCubeMedians`.""" + + def it_knows_its_means(self, raw_medians): + cube_medians = _CatXMrCubeMedians(None, raw_medians) + + assert cube_medians.medians.tolist() == [ + [1.1, 2.2, 3.2], + [4.3, 5.1, 6.1], + ] + + # fixtures ------------------------------------------------------- + + @pytest.fixture + def raw_medians(self): + """(2, 3, 2) np.float64 ndarray of medians as received from Cube.""" + return np.array( + [ # -- axes are (rows, cols, sel/not) -- + # --sel/not-- + [ # -- row 0 ------------ + [1.1, 6.1], # -- col 0 -- + [2.2, 5.2], # -- col 1 -- + [3.2, 4.2], # -- col 2 -- + ], + [ # -- row 1 ------------ + [4.3, 3.1], # -- col 0 -- + [5.1, 2.1], # -- col 1 -- + [6.1, 1.1], # -- col 2 -- + # -------------------- + ], + ] + ) + + +class Describe_MrXCatCubeMedians: + """Unit test suite for `cr.cube.matrix.cubemeasure._MrXCatCubeMedians`.""" + + def it_knows_its_means(self, raw_medians): + cube_medians = _MrXCatCubeMedians(None, raw_medians) + + assert cube_medians.medians.tolist() == [ + [1.1, 6.1], + [4.3, 3.1], + ] + + # fixtures ------------------------------------------------------- + + @pytest.fixture + def raw_medians(self): + """(2, 3, 2) np.int 
float64 of medians as received from Cube.""" + return np.array( + [ # -- axes are (rows, cols, sel/not) -- + # --sel/not-- + [ # -- row 0 ------------ + [1.1, 6.1], # -- col 0 -- + [2.2, 5.2], # -- col 1 -- + [3.2, 4.2], # -- col 2 -- + ], + [ # -- row 1 ------------ + [4.3, 3.1], # -- col 0 -- + [5.1, 2.1], # -- col 1 -- + [6.1, 1.1], # -- col 2 -- + # -------------------- + ], + ] + ) + + +class Describe_MrXMrCubeMedians: + """Unit test suite for `cr.cube.matrix.cubemeasure._MrXMrCubeMedians`.""" + + def it_knows_its_means(self, raw_medians): + cube_medians = _MrXMrCubeMedians(None, raw_medians) + + assert cube_medians.medians.tolist() == [[0.1, 0.1], [0.4, 0.5]] + + # fixtures ------------------------------------------------------- + + @pytest.fixture + def raw_medians(self): + """(2, 2, 2, 2) np.float64 ndarray of medians as from Cube.""" + return np.array( + # -- axes are (rows, sel/not, cols, sel/not) -- + [ + [ # -- row 0 ------------- + # --sel/not-- + [ # -- selected ------ + [0.1, 0.8], # -- col 0 + [0.1, 0.7], # -- col 1 + ], + [ # -- not selected -- + [0.2, 0.6], # -- col 0 + [0.3, 0.5], # -- col 1 + ], + ], + [ # -- row 1 ------------- + [ # -- selected ------ + [0.4, 0.4], # -- col 0 + [0.5, 0.3], # -- col 1 + ], + [ # -- not selected -- + [0.6, 0.2], # -- col 0 + [0.7, 0.1], # -- col 1 + ], + ], + ] + ) + + # === STD DEV === diff --git a/tests/unit/matrix/test_measure.py b/tests/unit/matrix/test_measure.py index 446593226..dd375b9ee 100644 --- a/tests/unit/matrix/test_measure.py +++ b/tests/unit/matrix/test_measure.py @@ -27,6 +27,7 @@ _ColumnStandardError, _ColumnUnweightedBases, _ColumnWeightedBases, + _Medians, _PairwiseSigPvals, _PairwiseSigTstats, _PairwiseMeansSigPVals, @@ -81,6 +82,7 @@ class DescribeSecondOrderMeasures: ("column_unweighted_bases", _ColumnUnweightedBases), ("column_weighted_bases", _ColumnWeightedBases), ("means", _Means), + ("medians", _Medians), ("population_proportions", _PopulationProportions), ("population_std_err", 
_PopulationStandardError), ("pvalues", _Pvalues), From f87b128284295ecdc7ab0daa0d152685128e4e2d Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Wed, 3 Jul 2024 17:32:12 +0200 Subject: [PATCH 11/12] ooops lint --- src/cr/cube/cube.py | 2 +- src/cr/cube/cubepart.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index 167737840..18d24e693 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -807,7 +807,7 @@ def missing_count(self) -> int: # fixtures that don't have valid_counts. if self.means is not None: return self.means.missing_count - # The check on the medians measure is needed for retro-compatibility with the old + # The check on the median measure is needed for retro-compatibility with the old # fixtures that don't have valid_counts. if self.medians is not None: return self.medians.missing_count diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index d85337cf0..3c7cee198 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -716,7 +716,8 @@ def medians(self): """2D optional np.float64 ndarray of median values for each table cell. Cell value is `np.nan` for each cell corresponding to an inserted subtotal - (medians of addend cells cannot simply be added to get the mean of the subtotal). + (medians of addend cells cannot simply be added to get the median of the + subtotal). Raises `ValueError` if the cube-result does not include a median cube-measure. 
""" From 63f9271c22aa2a6f15dcf03b431e465879da51e3 Mon Sep 17 00:00:00 2001 From: Ernesto Arbitrio Date: Thu, 4 Jul 2024 11:04:03 +0200 Subject: [PATCH 12/12] fix flatvalues for median cube measure --- src/cr/cube/cube.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index 18d24e693..4061efe01 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -1087,11 +1087,9 @@ def _flat_values(self) -> Optional[np.ndarray]: measure_payload = self._cube_dict["result"].get("measures", {}).get("median") if measure_payload is None: return None + data = np.array(measure_payload["data"]).flatten() return np.array( - tuple( - np.nan if isinstance(x, dict) else x for x in measure_payload["data"] - ), - dtype=np.float64, + tuple(np.nan if isinstance(x, dict) else x for x in data), dtype=np.float64 ).flatten()