From 51d04b1ba4057393589326fe4cb7b53a0e6a4d77 Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Thu, 30 Nov 2023 17:53:58 +0100 Subject: [PATCH 1/3] [#186590795]: Implement squared weights (iter #1) --- src/cr/cube/cube.py | 31 +++++++ src/cr/cube/enums.py | 1 + tests/fixtures/squared-weights.json | 127 ++++++++++++++++++++++++++++ tests/integration/test_cube.py | 19 +++++ 4 files changed, 178 insertions(+) create mode 100644 tests/fixtures/squared-weights.json diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index a7bcd8b89..e5374b393 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -523,6 +523,15 @@ def weighted_valid_counts(self) -> Optional[np.ndarray]: self._valid_idxs ].astype(np.float64) + @lazyproperty + def weighted_squared_counts(self) -> Optional[np.ndarray]: + """Optional float64 ndarray of weighted_squared_counts if the measure exists.""" + if self._measures.weighted_squared_counts is None: + return None + return self._measures.weighted_squared_counts.raw_cube_array[ + self._valid_idxs + ].astype(np.float64) + @lazyproperty def _all_dimensions(self) -> list: """List of all dimensions (not just user-apparent ones) for this cube.""" @@ -847,6 +856,14 @@ def weighted_valid_counts(self) -> 'Optional["_WeightedValidCountsMeasure"]': ) return valid_counts if valid_counts.raw_cube_array is not None else None + @lazyproperty + def weighted_squared_counts(self): + """Return object of class for representing squared weights.""" + squared_counts = _WeightedSquaredCountsMeasure( + self._cube_dict, self._all_dimensions, self._cube_idx_arg + ) + return squared_counts if squared_counts.raw_cube_array is not None else None + class _BaseMeasure: """Base class for measure objects.""" @@ -1100,6 +1117,20 @@ def _flat_values(self) -> Optional[np.ndarray]: return np.array(weighted_counts, dtype=np.float64) +class _WeightedSquaredCountsMeasure(_BaseMeasure): + """Weighted squared counts for cube.""" + + @lazyproperty + def _flat_values(self) -> 
Optional[np.ndarray]: + """Optional 1D np.ndarray of np.float64 weighted squared counts.""" + squared_counts = ( + self._cube_dict["result"]["measures"] + .get("weighted_squared_count", {}) + .get("data", []) + ) + return np.array(squared_counts, dtype=np.float64) if squared_counts else None + + class _WeightedValidCountsMeasure(_BaseMeasure): """Weighted Valid counts for cube.""" diff --git a/src/cr/cube/enums.py b/src/cr/cube/enums.py index 73f388252..287467631 100644 --- a/src/cr/cube/enums.py +++ b/src/cr/cube/enums.py @@ -163,6 +163,7 @@ class CUBE_MEASURE(enum.Enum): VALID_OVERLAP = "valid_overlap" UNWEIGHTED_VALID_COUNT = "valid_count_unweighted" WEIGHTED_VALID_COUNT = "valid_count_weighted" + WEIGHTED_SQUARED_COUNT = "weighted_squared_count" NUMERIC_CUBE_MEASURES = frozenset( diff --git a/tests/fixtures/squared-weights.json b/tests/fixtures/squared-weights.json new file mode 100644 index 000000000..448b013c8 --- /dev/null +++ b/tests/fixtures/squared-weights.json @@ -0,0 +1,127 @@ +{ + "result": { + "counts": [ + 2, + 0, + 2, + 2, + 2, + 0, + 0, + 0, + 1, + 1, + 0 + ], + "dimensions": [ + { + "derived": false, + "references": {}, + "type": { + "categories": [ + { + "id": -1, + "missing": true, + "name": "No Data", + "numeric_value": null + }, + { + "id": 0, + "missing": false, + "name": "Grade_0", + "numeric_value": null + }, + { + "id": 1, + "missing": false, + "name": "Grade_1", + "numeric_value": null + }, + { + "id": 2, + "missing": false, + "name": "Grade_2", + "numeric_value": null + }, + { + "id": 3, + "missing": false, + "name": "Grade_3", + "numeric_value": null + }, + { + "id": 4, + "missing": false, + "name": "Grade_4", + "numeric_value": null + }, + { + "id": 5, + "missing": false, + "name": "Grade_5", + "numeric_value": null + }, + { + "id": 6, + "missing": false, + "name": "Grade_6", + "numeric_value": null + }, + { + "id": 7, + "missing": false, + "name": "Grade_7", + "numeric_value": null + }, + { + "id": 8, + "missing": false, + "name": 
"Grade_8", + "numeric_value": null + }, + { + "id": 9, + "missing": false, + "name": "Grade_9", + "numeric_value": null + } + ], + "class": "categorical", + "ordinal": false + } + } + ], + "measures": { + "weighted_squared_count": { + "data": [ + 324.0, + 0.0, + 148.0, + 212.0, + 292.0, + 0.0, + 0.0, + 0.0, + 64.0, + 100.0, + 0.0 + ], + "metadata": { + "derived": true, + "references": {}, + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "NaN": -8, + "No Data": -1 + }, + "missing_rules": {} + } + }, + "n_missing": 2 + } + }, + "n": 10 + } +} diff --git a/tests/integration/test_cube.py b/tests/integration/test_cube.py index 7efb1860c..eae86275a 100644 --- a/tests/integration/test_cube.py +++ b/tests/integration/test_cube.py @@ -193,6 +193,25 @@ def it_does_not_get_fooled_into_single_mr_cats_dim(self): assert cube.dimension_types == (DT.LOGICAL,) assert cube.partitions[0].counts.tolist() == [200, 100] + def it_provides_squared_weights_counts(self): + cube = Cube(CR.SQUARED_WEIGHTS) + assert cube.weighted_squared_counts.tolist() == [ + 0.0, + 148.0, + 212.0, + 292.0, + 0.0, + 0.0, + 0.0, + 64.0, + 100.0, + 0.0, + ] + + def but_it_provides_None_when_no_squared_weights_counts_exist(self): + cube = Cube(CR.NOT_MR_CATS) + assert cube.weighted_squared_counts is None + class DescribeIntegrated_Measures: """Integration-tests that exercise the `cr.cube.cube._Measures` object.""" From 709f63452c75aae1d8717b1256aa0f25a904ab3b Mon Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Mon, 4 Dec 2023 09:12:56 +0100 Subject: [PATCH 2/3] [#186590795]: Implement effective counts --- src/cr/cube/matrix/cubemeasure.py | 15 ++ src/cr/cube/matrix/measure.py | 25 +- tests/fixtures/squared-weights-cat-x-cat.json | 249 ++++++++++++++++++ tests/integration/test_cubepart.py | 13 + tests/unit/matrix/test_measure.py | 3 +- 5 files changed, 303 insertions(+), 2 deletions(-) create mode 100644 tests/fixtures/squared-weights-cat-x-cat.json diff --git 
a/src/cr/cube/matrix/cubemeasure.py b/src/cr/cube/matrix/cubemeasure.py index 0afe012a9..980c53762 100644 --- a/src/cr/cube/matrix/cubemeasure.py +++ b/src/cr/cube/matrix/cubemeasure.py @@ -71,6 +71,21 @@ def weighted_cube_counts(self): counts, diff_nans, self._cube, self._dimensions, self._slice_idx ) + @lazyproperty + def effective_cube_counts(self): + squared_counts = self._cube.weighted_squared_counts + if squared_counts is None: + return self.unweighted_cube_counts + + valid_counts = self._cube.weighted_valid_counts + counts = valid_counts if valid_counts is not None else self._cube.counts + effectiveness = squared_counts.sum() / self._cube.unweighted_counts.sum() ** 2 + effective_counts = counts * effectiveness + + return _BaseCubeCounts.factory( + effective_counts, False, self._cube, self._dimensions, self._slice_idx + ) + class _BaseCubeMeasure: """Base class for all cube-measure objects.""" diff --git a/src/cr/cube/matrix/measure.py b/src/cr/cube/matrix/measure.py index 53e868394..73c0c7f56 100644 --- a/src/cr/cube/matrix/measure.py +++ b/src/cr/cube/matrix/measure.py @@ -73,6 +73,12 @@ def column_unweighted_bases(self): """_ColumnUnweightedBases measure object for this cube-result.""" return _ColumnUnweightedBases(self._dimensions, self, self._cube_measures) + @lazyproperty + def column_effective_bases(self): + """_ColumnEffectiveBases measure object for this cube-result.""" + ceb = _ColumnEffectiveBases(self._dimensions, self, self._cube_measures) + return ceb + @lazyproperty def column_weighted_bases(self): """_ColumnWeightedBases measure object for this cube-result.""" @@ -649,6 +655,10 @@ def _weighted_cube_counts(self): """ return self._cube_measures.weighted_cube_counts + @lazyproperty + def _effective_cube_counts(self): + return self._cube_measures.effective_cube_counts + class _SmoothedMeasure(_BaseSecondOrderMeasure): """Mixin providing `._smoother` property for smoothed measures.""" @@ -1129,6 +1139,18 @@ def _subtotal_rows(self): return 
np.broadcast_to(self._base_values[0, :], subtotal_rows.shape) +class _ColumnEffectiveBases(_ColumnWeightedBases): + """Provides the column-effective-bases measure for a matrix.""" + + @lazyproperty + def _base_values(self): + """2D np.float64 ndarray of column-proportion denominator for each cell. + + This is the first "block" and has the shape of the cube-measure (no insertions). + """ + return self._effective_cube_counts.column_bases + + class _Means(_BaseSecondOrderMeasure): """Provides the mean measure for a matrix.""" @@ -1488,7 +1510,8 @@ def _base_values(self): @lazyproperty def _bases(self): """2D array of 2D ndarray "blocks" for the column unweighted bases""" - return self._second_order_measures.column_unweighted_bases.blocks + # return self._second_order_measures.column_unweighted_bases.blocks + return self._second_order_measures.column_effective_bases.blocks def _reference_values(self, block_index): """Tuple of the reference proportions and bases for diff --git a/tests/fixtures/squared-weights-cat-x-cat.json b/tests/fixtures/squared-weights-cat-x-cat.json new file mode 100644 index 000000000..858a1c68e --- /dev/null +++ b/tests/fixtures/squared-weights-cat-x-cat.json @@ -0,0 +1,249 @@ +{ + "result": { + "n": 10, + "counts": [ + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "dimensions": [ + { + "type": { + "class": "categorical", + "ordinal": false, + "categories": [ + { + "id": 2, + "numeric_value": 0.0, + "name": "M", + "missing": false + }, + { + "id": 1, + "numeric_value": 2.0, + "name": "F", + "missing": false, + "selected": true + }, + { + "id": -1, + "name": "No Data", + "numeric_value": null, + "missing": true + } + ] + }, + "references": {}, + "derived": false + }, + { + "type": { + "class": "categorical", + "ordinal": false, + "categories": [ + { + "id": -1, + "name": "No Data", + "numeric_value": null, + "missing": true + 
}, + { + "id": 0, + "name": "Grade_0", + "numeric_value": null, + "missing": false + }, + { + "id": 1, + "name": "Grade_1", + "numeric_value": null, + "missing": false + }, + { + "id": 2, + "name": "Grade_2", + "numeric_value": null, + "missing": false + }, + { + "id": 3, + "name": "Grade_3", + "numeric_value": null, + "missing": false + }, + { + "id": 4, + "name": "Grade_4", + "numeric_value": null, + "missing": false + }, + { + "id": 5, + "name": "Grade_5", + "numeric_value": null, + "missing": false + }, + { + "id": 6, + "name": "Grade_6", + "numeric_value": null, + "missing": false + }, + { + "id": 7, + "name": "Grade_7", + "numeric_value": null, + "missing": false + }, + { + "id": 8, + "name": "Grade_8", + "numeric_value": null, + "missing": false + }, + { + "id": 9, + "name": "Grade_9", + "numeric_value": null, + "missing": false + } + ] + }, + "references": {}, + "derived": false + } + ], + "measures": { + "count": { + "metadata": { + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + }, + "references": {}, + "derived": true + }, + "data": [ + 0.0, + 0.0, + 1.3333333333333333, + 1.3333333333333333, + 1.3333333333333333, + 0.0, + 0.0, + 0.0, + 0.0, + 1.3333333333333333, + 0.0, + 0.6666666666666666, + 0.0, + 0.6666666666666666, + 0.6666666666666666, + 0.0, + 0.0, + 0.0, + 0.0, + 0.6666666666666666, + 0.0, + 0.0, + 1.3333333333333333, + 0.0, + 0.0, + 0.0, + 0.6666666666666666, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "n_missing": 3 + }, + "weighted_squared_count": { + "metadata": { + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + }, + "references": {}, + "derived": true + }, + "data": [ + 0.0, + 0.0, + 1.7777777777777777, + 1.7777777777777777, + 1.7777777777777777, + 0.0, + 0.0, + 0.0, + 0.0, + 1.7777777777777777, + 0.0, + 0.4444444444444444, + 0.0, + 0.4444444444444444, + 0.4444444444444444, + 0.0, + 0.0, + 0.0, + 
0.0, + 0.4444444444444444, + 0.0, + 0.0, + 1.7777777777777777, + 0.0, + 0.0, + 0.0, + 0.4444444444444444, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "n_missing": 3 + } + } + } +} diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index f5bb60430..193da497f 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -15,6 +15,8 @@ from ..fixtures import CR, TR, MRI from ..util import load_python_expression +NA = np.nan + class Describe_Slice: """Integration-test suite for _Slice object.""" @@ -1895,6 +1897,17 @@ def it_has_bases_that_dont_sum_across_ca_subvars_with_insertions(self): # --- and row bases are the same as the counts assert slice_.row_weighted_bases.tolist() == slice_.counts.tolist() + def it_uses_squared_weights_for_effect_calculation(self): + cube = Cube(CR.SQUARED_WEIGHTS_CAT_X_CAT) + slice_ = cube.partitions[0] + np.testing.assert_almost_equal( + slice_.pairwise_significance_t_stats(1), + [ + [NA, 0.0, 0.0, 0.41513323, NA, NA, NA, -0.83026647, 0.41513323, NA], + [NA, 0.0, 0.0, -0.41513323, NA, NA, NA, 0.83026647, -0.41513323, NA], + ], + ) + class Describe_Strand: """Integration-test suite for `cr.cube.cubepart._Strand` object.""" diff --git a/tests/unit/matrix/test_measure.py b/tests/unit/matrix/test_measure.py index fc5963bd2..ad01ce6e4 100644 --- a/tests/unit/matrix/test_measure.py +++ b/tests/unit/matrix/test_measure.py @@ -903,7 +903,8 @@ def it_calculates_the_base_values_to_help( _calculate_t_stats_.assert_called_once_with(pairwise_tstat, 0.5, 1, 9, 0) def it_provides_the_bases_to_help(self, second_order_measures_): - second_order_measures_.column_unweighted_bases.blocks = [1, 2] + # second_order_measures_.column_unweighted_bases.blocks = [1, 2] + second_order_measures_.column_effective_bases.blocks = [1, 2] pairwise_tstat = _PairwiseSigTstats(None, second_order_measures_, None, None) assert pairwise_tstat._bases == [1, 2] From 9443014dacf23f629590fa8b56cb5f1ec93780ce Mon 
Sep 17 00:00:00 2001 From: Slobodan Ilic Date: Tue, 5 Dec 2023 20:35:26 +0100 Subject: [PATCH 3/3] [#186590795]: Correct numbers at last --- src/cr/cube/cubepart.py | 13 + src/cr/cube/matrix/cubemeasure.py | 12 +- src/cr/cube/matrix/measure.py | 75 +++- src/cr/cube/measures/pairwise_significance.py | 8 +- ...su-illness-x-occupation-plain-weights.json | 278 ++++++++++++++ ...u-illness-x-occupation-square-weights.json | 339 ++++++++++++++++++ tests/integration/test_cubepart.py | 4 +- .../integration/test_pairwise_significance.py | 122 +++++++ tests/unit/matrix/test_measure.py | 4 +- 9 files changed, 830 insertions(+), 25 deletions(-) create mode 100644 tests/fixtures/pairwise-hirotsu-illness-x-occupation-plain-weights.json create mode 100644 tests/fixtures/pairwise-hirotsu-illness-x-occupation-square-weights.json diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index 57dda29bc..77c435e27 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -400,6 +400,19 @@ def columns_base(self): # --- otherwise columns-base is a vector --- return self._assemble_marginal(self._measures.columns_unweighted_base) + @lazyproperty + def columns_squared_base(self): + """1D np.float64 ndarray of squared weights, summed for each column. + + This is a measure that needs to be asked from zz9 explicitly. It is only used + in the calculation of the pairwise comparisons, where weights are applied, in + order to adjust for the "design effect" of the study (reduce the inflated Nw). 
+ """ + if not self._measures.columns_squared_base.is_defined: + return None + + return self._assemble_marginal(self._measures.columns_squared_base) + @lazyproperty def columns_dimension_description(self): """str description assigned to columns-dimension.""" diff --git a/src/cr/cube/matrix/cubemeasure.py b/src/cr/cube/matrix/cubemeasure.py index 980c53762..a09c19494 100644 --- a/src/cr/cube/matrix/cubemeasure.py +++ b/src/cr/cube/matrix/cubemeasure.py @@ -72,18 +72,14 @@ def weighted_cube_counts(self): ) @lazyproperty - def effective_cube_counts(self): + def weighted_squared_cube_counts(self): + """_BaseCubeCounts subclass obj for squared weights' counts cube-result.""" squared_counts = self._cube.weighted_squared_counts if squared_counts is None: - return self.unweighted_cube_counts - - valid_counts = self._cube.weighted_valid_counts - counts = valid_counts if valid_counts is not None else self._cube.counts - effectiveness = squared_counts.sum() / self._cube.unweighted_counts.sum() ** 2 - effective_counts = counts * effectiveness + return None return _BaseCubeCounts.factory( - effective_counts, False, self._cube, self._dimensions, self._slice_idx + squared_counts, False, self._cube, self._dimensions, self._slice_idx ) diff --git a/src/cr/cube/matrix/measure.py b/src/cr/cube/matrix/measure.py index 73c0c7f56..cf6a94eb9 100644 --- a/src/cr/cube/matrix/measure.py +++ b/src/cr/cube/matrix/measure.py @@ -74,10 +74,9 @@ def column_unweighted_bases(self): return _ColumnUnweightedBases(self._dimensions, self, self._cube_measures) @lazyproperty - def column_effective_bases(self): - """_ColumnEffectiveBases measure object for this cube-result.""" - ceb = _ColumnEffectiveBases(self._dimensions, self, self._cube_measures) - return ceb + def column_squared_bases(self): + """_ColumnSquaredBases measure object for this cube-result.""" + return _ColumnSquaredBases(self._dimensions, self, self._cube_measures) @lazyproperty def column_weighted_bases(self): @@ -183,6 +182,13 @@
def columns_unweighted_base(self): self._dimensions, self, self._cube_measures, MO.COLUMNS ) + @lazyproperty + def columns_squared_base(self): + """_MarginSquaredBase for columns measure object for this cube-result.""" + return _MarginSquaredBase( + self._dimensions, self, self._cube_measures, MO.COLUMNS + ) + @lazyproperty def columns_weighted_base(self): """_MarginWeightedBase for columns measure object for this cube-result. @@ -656,8 +662,12 @@ def _weighted_cube_counts(self): return self._cube_measures.weighted_cube_counts @lazyproperty - def _effective_cube_counts(self): - return self._cube_measures.effective_cube_counts + def _weighted_squared_cube_counts(self): + """_BaseCubeCounts subclass instance for this measure. + + Provides cube measures associated with weights' squared counts. + """ + return self._cube_measures.weighted_squared_cube_counts class _SmoothedMeasure(_BaseSecondOrderMeasure): @@ -1139,16 +1149,21 @@ def _subtotal_rows(self): return np.broadcast_to(self._base_values[0, :], subtotal_rows.shape) -class _ColumnEffectiveBases(_ColumnWeightedBases): - """Provides the column-effective-bases measure for a matrix.""" +class _ColumnSquaredBases(_ColumnWeightedBases): + """Provides the column-squared-bases measure for a matrix.""" @lazyproperty def _base_values(self): - """2D np.float64 ndarray of column-proportion denominator for each cell. + """2D np.float64 ndarray of squared weight denominator for each column. This is the first "block" and has the shape of the cube-measure (no insertions).
""" - return self._effective_cube_counts.column_bases + return self._weighted_squared_cube_counts.column_bases + + @lazyproperty + def is_defined(self): + """Bool indicating whether squared weights' counts are defined.""" + return self._weighted_squared_cube_counts is not None class _Means(_BaseSecondOrderMeasure): @@ -1510,8 +1525,21 @@ def _base_values(self): @lazyproperty def _bases(self): """2D array of 2D ndarray "blocks" for the column unweighted bases""" - # return self._second_order_measures.column_unweighted_bases.blocks - return self._second_order_measures.column_effective_bases.blocks + unweighted_blocks = self._second_order_measures.column_unweighted_bases.blocks + if self._second_order_measures.columns_squared_base.is_defined: + squared_blocks = self._second_order_measures.column_squared_bases.blocks + effective_blocks = [ + [ + unweighted_blocks[0][0] ** 2 / squared_blocks[0][0], + unweighted_blocks[0][1] ** 2 / squared_blocks[0][1], + ], + [ + unweighted_blocks[1][0] ** 2 / squared_blocks[1][0], + unweighted_blocks[1][1] ** 2 / squared_blocks[1][1], + ], + ] + return effective_blocks + return unweighted_blocks def _reference_values(self, block_index): """Tuple of the reference proportions and bases for @@ -2493,6 +2521,11 @@ def _counts_are_defined(self): return self._second_order_measures.column_comparable_counts.is_defined return self._second_order_measures.row_comparable_counts.is_defined + @lazyproperty + def _squared_weights_are_defined(self): + """Bool indicating whether squared weights are defined.""" + return self._second_order_measures.column_squared_bases.is_defined + class _BaseScaledCountMarginal(_BaseMarginal): """A base class for marginals that depend on the scaled counts.""" @@ -2683,6 +2716,24 @@ def is_defined(self): return self._counts_are_defined +class _MarginSquaredBase(_BaseMarginal): + """The 'margin-squared-weight base', a 1D squared-weight base in the margin.""" + + @lazyproperty + def blocks(self): + """List of the 2 1D 
ndarray "blocks" of the squared-weights count margin. + + These are the base-values and the subtotals. + """ + bases = self._second_order_measures.column_squared_bases.blocks + return [bases[0][0][0, :], bases[0][1][0, :]] + + @lazyproperty + def is_defined(self): + """True if squared weights' counts are defined.""" + return self._squared_weights_are_defined + + class _MarginWeightedBase(_BaseMarginal): """The 'margin-weighted base', a 1D weighted base in the margin diff --git a/src/cr/cube/measures/pairwise_significance.py b/src/cr/cube/measures/pairwise_significance.py index fcb37ad8d..6417319a2 100644 --- a/src/cr/cube/measures/pairwise_significance.py +++ b/src/cr/cube/measures/pairwise_significance.py @@ -115,7 +115,13 @@ def summary_t_stats(self): def t_stats(self): props = self._slice.column_proportions diff = props - props[:, [self._col_idx]] - var_props = props * (1.0 - props) / self._slice.columns_base + squared_base = self._slice.columns_squared_base + if squared_base is not None: + weighted_base = self._slice.columns_base + effective_base = weighted_base**2 / squared_base + var_props = props * (1.0 - props) / effective_base + else: + var_props = props * (1.0 - props) / self._slice.columns_base se_diff = np.sqrt(var_props + var_props[:, [self._col_idx]]) return diff / se_diff diff --git a/tests/fixtures/pairwise-hirotsu-illness-x-occupation-plain-weights.json b/tests/fixtures/pairwise-hirotsu-illness-x-occupation-plain-weights.json new file mode 100644 index 000000000..adeb1c187 --- /dev/null +++ b/tests/fixtures/pairwise-hirotsu-illness-x-occupation-plain-weights.json @@ -0,0 +1,278 @@ +{ + "result": { + "n": 11908, + "counts": [ + 148, + 111, + 645, + 165, + 383, + 96, + 98, + 199, + 59, + 262, + 0, + 444, + 352, + 1911, + 771, + 1829, + 293, + 330, + 874, + 199, + 1320, + 0, + 86, + 49, + 328, + 119, + 311, + 47, + 58, + 155, + 30, + 236, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "dimensions": [ + { + "derived": false, + 
"references": { + "alias": "illness", + "name": "Illness" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 3, + "id": 3, + "name": "slight", + "missing": false + }, + { + "numeric_value": 1, + "id": 1, + "name": "medium", + "missing": false + }, + { + "numeric_value": 2, + "id": 2, + "name": "serious", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + }, + { + "derived": false, + "references": { + "alias": "occupation", + "name": "Occupation", + "view": { + "show_counts": false, + "column_width": null, + "transform": { + "insertions": [] + }, + "include_missing": false, + "show_numeric_values": false + } + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "id": 1, + "missing": false, + "name": "1" + }, + { + "numeric_value": 2, + "id": 2, + "missing": false, + "name": "2" + }, + { + "numeric_value": 3, + "id": 3, + "missing": false, + "name": "3" + }, + { + "numeric_value": 4, + "id": 4, + "missing": false, + "name": "4" + }, + { + "numeric_value": 5, + "id": 5, + "missing": false, + "name": "5" + }, + { + "numeric_value": 6, + "id": 6, + "missing": false, + "name": "6" + }, + { + "numeric_value": 7, + "id": 7, + "missing": false, + "name": "7" + }, + { + "numeric_value": 8, + "id": 8, + "missing": false, + "name": "8" + }, + { + "numeric_value": 9, + "id": 9, + "missing": false, + "name": "9" + }, + { + "numeric_value": 10, + "id": 10, + "missing": false, + "name": "10" + }, + { + "numeric_value": null, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + } + ], + "measures": { + "count": { + "metadata": { + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + }, + "references": {}, + "derived": true + }, + "data": [ + 152.26651785678774, + 108.11936370002984, + 649.4631580565033, + 174.1549767954934, + 
382.0049076376127, + 96.78265446188736, + 92.44999508999025, + 213.1841339031303, + 51.0032100626308, + 242.63994241483408, + 0, + 448.6809180689826, + 331.5811113033111, + 1943.8819309513028, + 781.813064486398, + 1811.244621262913, + 255.29591276288795, + 316.3224085369704, + 864.0405558169539, + 215.53912407138873, + 1350.409010428394, + 0, + 89.85663035040939, + 51.393718588284514, + 354.07277635019744, + 110.13130107993513, + 287.5165380439115, + 52.01445200607955, + 58.36696673905563, + 163.0970341447683, + 32.922129994522194, + 227.75093503442997, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0 + } + }, + "missing": 0, + "filter_stats": { + "is_cat_date": false, + "filtered": { + "unweighted": { + "selected": 11908, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 11908.000000000002, + "other": 0, + "missing": 0 + } + }, + "filtered_complete": { + "unweighted": { + "selected": 11908, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 11908.000000000002, + "other": 0, + "missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 11908, + "weighted_n": 11908.000000000002 + }, + "filtered": { + "unweighted_n": 11908, + "weighted_n": 11908.000000000002 + }, + "element": "crunch:cube" + } +} diff --git a/tests/fixtures/pairwise-hirotsu-illness-x-occupation-square-weights.json b/tests/fixtures/pairwise-hirotsu-illness-x-occupation-square-weights.json new file mode 100644 index 000000000..9d5106c23 --- /dev/null +++ b/tests/fixtures/pairwise-hirotsu-illness-x-occupation-square-weights.json @@ -0,0 +1,339 @@ +{ + "result": { + "n": 11908, + "counts": [ + 148, + 111, + 645, + 165, + 383, + 96, + 98, + 199, + 59, + 262, + 0, + 444, + 352, + 1911, + 771, + 1829, + 293, + 330, + 874, + 199, + 1320, + 0, + 86, + 49, + 328, + 119, + 311, + 47, + 58, + 155, + 30, + 236, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "dimensions": [ + { + "derived": false, + "references": { + "alias": 
"illness", + "name": "Illness" + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 3, + "id": 3, + "name": "slight", + "missing": false + }, + { + "numeric_value": 1, + "id": 1, + "name": "medium", + "missing": false + }, + { + "numeric_value": 2, + "id": 2, + "name": "serious", + "missing": false + }, + { + "numeric_value": null, + "id": -1, + "name": "No Data", + "missing": true + } + ] + } + }, + { + "derived": false, + "references": { + "alias": "occupation", + "name": "Occupation", + "view": { + "show_counts": false, + "column_width": null, + "transform": { + "insertions": [] + }, + "include_missing": false, + "show_numeric_values": false + } + }, + "type": { + "ordinal": false, + "class": "categorical", + "categories": [ + { + "numeric_value": 1, + "id": 1, + "missing": false, + "name": "1" + }, + { + "numeric_value": 2, + "id": 2, + "missing": false, + "name": "2" + }, + { + "numeric_value": 3, + "id": 3, + "missing": false, + "name": "3" + }, + { + "numeric_value": 4, + "id": 4, + "missing": false, + "name": "4" + }, + { + "numeric_value": 5, + "id": 5, + "missing": false, + "name": "5" + }, + { + "numeric_value": 6, + "id": 6, + "missing": false, + "name": "6" + }, + { + "numeric_value": 7, + "id": 7, + "missing": false, + "name": "7" + }, + { + "numeric_value": 8, + "id": 8, + "missing": false, + "name": "8" + }, + { + "numeric_value": 9, + "id": 9, + "missing": false, + "name": "9" + }, + { + "numeric_value": 10, + "id": 10, + "missing": false, + "name": "10" + }, + { + "numeric_value": null, + "id": -1, + "missing": true, + "name": "No Data" + } + ] + } + } + ], + "measures": { + "count": { + "metadata": { + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + }, + "references": {}, + "derived": true + }, + "data": [ + 152.26651785678774, + 108.11936370002984, + 649.4631580565033, + 174.1549767954934, + 382.0049076376127, + 
96.78265446188736, + 92.44999508999025, + 213.1841339031303, + 51.0032100626308, + 242.63994241483408, + 0, + 448.6809180689826, + 331.5811113033111, + 1943.8819309513028, + 781.813064486398, + 1811.244621262913, + 255.29591276288795, + 316.3224085369704, + 864.0405558169539, + 215.53912407138873, + 1350.409010428394, + 0, + 89.85663035040939, + 51.393718588284514, + 354.07277635019744, + 110.13130107993513, + 287.5165380439115, + 52.01445200607955, + 58.36696673905563, + 163.0970341447683, + 32.922129994522194, + 227.75093503442997, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0 + }, + "weighted_squared_count": { + "metadata": { + "type": { + "class": "numeric", + "integer": false, + "missing_reasons": { + "No Data": -1 + }, + "missing_rules": {} + }, + "references": {}, + "derived": true + }, + "data": [ + 284.5589837693164, + 227.7059762860058, + 1417.4762749450213, + 367.250893545145, + 704.8135722944284, + 199.8081628385786, + 169.39634454712765, + 467.41789800157187, + 94.33083156567952, + 437.615841989356, + 0, + 879.0697203093243, + 590.4055771337064, + 3795.401122860506, + 1568.1022338815494, + 3499.752914881418, + 478.48207043688035, + 674.8940443454467, + 1612.6769199472467, + 515.8409126097126, + 2659.1616467093972, + 0, + 179.48440423641054, + 119.86700499554048, + 770.327188575003, + 204.20085240630954, + 540.8816545552005, + 103.48755164814875, + 104.35614864137966, + 355.9445509992987, + 64.56491533760007, + 427.3713311730636, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "n_missing": 0 + } + }, + "missing": 0, + "filter_stats": { + "is_cat_date": false, + "filtered": { + "unweighted": { + "selected": 11908, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 11908.000000000002, + "other": 0, + "missing": 0 + } + }, + "filtered_complete": { + "unweighted": { + "selected": 11908, + "other": 0, + "missing": 0 + }, + "weighted": { + "selected": 11908.000000000002, + "other": 0, + 
"missing": 0 + } + } + }, + "unfiltered": { + "unweighted_n": 11908, + "weighted_n": 11908.000000000002 + }, + "filtered": { + "unweighted_n": 11908, + "weighted_n": 11908.000000000002 + }, + "element": "crunch:cube" + } +} diff --git a/tests/integration/test_cubepart.py b/tests/integration/test_cubepart.py index 193da497f..1f68a9931 100644 --- a/tests/integration/test_cubepart.py +++ b/tests/integration/test_cubepart.py @@ -1903,8 +1903,8 @@ def it_uses_squared_weights_for_effect_calculation(self): np.testing.assert_almost_equal( slice_.pairwise_significance_t_stats(1), [ - [NA, 0.0, 0.0, 0.41513323, NA, NA, NA, -0.83026647, 0.41513323, NA], - [NA, 0.0, 0.0, -0.41513323, NA, NA, NA, 0.83026647, -0.41513323, NA], + [NA, 0.0, 0.0, 0.9486833, NA, NA, NA, -1.8973666, 0.9486833, NA], + [NA, 0.0, 0.0, -0.9486833, NA, NA, NA, 1.8973666, -0.9486833, NA], ], ) diff --git a/tests/integration/test_pairwise_significance.py b/tests/integration/test_pairwise_significance.py index b71f06e2e..1dad43b45 100644 --- a/tests/integration/test_pairwise_significance.py +++ b/tests/integration/test_pairwise_significance.py @@ -438,6 +438,128 @@ def test_cat_x_cat_summary_pairwise_indices(self): expected_indices = np.array([(1, 2), (0, 2), (0, 1)], dtype="i,i") np.testing.assert_array_equal(pairwise_indices, expected_indices) + def test_cat_x_cat_effective_weight_summary_pairwise_indices(self): + slice_ = Cube( + CR.PAIRWISE_HIROTSU_ILLNESS_X_OCCUPATION_SQUARE_WEIGHTS + ).partitions[0] + np.testing.assert_almost_equal( + slice_.columns_squared_base, + np.array( + [ + 1343.11310832, + 937.97855842, + 5983.20458638, + 2139.55397983, + 4745.44814173, + 781.77778492, + 948.64653753, + 2436.03936895, + 674.73665951, + 3524.14881987, + ] + ), + ) + pairwise_indices = slice_.pairwise_indices + np.testing.assert_array_equal( + pairwise_indices, + np.array( + [ + [ + (3, 4, 9), + (4, 9), + (3, 4, 7, 9), + (), + (), + (3, 4, 7, 9), + (9,), + (9,), + (), + (), + ], + [(), (), (), (0, 2, 5), (0, 2, 
5), (), (), (), (), (0, 1, 2, 5)], + [(), (), (), (), (), (), (), (), (), ()], + ], + dtype=tuple, + ), + ) + summary_pairwise_indices = slice_.summary_pairwise_indices + np.testing.assert_array_equal( + summary_pairwise_indices, + np.array( + [ + (1, 5, 6, 8), + (5, 8), + (0, 1, 3, 4, 5, 6, 7, 8, 9), + (0, 1, 5, 6, 8), + (0, 1, 3, 5, 6, 7, 8, 9), + (8,), + (8,), + (0, 1, 3, 5, 6, 8), + (), + (0, 1, 3, 5, 6, 7, 8), + ], + dtype=tuple, + ), + ) + + def test_cat_x_cat_plain_weight_summary_pairwise_indices(self): + slice_ = Cube( + CR.PAIRWISE_HIROTSU_ILLNESS_X_OCCUPATION_PLAIN_WEIGHTS + ).partitions[0] + pairwise_indices = slice_.pairwise_indices + np.testing.assert_array_equal( + pairwise_indices, + np.array( + [ + [ + (3, 4, 7, 9), + (3, 4, 7, 9), + (3, 4, 7, 8, 9), + (9,), + (), + (3, 4, 7, 8, 9), + (4, 9), + (9,), + (), + (), + ], + [ + (), + (), + (), + (0, 1, 2, 5, 6), + (0, 1, 2, 5, 6, 7), + (), + (), + (0, 2, 5), + (0, 2, 5), + (0, 1, 2, 5, 6, 7), + ], + [(), (), (), (), (), (), (), (3,), (), ()], + ], + dtype=tuple, + ), + ) + summary_pairwise_indices = slice_.summary_pairwise_indices + np.testing.assert_array_equal( + summary_pairwise_indices, + np.array( + [ + (1, 5, 6, 8), + (5, 8), + (0, 1, 3, 4, 5, 6, 7, 8, 9), + (0, 1, 5, 6, 8), + (0, 1, 3, 5, 6, 7, 8, 9), + (8,), + (8,), + (0, 1, 3, 5, 6, 8), + (), + (0, 1, 3, 5, 6, 7, 8), + ], + dtype=tuple, + ), + ) + def test_cat_x_cat_wgtd_pairwise_t_tests(self): """The weights on this cube demonstrate much higher variance (less extreme t values, and higher associated p-values) than if weighted_n diff --git a/tests/unit/matrix/test_measure.py b/tests/unit/matrix/test_measure.py index ad01ce6e4..91684c574 100644 --- a/tests/unit/matrix/test_measure.py +++ b/tests/unit/matrix/test_measure.py @@ -903,8 +903,8 @@ def it_calculates_the_base_values_to_help( _calculate_t_stats_.assert_called_once_with(pairwise_tstat, 0.5, 1, 9, 0) def it_provides_the_bases_to_help(self, second_order_measures_): - # 
second_order_measures_.column_unweighted_bases.blocks = [1, 2] - second_order_measures_.column_effective_bases.blocks = [1, 2] + second_order_measures_.column_unweighted_bases.blocks = [1, 2] + second_order_measures_.columns_squared_base.is_defined = False pairwise_tstat = _PairwiseSigTstats(None, second_order_measures_, None, None) assert pairwise_tstat._bases == [1, 2]