Skip to content

Commit

Permalink
[#186590795]: Correct numbers at last
Browse files Browse the repository at this point in the history
  • Loading branch information
slobodan-ilic committed Dec 5, 2023
1 parent 709f634 commit 95339a3
Show file tree
Hide file tree
Showing 9 changed files with 831 additions and 26 deletions.
13 changes: 13 additions & 0 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,19 @@ def columns_base(self):
# --- otherwise columns-base is a vector ---
return self._assemble_marginal(self._measures.columns_unweighted_base)

@lazyproperty
def columns_squared_base(self):
    """Optional 1D np.float64 ndarray of squared weights, summed for each column.

    This measure has to be requested from zz9 explicitly, so it may be absent;
    the value is `None` when the underlying measure reports it is not defined.
    It is only used in the calculation of the pairwise comparisons, where
    weights are applied, in order to adjust for the "design effect" of the
    study (reduce the inflated Nw).
    """
    squared_base = self._measures.columns_squared_base
    if squared_base.is_defined:
        return self._assemble_marginal(squared_base)
    return None

@lazyproperty
def columns_dimension_description(self):
"""str description assigned to columns-dimension."""
Expand Down
12 changes: 4 additions & 8 deletions src/cr/cube/matrix/cubemeasure.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,14 @@ def weighted_cube_counts(self):
)

@lazyproperty
def weighted_squared_cube_counts(self):
    """Optional _BaseCubeCounts subclass object for squared weights' counts.

    Returns `None` when the cube-result carries no squared-weights counts, so
    callers can detect that the measure is unavailable instead of silently
    receiving a substitute set of counts.
    """
    squared_counts = self._cube.weighted_squared_counts
    if squared_counts is None:
        return None

    return _BaseCubeCounts.factory(
        squared_counts, False, self._cube, self._dimensions, self._slice_idx
    )


Expand Down
77 changes: 64 additions & 13 deletions src/cr/cube/matrix/measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,9 @@ def column_unweighted_bases(self):
return _ColumnUnweightedBases(self._dimensions, self, self._cube_measures)

@lazyproperty
def column_squared_bases(self):
    """_ColumnSquaredBases measure object for this cube-result."""
    return _ColumnSquaredBases(self._dimensions, self, self._cube_measures)

@lazyproperty
def column_weighted_bases(self):
Expand Down Expand Up @@ -183,6 +182,13 @@ def columns_unweighted_base(self):
self._dimensions, self, self._cube_measures, MO.COLUMNS
)

@lazyproperty
def columns_squared_base(self):
    """_MarginSquaredBase for columns measure object for this cube-result.

    The measure object describes the squared-weighted-N for each matrix column.
    """
    margin_squared_base = _MarginSquaredBase(
        self._dimensions, self, self._cube_measures, MO.COLUMNS
    )
    return margin_squared_base

@lazyproperty
def columns_weighted_base(self):
"""_MarginWeightedBase for columns measure object for this cube-result.
Expand Down Expand Up @@ -656,8 +662,12 @@ def _weighted_cube_counts(self):
return self._cube_measures.weighted_cube_counts

@lazyproperty
def _weighted_squared_cube_counts(self):
    """Optional _BaseCubeCounts subclass instance for this measure.

    Provides cube measures associated with weights' squared counts. The value
    is taken as-is from the cube-measures object and is `None` when that object
    has no squared-weights counts.
    """
    return self._cube_measures.weighted_squared_cube_counts


class _SmoothedMeasure(_BaseSecondOrderMeasure):
Expand Down Expand Up @@ -1139,16 +1149,21 @@ def _subtotal_rows(self):
return np.broadcast_to(self._base_values[0, :], subtotal_rows.shape)


class _ColumnSquaredBases(_ColumnWeightedBases):
    """Provides the column-squared-bases measure for a matrix."""

    @lazyproperty
    def _base_values(self):
        """2D np.float64 ndarray of squared weight denominator for each column.

        This is the first "block" and has the shape of the cube-measure (no
        insertions).
        """
        return self._weighted_squared_cube_counts.column_bases

    @lazyproperty
    def is_defined(self):
        """Bool indicating whether squared weights' counts are defined."""
        return self._weighted_squared_cube_counts is not None


class _Means(_BaseSecondOrderMeasure):
Expand Down Expand Up @@ -1510,8 +1525,21 @@ def _base_values(self):
@lazyproperty
def _bases(self):
    """2D array of 2D ndarray "blocks" for the column bases.

    When squared weights are defined, each block is the effective base,
    computed element-wise as `unweighted_base ** 2 / squared_base`. Otherwise
    the column unweighted bases blocks are returned unchanged.
    """
    unweighted_blocks = self._second_order_measures.column_unweighted_bases.blocks
    if not self._second_order_measures.columns_squared_base.is_defined:
        return unweighted_blocks

    squared_blocks = self._second_order_measures.column_squared_bases.blocks
    # --- blocks are paired position by position, so the block layout of the
    # --- result mirrors that of the unweighted bases
    return [
        [
            unweighted ** 2 / squared
            for unweighted, squared in zip(unweighted_row, squared_row)
        ]
        for unweighted_row, squared_row in zip(unweighted_blocks, squared_blocks)
    ]

def _reference_values(self, block_index):
"""Tuple of the reference proportions and bases for
Expand Down Expand Up @@ -2363,7 +2391,7 @@ def _calculate_zscores(self, counts, table_bases, row_bases, column_bases):
* column_bases
* (table_bases - row_bases)
* (table_bases - column_bases)
/ table_bases**3
/ table_bases ** 3
)
return (counts - expected_counts) / np.sqrt(variance)

Expand Down Expand Up @@ -2493,6 +2521,11 @@ def _counts_are_defined(self):
return self._second_order_measures.column_comparable_counts.is_defined
return self._second_order_measures.row_comparable_counts.is_defined

@lazyproperty
def _squared_weights_are_defined(self):
    """True when the column-squared-bases measure reports itself as defined."""
    squared_bases = self._second_order_measures.column_squared_bases
    return squared_bases.is_defined


class _BaseScaledCountMarginal(_BaseMarginal):
"""A base class for marginals that depend on the scaled counts."""
Expand Down Expand Up @@ -2683,6 +2716,24 @@ def is_defined(self):
return self._counts_are_defined


class _MarginSquaredBase(_BaseMarginal):
    """The 'margin-squared-weight base', a 1D squared-weight base in the margin."""

    @lazyproperty
    def blocks(self):
        """List of the 2 1D ndarray "blocks" of the squared-weights count margin.

        These are the base-values and the subtotals, each taken as the first
        row of the corresponding top block of the column-squared-bases measure.
        """
        top_blocks = self._second_order_measures.column_squared_bases.blocks[0]
        base_values, subtotals = top_blocks[0], top_blocks[1]
        # NOTE(review): presumably every row of a block carries the same
        # per-column base, which is why only row 0 is read -- confirm.
        return [base_values[0, :], subtotals[0, :]]

    @lazyproperty
    def is_defined(self):
        """True if squared weights' counts are defined."""
        return self._squared_weights_are_defined


class _MarginWeightedBase(_BaseMarginal):
"""The 'margin-weighted base', a 1D weighted base in the margin
Expand Down
8 changes: 7 additions & 1 deletion src/cr/cube/measures/pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,13 @@ def summary_t_stats(self):
def t_stats(self):
    """ndarray of t-statistics comparing each column with the selected column.

    Each value is the difference between a column's proportion and that of the
    column at `self._col_idx`, divided by the standard error of that
    difference. When the slice provides a squared-weights base, the variance
    denominator is the effective base `columns_base ** 2 / squared_base`;
    otherwise `columns_base` is used directly.
    """
    props = self._slice.column_proportions
    diff = props - props[:, [self._col_idx]]

    # NOTE(review): `columns_base` looks like the unweighted base even though
    # it was previously bound to a local named `weighted_base` -- confirm.
    base = self._slice.columns_base
    squared_base = self._slice.columns_squared_base
    if squared_base is not None:
        # --- shrink the base to account for the spread of the weights ---
        base = base ** 2 / squared_base

    var_props = props * (1.0 - props) / base
    se_diff = np.sqrt(var_props + var_props[:, [self._col_idx]])
    return diff / se_diff

Expand Down
Loading

0 comments on commit 95339a3

Please sign in to comment.