Skip to content

Commit

Permalink
Merge pull request #391 from Crunch-io/squared-weights-186590795
Browse files Browse the repository at this point in the history
[#186590795]: Implement squared weights in `cr.cube`
  • Loading branch information
slobodan-ilic authored Dec 7, 2023
2 parents db39964 + 9443014 commit 981cbe9
Show file tree
Hide file tree
Showing 14 changed files with 1,286 additions and 2 deletions.
31 changes: 31 additions & 0 deletions src/cr/cube/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,15 @@ def weighted_valid_counts(self) -> Optional[np.ndarray]:
self._valid_idxs
].astype(np.float64)

@lazyproperty
def weighted_squared_counts(self) -> Optional[np.ndarray]:
    """Optional float64 ndarray of the weighted-squared-counts measure.

    The measure's raw cube array is indexed by `self._valid_idxs` and cast to
    np.float64. Returns None when the cube has no weighted-squared-counts measure.
    """
    measure = self._measures.weighted_squared_counts
    if measure is None:
        return None
    selected = measure.raw_cube_array[self._valid_idxs]
    return selected.astype(np.float64)

@lazyproperty
def _all_dimensions(self) -> list:
"""List of all dimensions (not just user-apparent ones) for this cube."""
Expand Down Expand Up @@ -847,6 +856,14 @@ def weighted_valid_counts(self) -> 'Optional["_WeightedValidCountsMeasure"]':
)
return valid_counts if valid_counts.raw_cube_array is not None else None

@lazyproperty
def weighted_squared_counts(self):
    """Optional `_WeightedSquaredCountsMeasure` object for this cube.

    Returns None when the measure object has no raw cube array, i.e. when its
    `.raw_cube_array` is None.
    """
    measure = _WeightedSquaredCountsMeasure(
        self._cube_dict, self._all_dimensions, self._cube_idx_arg
    )
    if measure.raw_cube_array is None:
        return None
    return measure


class _BaseMeasure:
"""Base class for measure objects."""
Expand Down Expand Up @@ -1100,6 +1117,20 @@ def _flat_values(self) -> Optional[np.ndarray]:
return np.array(weighted_counts, dtype=np.float64)


class _WeightedSquaredCountsMeasure(_BaseMeasure):
    """Weighted squared counts measure of a cube."""

    @lazyproperty
    def _flat_values(self) -> Optional[np.ndarray]:
        """Optional 1D np.float64 ndarray of the weighted squared counts.

        Values are read from the "data" item of the "weighted_squared_count"
        measure in the cube-dict. Returns None when that measure is absent or
        its data is empty.
        """
        measures = self._cube_dict["result"]["measures"]
        data = measures.get("weighted_squared_count", {}).get("data", [])
        if not data:
            return None
        return np.array(data, dtype=np.float64)


class _WeightedValidCountsMeasure(_BaseMeasure):
"""Weighted Valid counts for cube."""

Expand Down
13 changes: 13 additions & 0 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,19 @@ def columns_base(self):
# --- otherwise columns-base is a vector ---
return self._assemble_marginal(self._measures.columns_unweighted_base)

@lazyproperty
def columns_squared_base(self):
    """Optional 1D np.float64 ndarray of squared weights, summed for each column.

    This measure must be requested from zz9 explicitly. It is only used in the
    calculation of pairwise comparisons where weights are applied, to adjust for
    the "design effect" of the study (reduce the inflated Nw).

    Returns None when the squared-base measure is not defined.
    """
    squared_base = self._measures.columns_squared_base
    if squared_base.is_defined:
        return self._assemble_marginal(squared_base)
    return None

@lazyproperty
def columns_dimension_description(self):
"""str description assigned to columns-dimension."""
Expand Down
1 change: 1 addition & 0 deletions src/cr/cube/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ class CUBE_MEASURE(enum.Enum):
VALID_OVERLAP = "valid_overlap"
UNWEIGHTED_VALID_COUNT = "valid_count_unweighted"
WEIGHTED_VALID_COUNT = "valid_count_weighted"
WEIGHTED_SQUARED_COUNT = "weighted_squared_count"


NUMERIC_CUBE_MEASURES = frozenset(
Expand Down
11 changes: 11 additions & 0 deletions src/cr/cube/matrix/cubemeasure.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,17 @@ def weighted_cube_counts(self):
counts, diff_nans, self._cube, self._dimensions, self._slice_idx
)

@lazyproperty
def weighted_squared_cube_counts(self):
    """Optional cube-counts object built from the cube's squared-weight counts.

    The object is produced by `_BaseCubeCounts.factory()`. Returns None when the
    cube provides no weighted squared counts.
    """
    squared_counts = self._cube.weighted_squared_counts
    if squared_counts is not None:
        # --- second argument is passed as a constant False here; presumably this
        # --- is the `diff_nans` flag of the factory -- TODO confirm
        return _BaseCubeCounts.factory(
            squared_counts, False, self._cube, self._dimensions, self._slice_idx
        )
    return None


class _BaseCubeMeasure:
"""Base class for all cube-measure objects."""
Expand Down
76 changes: 75 additions & 1 deletion src/cr/cube/matrix/measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ def column_unweighted_bases(self):
"""_ColumnUnweightedBases measure object for this cube-result."""
return _ColumnUnweightedBases(self._dimensions, self, self._cube_measures)

@lazyproperty
def column_squared_bases(self):
    """`_ColumnSquaredBases` measure object for this cube-result."""
    measure_args = (self._dimensions, self, self._cube_measures)
    return _ColumnSquaredBases(*measure_args)

@lazyproperty
def column_weighted_bases(self):
"""_ColumnWeightedBases measure object for this cube-result."""
Expand Down Expand Up @@ -177,6 +182,13 @@ def columns_unweighted_base(self):
self._dimensions, self, self._cube_measures, MO.COLUMNS
)

@lazyproperty
def columns_squared_base(self):
    """`_MarginSquaredBase` measure object for the columns of this cube-result.

    Represents the squared-weighted-N of each matrix column.
    """
    orientation = MO.COLUMNS
    return _MarginSquaredBase(
        self._dimensions, self, self._cube_measures, orientation
    )

@lazyproperty
def columns_weighted_base(self):
"""_MarginWeightedBase for columns measure object for this cube-result.
Expand Down Expand Up @@ -649,6 +661,14 @@ def _weighted_cube_counts(self):
"""
return self._cube_measures.weighted_cube_counts

@lazyproperty
def _weighted_squared_cube_counts(self):
    """Cube-counts object for the squared weights' counts of this measure.

    This is the `.weighted_squared_cube_counts` value of the cube-measures
    object held by this measure.
    """
    cube_measures = self._cube_measures
    return cube_measures.weighted_squared_cube_counts


class _SmoothedMeasure(_BaseSecondOrderMeasure):
"""Mixin providing `._smoother` property for smoothed measures."""
Expand Down Expand Up @@ -1129,6 +1149,23 @@ def _subtotal_rows(self):
return np.broadcast_to(self._base_values[0, :], subtotal_rows.shape)


class _ColumnSquaredBases(_ColumnWeightedBases):
    """Provides the column-squared-bases measure for a matrix."""

    @lazyproperty
    def is_defined(self):
        """True when squared weights' counts are available for this measure."""
        squared_counts = self._weighted_squared_cube_counts
        return squared_counts is not None

    @lazyproperty
    def _base_values(self):
        """2D np.float64 ndarray of the squared-weight denominator of each column.

        This is the first "block"; it has the shape of the cube-measure (no
        insertions).
        """
        squared_counts = self._weighted_squared_cube_counts
        return squared_counts.column_bases


class _Means(_BaseSecondOrderMeasure):
"""Provides the mean measure for a matrix."""

Expand Down Expand Up @@ -1488,7 +1525,21 @@ def _base_values(self):
@lazyproperty
def _bases(self):
    """2D list of 2D ndarray "blocks" of the column bases for this measure.

    These are the column unweighted bases, unless squared-weight bases are
    defined. In that case each block is replaced by its "effective" base,
    `unweighted_base ** 2 / squared_base`, computed element-wise.
    """
    unweighted_blocks = self._second_order_measures.column_unweighted_bases.blocks
    if not self._second_order_measures.columns_squared_base.is_defined:
        return unweighted_blocks

    squared_blocks = self._second_order_measures.column_squared_bases.blocks
    # NOTE(review): Kish's effective sample size is (sum of weights)^2 divided by
    # the sum of squared weights; the numerator here uses the *unweighted* bases.
    # Confirm this is the intended definition.
    return [
        [
            unweighted ** 2 / squared
            for unweighted, squared in zip(unweighted_row, squared_row)
        ]
        for unweighted_row, squared_row in zip(unweighted_blocks, squared_blocks)
    ]

def _reference_values(self, block_index):
"""Tuple of the reference proportions and bases for
Expand Down Expand Up @@ -2470,6 +2521,11 @@ def _counts_are_defined(self):
return self._second_order_measures.column_comparable_counts.is_defined
return self._second_order_measures.row_comparable_counts.is_defined

@lazyproperty
def _squared_weights_are_defined(self):
    """True when the column-squared-bases measure reports it is defined."""
    column_squared_bases = self._second_order_measures.column_squared_bases
    return column_squared_bases.is_defined


class _BaseScaledCountMarginal(_BaseMarginal):
"""A base class for marginals that depend on the scaled counts."""
Expand Down Expand Up @@ -2660,6 +2716,24 @@ def is_defined(self):
return self._counts_are_defined


class _MarginSquaredBase(_BaseMarginal):
    """The 'margin-squared-weight base', a 1D squared-weight base in the margin."""

    @lazyproperty
    def is_defined(self):
        """True when squared weights' counts are defined."""
        return self._squared_weights_are_defined

    @lazyproperty
    def blocks(self):
        """List of the two 1D ndarray "blocks" of the squared-weights margin.

        These are the base-values and the subtotals, each taken as the first row
        of the corresponding block in the top row of the column-squared-bases
        blocks.
        """
        top_row = self._second_order_measures.column_squared_bases.blocks[0]
        base_block, subtotal_block = top_row[0], top_row[1]
        return [base_block[0, :], subtotal_block[0, :]]


class _MarginWeightedBase(_BaseMarginal):
"""The 'margin-weighted base', a 1D weighted base in the margin
Expand Down
8 changes: 7 additions & 1 deletion src/cr/cube/measures/pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,13 @@ def summary_t_stats(self):
def t_stats(self):
    """Return ndarray of t-statistics of each column against the reference column.

    The reference column is the one at `self._col_idx`. Each value is the
    difference in column proportions divided by the standard error of that
    difference.

    When the slice provides a squared base (`columns_squared_base` is not None),
    the variance denominator is the "effective" base
    `columns_base ** 2 / columns_squared_base`; otherwise it is `columns_base`.
    """
    props = self._slice.column_proportions
    diff = props - props[:, [self._col_idx]]

    # --- select the denominator once, so the variance is computed exactly once ---
    columns_base = self._slice.columns_base
    squared_base = self._slice.columns_squared_base
    base = columns_base if squared_base is None else columns_base**2 / squared_base

    var_props = props * (1.0 - props) / base
    se_diff = np.sqrt(var_props + var_props[:, [self._col_idx]])
    return diff / se_diff

Expand Down
Loading

0 comments on commit 981cbe9

Please sign in to comment.