Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#186653738] Enhanced Degrees of Freedom Calculation with Squared Weighted Counts #392

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 35 additions & 16 deletions src/cr/cube/matrix/measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -1519,12 +1519,32 @@ def _base_values(self):
# --- Use "body" reference values for base values
(ref_props, ref_bases) = self._reference_values(0)
return self._calculate_t_stats(
self._proportions[0][0], self._bases[0][0], ref_props, ref_bases
self._proportions[0][0], self._column_bases[0][0], ref_props, ref_bases
)

@lazyproperty
def _bases(self):
"""2D array of 2D ndarray "blocks" for the column unweighted bases"""
def _column_bases(self):
"""
Return the 2D array of 2D ndarray "blocks" holding the column bases used for
analysis. Which bases are returned depends on whether the 'squared weighted
counts' measure is defined.

The method first checks whether the 'squared weighted counts' measure is
defined. If it is, the method calculates the 'effective' counts: each
unweighted count is squared and then divided by the corresponding squared
weighted count. The result reflects the 'effective' sample size when
weighting is applied.

If the 'squared weighted counts' measure does not exist, the standard
unweighted counts are used. These counts are represented as a simple 2D array
of 2D ndarray blocks without any modification, directly reflecting the raw,
unweighted counts.

Returns:
numpy.ndarray: A 2D array of 2D ndarray counts, representing the
calculated column bases (either 'effective' or unweighted counts) for
the analysis.
"""
unweighted_blocks = self._second_order_measures.column_unweighted_bases.blocks
if self._second_order_measures.columns_squared_base.is_defined:
squared_blocks = self._second_order_measures.column_squared_bases.blocks
Expand Down Expand Up @@ -1556,10 +1576,10 @@ def _reference_values(self, block_index):
col_idx = self._selected_column_idx
if col_idx < 0:
props = self._proportions[block_index][1]
bases = self._bases[block_index][1]
bases = self._column_bases[block_index][1]
else:
props = self._proportions[block_index][0]
bases = self._bases[block_index][0]
bases = self._column_bases[block_index][0]

return (props[:, [col_idx]], bases[:, [col_idx]])

Expand All @@ -1586,7 +1606,7 @@ def _intersections(self):
# --- Use "inserted" reference values for intersections
(ref_props, ref_variance) = self._reference_values(1)
return self._calculate_t_stats(
self._proportions[1][1], self._bases[1][1], ref_props, ref_variance
self._proportions[1][1], self._column_bases[1][1], ref_props, ref_variance
)

@lazyproperty
Expand All @@ -1600,7 +1620,7 @@ def _subtotal_columns(self):
# --- Use "body" reference values for inserted columns
(ref_props, ref_variance) = self._reference_values(0)
return self._calculate_t_stats(
self._proportions[0][1], self._bases[0][1], ref_props, ref_variance
self._proportions[0][1], self._column_bases[0][1], ref_props, ref_variance
)

@lazyproperty
Expand All @@ -1609,7 +1629,7 @@ def _subtotal_rows(self):
# --- Use "inserted" reference values for inserted rows
(ref_props, ref_variance) = self._reference_values(1)
return self._calculate_t_stats(
self._proportions[1][0], self._bases[1][0], ref_props, ref_variance
self._proportions[1][0], self._column_bases[1][0], ref_props, ref_variance
)


Expand All @@ -1624,18 +1644,18 @@ def blocks(self):
"""2D array of the four 2D "blocks" making up this measure."""
col_idx = self._selected_column_idx
t_stats = self._second_order_measures.pairwise_t_stats(col_idx).blocks
column_bases = self._second_order_measures.column_unweighted_bases.blocks
body_selected_base = self._selected_columns_base(0)
ins_selected_base = self._selected_columns_base(1)
col_bases = self._column_bases

return [
[
self._p_vals(t_stats[0][0], column_bases[0][0], body_selected_base),
self._p_vals(t_stats[0][1], column_bases[0][1], body_selected_base),
self._p_vals(t_stats[0][0], col_bases[0][0], body_selected_base),
self._p_vals(t_stats[0][1], col_bases[0][1], body_selected_base),
],
[
self._p_vals(t_stats[1][0], column_bases[1][0], ins_selected_base),
self._p_vals(t_stats[1][1], column_bases[1][1], ins_selected_base),
self._p_vals(t_stats[1][0], col_bases[1][0], ins_selected_base),
self._p_vals(t_stats[1][1], col_bases[1][1], ins_selected_base),
],
]

Expand All @@ -1661,11 +1681,10 @@ def _selected_columns_base(self, table_index):
don't have to broadcast.
"""
col_idx = self._selected_column_idx
column_bases = self._second_order_measures.column_unweighted_bases.blocks
return (
column_bases[table_index][1][:, [col_idx]]
self._column_bases[table_index][1][:, [col_idx]]
if col_idx < 0
else column_bases[table_index][0][:, [col_idx]]
else self._column_bases[table_index][0][:, [col_idx]]
)


Expand Down
7 changes: 7 additions & 0 deletions tests/integration/test_cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -1907,6 +1907,13 @@ def it_uses_squared_weights_for_effect_calculation(self):
[NA, 0.0, 0.0, -0.9486833, NA, NA, NA, 1.8973666, -0.9486833, NA],
],
)
np.testing.assert_almost_equal(
slice_._measures.pairwise_p_vals(1).blocks[0][0],
[
[NA, 1.0, 1.0, 0.66381998, NA, NA, NA, 0.19510957, 0.66381998, NA],
[NA, 1.0, 1.0, 0.66381998, NA, NA, NA, 0.19510957, 0.66381998, NA],
],
)


class Describe_Strand:
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/matrix/test_measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,7 +907,7 @@ def it_provides_the_bases_to_help(self, second_order_measures_):
second_order_measures_.columns_squared_base.is_defined = False
pairwise_tstat = _PairwiseSigTstats(None, second_order_measures_, None, None)

assert pairwise_tstat._bases == [1, 2]
assert pairwise_tstat._column_bases == [1, 2]

def it_can_calculate_the_t_stat_to_help(self):
pairwise_tstat = _PairwiseSigTstats(None, None, None, None)
Expand Down Expand Up @@ -1039,7 +1039,7 @@ def it_provides_the_subtotal_rows_to_help(

@pytest.fixture
def _bases_prop_(self, request):
return property_mock(request, _PairwiseSigTstats, "_bases")
return property_mock(request, _PairwiseSigTstats, "_column_bases")

@pytest.fixture
def _calculate_t_stats_(self, request):
Expand Down