Skip to content

Commit

Permalink
Merge pull request #398 from Crunch-io/inflate-single-col-filter-cube…
Browse files Browse the repository at this point in the history
…s-187231707

augment single col filter cubes
  • Loading branch information
ernestoarbitrio authored Mar 19, 2024
2 parents 33948c7 + fff9313 commit ec15755
Show file tree
Hide file tree
Showing 2 changed files with 209 additions and 16 deletions.
100 changes: 89 additions & 11 deletions src/cr/cube/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,13 @@ def iter_cubes() -> Iterator[Cube]:
population=self._population,
mask_size=self._min_base,
)
cube = (
cube.augment_response(self._cube_responses[0])
if self._is_multi_cube
and cube.is_single_filter_col_cube
and idx > 0
else cube
)
# --- numeric-measures cubes require inflation to restore their
# --- rows-dimension, others don't
yield cube.inflate() if self._is_numeric_measure else cube
Expand Down Expand Up @@ -245,6 +252,69 @@ def available_measures(self) -> FrozenSet[CUBE_MEASURE]:
cube_measures = self._cube_response.get("result", {}).get("measures", {}).keys()
return frozenset(CUBE_MEASURE(m) for m in cube_measures)

def augment_response(self, summary_cube_resp) -> "Cube":
    """Return a cube whose response is padded to the summary cube's shape.

    CubeSet calls this while iterating its cube responses, for single-column
    filter cubes at index > 0, passing the first (summary) cube response.

    A single-column filter cube may only report the rows it actually counted,
    so its `counts` can be shorter than the summary cube's and its rows would
    be rendered against the wrong row labels. When the two `counts` lengths
    differ, the filter-cube response is rewritten to carry every element of
    the summary cube's first dimension, each reported count is placed in the
    slot of the summary element having the same value, and all other slots
    are filled with 0:

            summary elements:  A  B  C  <missing>
            filter elements:   A  C  <missing>      counts: [1, 1, 0]
            augmented counts:  [1, 0, 1, 0]

    Only elements whose value is an `int` or `str` are matched; elements with
    any other value (e.g. the dict carried by a missing entry) keep a 0 count.

    Note that this cube's response dict is updated in place and then used to
    build the returned cube, and that the summary cube's elements list is
    shared (not copied) with the augmented response.

    `summary_cube_resp` is the response dict of the summary cube. Returns
    `self` unchanged when both responses have the same number of counts,
    otherwise a new `Cube` built from the augmented response.
    """
    cube_resp = self._cube_response
    result = cube_resp["result"]
    summary_result = summary_cube_resp["result"]
    n_slots = len(summary_result["counts"])

    # --- same shape as the summary cube, nothing to augment ---
    if len(result["counts"]) == n_slots:
        return self

    summary_elements = summary_result["dimensions"][0]["type"]["elements"]
    dimension_type = result["dimensions"][0]["type"]

    # --- pair each reported count with the value of its *own* element, so the
    # --- mapping does not depend on element order or on where non-matchable
    # --- (e.g. missing) elements appear in the filter-cube response
    counts_by_value = {
        element.get("value"): count
        for element, count in zip(dimension_type["elements"], result["counts"])
        if isinstance(element.get("value"), (int, str))
    }

    # --- counts are aligned with element *position*, not with element id ---
    data = [0] * n_slots
    for idx, element in enumerate(summary_elements):
        value = element.get("value")
        if isinstance(value, (int, str)) and value in counts_by_value:
            data[idx] = counts_by_value[value]

    dimension_type["elements"] = summary_elements
    result["counts"] = data
    # --- separate list so later edits to one payload can't leak into the other
    result["measures"]["count"]["data"] = list(data)

    return Cube(
        cube_resp,
        self._cube_idx_arg,
        self._transforms_dict,
        self._population,
        self._mask_size,
    )

@lazyproperty
def counts(self) -> np.ndarray:
return self.counts_with_missings[self._valid_idxs]
Expand Down Expand Up @@ -345,6 +415,11 @@ def has_weighted_counts(self) -> bool:
"""True if cube response has weighted count data."""
return self.weighted_counts is not None

@lazyproperty
def is_single_filter_col_cube(self) -> bool:
    """True if the cube response is flagged as a single-column filter cube.

    Reads the `is_single_col_cube` entry of the response's "result" dict and
    falls back to False when that entry is absent.
    """
    return self._cube_response["result"].get("is_single_col_cube", False)

@lazyproperty
def means(self) -> Optional[np.ndarray]:
"""Optional float64 ndarray of the cube_means if the measure exists."""
Expand Down Expand Up @@ -563,7 +638,7 @@ def _ca_as_0th(self) -> bool:
a 2D cube-result becomes a single slice.
"""
return (
(self._cube_idx_arg == 0 or self._is_single_filter_col_cube)
(self._cube_idx_arg == 0 or self.is_single_filter_col_cube)
and len(self.dimension_types) > 0
and self.dimension_types[0] == DT.CA
)
Expand All @@ -585,11 +660,6 @@ def _cube_response(self) -> Dict:
f" Cube response must be JSON (str) or dict."
)

@lazyproperty
def _is_single_filter_col_cube(self) -> bool:
    """True if the cube response is flagged as a single-column filter cube.

    Reads the `is_single_col_cube` entry of the response's "result" dict and
    falls back to False when that entry is absent.
    """
    return self._cube_response["result"].get("is_single_col_cube", False)

@lazyproperty
def _measures(self) -> "_Measures":
"""_Measures object for this cube.
Expand Down Expand Up @@ -927,7 +997,8 @@ def _flat_values(self) -> Optional[np.ndarray]:
return None
return np.array(
tuple(
np.nan if type(x) is dict else x for x in self._measure_payload["data"]
np.nan if isinstance(x, dict) else x
for x in self._measure_payload["data"]
),
dtype=np.float64,
).flatten()
Expand Down Expand Up @@ -969,7 +1040,9 @@ def _flat_values(self) -> Optional[np.ndarray]:
if measure_payload is None:
return None
return np.array(
tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]),
tuple(
np.nan if isinstance(x, dict) else x for x in measure_payload["data"]
),
dtype=np.float64,
).flatten()

Expand All @@ -989,7 +1062,8 @@ def _flat_values(self) -> Optional[np.ndarray]:
return None
return np.array(
tuple(
np.nan if type(x) is dict else x for x in self._measure_payload["data"]
np.nan if isinstance(x, dict) else x
for x in self._measure_payload["data"]
),
dtype=np.float64,
).flatten()
Expand Down Expand Up @@ -1028,7 +1102,9 @@ def _flat_values(self) -> Optional[np.ndarray]:
return None

return np.array(
tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]),
tuple(
np.nan if isinstance(x, dict) else x for x in measure_payload["data"]
),
dtype=np.float64,
).flatten()

Expand All @@ -1049,7 +1125,9 @@ def _flat_values(self) -> Optional[np.ndarray]:
return None

return np.array(
tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]),
tuple(
np.nan if isinstance(x, dict) else x for x in measure_payload["data"]
),
dtype=np.float64,
).flatten()

Expand Down
125 changes: 120 additions & 5 deletions tests/unit/test_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ def it_constructs_its_sequence_of_cube_objects_to_help(
self, request, Cube_, _is_numeric_measure_prop_
):
cubes_ = tuple(instance_mock(request, Cube) for _ in range(4))
for c in cubes_:
c.is_single_filter_col_cube = False
Cube_.side_effect = iter(cubes_)
_is_numeric_measure_prop_.return_value = False
cube_set = CubeSet(
Expand Down Expand Up @@ -211,6 +213,7 @@ def but_it_inflates_the_cubes_in_special_case_of_numeric_mean_payload(
):
cubes_ = tuple(instance_mock(request, Cube) for _ in range(4))
cube_.inflate.side_effect = iter(cubes_)
cube_.is_single_filter_col_cube = False
Cube_.return_value = cube_
_is_numeric_measure_prop_.return_value = True
cube_set = CubeSet(
Expand Down Expand Up @@ -248,6 +251,118 @@ def but_it_inflates_the_cubes_in_special_case_of_numeric_mean_payload(
assert cube_.inflate.call_args_list == [call(), call(), call()]
assert cubes == cubes_[:3]

def it_constructs_its_sequence_of_augmented_cube_objects_to_help(self, request):
    """Single-column filter cubes come out with the summary cube's row layout."""

    def text_cube_response(labels, counts, single_col=False):
        """Return a 1D text-variable cube response ending with a missing element."""
        elements = [
            {"id": idx, "missing": False, "value": label}
            for idx, label in enumerate(labels)
        ]
        elements.append({"id": -1, "missing": True, "value": {"?": -1}})
        result = {
            "counts": list(counts),
            "measures": {"count": {"data": list(counts)}},
            "dimensions": [
                {
                    "type": {
                        "class": "enum",
                        "elements": elements,
                        "subtype": {
                            "class": "text",
                            "missing_reasons": {"No Data": -1},
                            "missing_rules": {},
                        },
                    },
                }
            ],
        }
        if single_col:
            result["is_single_col_cube"] = True
        return {"result": result}

    def elements_of(cube):
        """Return the elements list of the first dimension of the cube response."""
        return cube._cube_response["result"]["dimensions"][0]["type"]["elements"]

    cube_set = CubeSet(
        cube_responses=[
            # --- summary cube: rows A, B, C ---
            text_cube_response("ABC", (1, 1, 1, 0)),
            # --- filter cube lacking row B, so its shape differs ---
            text_cube_response("AC", (1, 1, 0), single_col=True),
            # --- filter cube already shaped like the summary cube ---
            text_cube_response("ABC", (1, 1, 1, 0), single_col=True),
        ],
        transforms=[{"xfrms": 1}, {"xfrms": 2}, {"xfrms": 3}],
        population=1000,
        min_base=10,
    )

    built_cubes = cube_set._cubes
    summary_cube = built_cubes[0]
    single_col_filter1 = built_cubes[1]
    single_col_filter2 = built_cubes[2]

    assert len(summary_cube.partitions) == len(single_col_filter1.partitions)
    assert summary_cube.dimension_types == single_col_filter1.dimension_types
    assert single_col_filter1.dimension_types == single_col_filter2.dimension_types
    assert summary_cube.partitions[0].counts == pytest.approx(np.array([1, 1, 1]))
    # --- the row absent from the filter cube (B) is filled in with a 0 count ---
    assert single_col_filter1.partitions[0].counts == pytest.approx(
        np.array([1, 0, 1])
    )
    assert single_col_filter2.partitions[0].counts == pytest.approx(
        np.array([1, 1, 1])
    )
    assert elements_of(single_col_filter1) == elements_of(summary_cube)
    assert elements_of(summary_cube) == elements_of(single_col_filter2)

@pytest.mark.parametrize(
("is_multi_cube", "cube_0_ndim", "expected_value"),
((False, 1, False), (False, 0, False), (True, 1, False), (True, 0, True)),
Expand Down Expand Up @@ -387,7 +502,7 @@ def it_knows_its_index_within_its_cube_set(self, cube_idx_arg, expected_value):
assert Cube(None, cube_idx_arg).cube_index == expected_value

@pytest.mark.parametrize(
("dim_types", "cube_idx", "_is_single_filter_col_cube", "expected_value"),
("dim_types", "cube_idx", "is_single_filter_col_cube", "expected_value"),
(
((), 0, False, False),
((), 0, True, False),
Expand All @@ -404,15 +519,15 @@ def it_knows_ca_as_0th(
request,
dim_types,
cube_idx,
_is_single_filter_col_cube,
is_single_filter_col_cube,
expected_value,
dimension_types_prop_,
):
property_mock(
request,
Cube,
"_is_single_filter_col_cube",
return_value=_is_single_filter_col_cube,
"is_single_filter_col_cube",
return_value=is_single_filter_col_cube,
)
dimension_types_prop_.return_value = dim_types
cube = Cube(
Expand Down Expand Up @@ -443,7 +558,7 @@ def it_knows_if_it_is_a_single_col_filter_cube(
self, _cube_response_prop_, cube_response, expected_value
):
_cube_response_prop_.return_value = cube_response
assert Cube(None)._is_single_filter_col_cube == expected_value
assert Cube(None).is_single_filter_col_cube == expected_value

def it_provides_access_to_the_cube_response_dict_to_help(self):
assert Cube({"cube": "dict"})._cube_response == {"cube": "dict"}
Expand Down

0 comments on commit ec15755

Please sign in to comment.