def augment_response(self, summary_cube_resp) -> "Cube":
    """Return a cube whose response rows are aligned with the summary cube.

    This method is called by the CubeSet while it iterates over the cube
    responses, for a single-column-filter cube whose index is > 0. The response
    is only augmented when its number of counts differs from the number of
    counts in `summary_cube_resp`; otherwise `self` is returned unchanged.

    In a multitable we can have a text variable on the rows and a single column
    filter (text) on the columns. The two cube responses (summary cube and
    filter cube) can then have different shapes because the filter cube omits
    the row labels whose count is 0. Rendered as-is, the filter-cube values are
    packed at the top and lose their row-label association:

        | FCUBE | CAT
    ----+-------+----
      A |   1   |  1
      B |   1   |  1
      C |   1   |  1
      D |       |  1
      E |       |  1
      F |       |  1

    The FCUBE column is missing D, E and F because its result does not count
    them. For a correct result, the FCUBE response is augmented with all the
    elements of the summary cube and each existing value is placed at the
    position of its own label:

        | FCUBE | CAT
    ----+-------+----
      A |   0   |  1
      B |   0   |  1
      C |   1   |  1
      D |   1   |  1
      E |   1   |  1
      F |   0   |  1

    `summary_cube_resp` is the cube-response dict of the summary cube. Only the
    first dimension, `result["counts"]` and the `count` measure are rewritten.
    Elements whose value is not an int or str (missing elements carry a dict
    value) keep a count of 0 in the augmented response, as do labels that do
    not appear in the summary cube.

    Neither this cube's response nor `summary_cube_resp` is modified; the
    augmented response is a copy handed to a new cube of the same class.
    """
    from copy import deepcopy

    cube_resp = self._cube_response
    own_result = cube_resp["result"]
    summary_result = summary_cube_resp["result"]
    n_summary_counts = len(summary_result["counts"])

    # --- same number of cells means the rows already line up ---
    if len(own_result["counts"]) == n_summary_counts:
        return self

    summary_elements = summary_result["dimensions"][0]["type"]["elements"]

    # --- row position of each valid label in the summary cube. The position is
    # --- the element's index in the list, not its "id", because the counts
    # --- list is ordered like the elements list.
    position_by_value = {
        element["value"]: position
        for position, element in enumerate(summary_elements)
        if isinstance(element.get("value"), (int, str))
    }

    # --- pair each of this cube's elements with its own count, so the order of
    # --- the filter-cube elements does not need to match the summary order ---
    own_elements = own_result["dimensions"][0]["type"]["elements"]
    aligned_counts = [0] * n_summary_counts
    for element, count in zip(own_elements, own_result["counts"]):
        value = element.get("value")
        if not isinstance(value, (int, str)):
            continue
        position = position_by_value.get(value)
        if position is not None:
            aligned_counts[position] = count

    # --- build the augmented response on a copy so the original response and
    # --- the summary response stay untouched and share no mutable state ---
    augmented_resp = deepcopy(cube_resp)
    augmented_result = augmented_resp["result"]
    augmented_result["dimensions"][0]["type"]["elements"] = deepcopy(
        summary_elements
    )
    augmented_result["counts"] = aligned_counts
    augmented_result["measures"]["count"]["data"] = list(aligned_counts)

    return type(self)(
        augmented_resp,
        self._cube_idx_arg,
        self._transforms_dict,
        self._population,
        self._mask_size,
    )
) - @lazyproperty - def _is_single_filter_col_cube(self) -> float: - """bool determines if it is a single column filter cube.""" - return self._cube_response["result"].get("is_single_col_cube", False) - @lazyproperty def _measures(self) -> "_Measures": """_Measures object for this cube. @@ -927,7 +997,8 @@ def _flat_values(self) -> Optional[np.ndarray]: return None return np.array( tuple( - np.nan if type(x) is dict else x for x in self._measure_payload["data"] + np.nan if isinstance(x, dict) else x + for x in self._measure_payload["data"] ), dtype=np.float64, ).flatten() @@ -969,7 +1040,9 @@ def _flat_values(self) -> Optional[np.ndarray]: if measure_payload is None: return None return np.array( - tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]), + tuple( + np.nan if isinstance(x, dict) else x for x in measure_payload["data"] + ), dtype=np.float64, ).flatten() @@ -989,7 +1062,8 @@ def _flat_values(self) -> Optional[np.ndarray]: return None return np.array( tuple( - np.nan if type(x) is dict else x for x in self._measure_payload["data"] + np.nan if isinstance(x, dict) else x + for x in self._measure_payload["data"] ), dtype=np.float64, ).flatten() @@ -1028,7 +1102,9 @@ def _flat_values(self) -> Optional[np.ndarray]: return None return np.array( - tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]), + tuple( + np.nan if isinstance(x, dict) else x for x in measure_payload["data"] + ), dtype=np.float64, ).flatten() @@ -1049,7 +1125,9 @@ def _flat_values(self) -> Optional[np.ndarray]: return None return np.array( - tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]), + tuple( + np.nan if isinstance(x, dict) else x for x in measure_payload["data"] + ), dtype=np.float64, ).flatten() diff --git a/tests/unit/test_cube.py b/tests/unit/test_cube.py index ab0156a6e..1c9321e9c 100644 --- a/tests/unit/test_cube.py +++ b/tests/unit/test_cube.py @@ -170,6 +170,8 @@ def 
def it_constructs_its_sequence_of_augmented_cube_objects_to_help(self, request):
    """A shorter single-col-filter cube is aligned with the summary cube.

    The cube-set holds a summary cube with labels A, B, C, a filter cube whose
    response only carries A and C, and a filter cube that already carries all
    three labels.
    """

    def text_response(labels, counts, single_col_filter):
        # --- each call builds fresh dicts and lists so no state is shared
        # --- between the responses handed to the cube-set
        elements = [
            {"id": idx, "missing": False, "value": label}
            for idx, label in enumerate(labels)
        ]
        elements.append({"id": -1, "missing": True, "value": {"?": -1}})
        result = {
            "counts": list(counts),
            "measures": {"count": {"data": list(counts)}},
            "dimensions": [
                {
                    "type": {
                        "class": "enum",
                        "elements": elements,
                        "subtype": {
                            "class": "text",
                            "missing_reasons": {"No Data": -1},
                            "missing_rules": {},
                        },
                    },
                }
            ],
        }
        if single_col_filter:
            result["is_single_col_cube"] = True
        return {"result": result}

    def elements_of(cube):
        return cube._cube_response["result"]["dimensions"][0]["type"]["elements"]

    responses = [
        text_response(("A", "B", "C"), (1, 1, 1, 0), single_col_filter=False),
        text_response(("A", "C"), (1, 1, 0), single_col_filter=True),
        text_response(("A", "B", "C"), (1, 1, 1, 0), single_col_filter=True),
    ]
    cube_set = CubeSet(
        cube_responses=responses,
        transforms=[{"xfrms": 1}, {"xfrms": 2}, {"xfrms": 3}],
        population=1000,
        min_base=10,
    )

    cubes = cube_set._cubes
    summary_cube = cubes[0]
    single_col_filter1 = cubes[1]
    single_col_filter2 = cubes[2]

    assert len(summary_cube.partitions) == len(single_col_filter1.partitions)
    assert (
        summary_cube.dimension_types
        == single_col_filter1.dimension_types
        == single_col_filter2.dimension_types
    )
    assert summary_cube.partitions[0].counts == pytest.approx(np.array([1, 1, 1]))
    assert single_col_filter1.partitions[0].counts == pytest.approx(
        np.array([1, 0, 1])
    )
    assert single_col_filter2.partitions[0].counts == pytest.approx(
        np.array([1, 1, 1])
    )
    assert (
        elements_of(single_col_filter1)
        == elements_of(summary_cube)
        == elements_of(single_col_filter2)
    )
_is_single_filter_col_cube, + is_single_filter_col_cube, expected_value, dimension_types_prop_, ): property_mock( request, Cube, - "_is_single_filter_col_cube", - return_value=_is_single_filter_col_cube, + "is_single_filter_col_cube", + return_value=is_single_filter_col_cube, ) dimension_types_prop_.return_value = dim_types cube = Cube( @@ -443,7 +558,7 @@ def it_knows_if_it_is_a_single_col_filter_cube( self, _cube_response_prop_, cube_response, expected_value ): _cube_response_prop_.return_value = cube_response - assert Cube(None)._is_single_filter_col_cube == expected_value + assert Cube(None).is_single_filter_col_cube == expected_value def it_provides_access_to_the_cube_response_dict_to_help(self): assert Cube({"cube": "dict"})._cube_response == {"cube": "dict"}