Merge pull request #398 from Crunch-io/inflate-single-col-filter-cubes-187231707

augment single col filter cubes
Crunch-io · Mar 19, 2024 · ec15755 · ec15755
2 parents 33948c7 + fff9313
commit ec15755
Show file tree

Hide file tree

Showing 2 changed files with 209 additions and 16 deletions.
diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py
@@ -162,6 +162,13 @@ def iter_cubes() -> Iterator[Cube]:
                     population=self._population,
                     mask_size=self._min_base,
                 )
+                cube = (
+                    cube.augment_response(self._cube_responses[0])
+                    if self._is_multi_cube
+                    and cube.is_single_filter_col_cube
+                    and idx > 0
+                    else cube
+                )
                 # --- numeric-measures cubes require inflation to restore their
                 # --- rows-dimension, others don't
                 yield cube.inflate() if self._is_numeric_measure else cube
@@ -245,6 +252,69 @@ def available_measures(self) -> FrozenSet[CUBE_MEASURE]:
         cube_measures = self._cube_response.get("result", {}).get("measures", {}).keys()
         return frozenset(CUBE_MEASURE(m) for m in cube_measures)
 
+    def augment_response(self, summary_cube_resp) -> "Cube":
+        """Inflate counts, data and elements dict in case of a single filter cube.
+
+        This method is called through the CubeSet while we iterate over all the cube
+        responses. If a cube is a single column filter and its idx > 0 it will be
+        augmented but only if the summary cube shape is different from its own shape.
+
+        In a multitable example we can have a text variable on the rows and a single col
+        filter (text) on the columns. From zz9 result the 2 cubes (summary cube and
+        filter cube) will have a different shape. Basically the filter cube will miss
+        the row labels that have 0 as counts.
+
+            | FCUBE | CAT
+        ----+-------+----
+         A  |   1   |  1
+         B  |   1   |  1
+         C  |   1   |  1
+         D  |       |  1
+         E  |       |  1
+         F  |       |  1
+
+        The FCUBE has D, E and F missing because its result doesn't count them. And the
+        rendering starts from the top without the correct row label association.
+        For a correct result the FCUBE cube_response needs to be augmented with all the
+        elements of the summary cube, with its values placed at the positions of the
+        labels that actually exist in its own response.
+
+            | FCUBE | CAT
+        ----+-------+----
+         A  |   0   |  1
+         B  |   0   |  1
+         C  |   1   |  1
+         D  |   1   |  1
+         E  |   1   |  1
+         F  |   0   |  1
+        """
+        cube_resp = self._cube_response
+
+        if len(cube_resp["result"]["counts"]) != len(
+            summary_cube_resp["result"]["counts"]
+        ):
+            elements = summary_cube_resp["result"]["dimensions"][0]["type"]["elements"]
+            values = [
+                el.get("value")
+                for el in cube_resp["result"]["dimensions"][0]["type"]["elements"]
+                if isinstance(el.get("value"), (int, str))
+            ]
+            positions = [item["id"] for item in elements if item["value"] in values]
+            cube_resp["result"]["dimensions"][0]["type"]["elements"] = elements
+            data = [0] * len(summary_cube_resp["result"]["counts"])
+            for pos, value in zip(positions, cube_resp["result"]["counts"]):
+                data[pos] = value
+            cube_resp["result"]["counts"] = data
+            cube_resp["result"]["measures"]["count"]["data"] = data
+            return Cube(
+                cube_resp,
+                self._cube_idx_arg,
+                self._transforms_dict,
+                self._population,
+                self._mask_size,
+            )
+        return self
+
     @lazyproperty
     def counts(self) -> np.ndarray:
         return self.counts_with_missings[self._valid_idxs]
@@ -345,6 +415,11 @@ def has_weighted_counts(self) -> bool:
         """True if cube response has weighted count data."""
         return self.weighted_counts is not None
 
+    @lazyproperty
+    def is_single_filter_col_cube(self) -> float:
+        """bool determines if it is a single column filter cube."""
+        return self._cube_response["result"].get("is_single_col_cube", False)
+
     @lazyproperty
     def means(self) -> Optional[np.ndarray]:
         """Optional float64 ndarray of the cube_means if the measure exists."""
@@ -563,7 +638,7 @@ def _ca_as_0th(self) -> bool:
         a 2D cube-result becomes a single slice.
         """
         return (
-            (self._cube_idx_arg == 0 or self._is_single_filter_col_cube)
+            (self._cube_idx_arg == 0 or self.is_single_filter_col_cube)
             and len(self.dimension_types) > 0
             and self.dimension_types[0] == DT.CA
         )
@@ -585,11 +660,6 @@ def _cube_response(self) -> Dict:
                 f" Cube response must be JSON (str) or dict."
             )
 
-    @lazyproperty
-    def _is_single_filter_col_cube(self) -> float:
-        """bool determines if it is a single column filter cube."""
-        return self._cube_response["result"].get("is_single_col_cube", False)
-
     @lazyproperty
     def _measures(self) -> "_Measures":
         """_Measures object for this cube.
@@ -927,7 +997,8 @@ def _flat_values(self) -> Optional[np.ndarray]:
             return None
         return np.array(
             tuple(
-                np.nan if type(x) is dict else x for x in self._measure_payload["data"]
+                np.nan if isinstance(x, dict) else x
+                for x in self._measure_payload["data"]
             ),
             dtype=np.float64,
         ).flatten()
@@ -969,7 +1040,9 @@ def _flat_values(self) -> Optional[np.ndarray]:
         if measure_payload is None:
             return None
         return np.array(
-            tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]),
+            tuple(
+                np.nan if isinstance(x, dict) else x for x in measure_payload["data"]
+            ),
             dtype=np.float64,
         ).flatten()
 
@@ -989,7 +1062,8 @@ def _flat_values(self) -> Optional[np.ndarray]:
             return None
         return np.array(
             tuple(
-                np.nan if type(x) is dict else x for x in self._measure_payload["data"]
+                np.nan if isinstance(x, dict) else x
+                for x in self._measure_payload["data"]
             ),
             dtype=np.float64,
         ).flatten()
@@ -1028,7 +1102,9 @@ def _flat_values(self) -> Optional[np.ndarray]:
             return None
 
         return np.array(
-            tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]),
+            tuple(
+                np.nan if isinstance(x, dict) else x for x in measure_payload["data"]
+            ),
             dtype=np.float64,
         ).flatten()
 
@@ -1049,7 +1125,9 @@ def _flat_values(self) -> Optional[np.ndarray]:
             return None
 
         return np.array(
-            tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]),
+            tuple(
+                np.nan if isinstance(x, dict) else x for x in measure_payload["data"]
+            ),
             dtype=np.float64,
         ).flatten()
 

diff --git a/tests/unit/test_cube.py b/tests/unit/test_cube.py
@@ -170,6 +170,8 @@ def it_constructs_its_sequence_of_cube_objects_to_help(
         self, request, Cube_, _is_numeric_measure_prop_
     ):
         cubes_ = tuple(instance_mock(request, Cube) for _ in range(4))
+        for c in cubes_:
+            c.is_single_filter_col_cube = False
         Cube_.side_effect = iter(cubes_)
         _is_numeric_measure_prop_.return_value = False
         cube_set = CubeSet(
@@ -211,6 +213,7 @@ def but_it_inflates_the_cubes_in_special_case_of_numeric_mean_payload(
     ):
         cubes_ = tuple(instance_mock(request, Cube) for _ in range(4))
         cube_.inflate.side_effect = iter(cubes_)
+        cube_.is_single_filter_col_cube = False
         Cube_.return_value = cube_
         _is_numeric_measure_prop_.return_value = True
         cube_set = CubeSet(
@@ -248,6 +251,118 @@ def but_it_inflates_the_cubes_in_special_case_of_numeric_mean_payload(
         assert cube_.inflate.call_args_list == [call(), call(), call()]
         assert cubes == cubes_[:3]
 
+    def it_constructs_its_sequence_of_augmented_cube_objects_to_help(self, request):
+        cube_set = CubeSet(
+            cube_responses=[
+                {
+                    "result": {
+                        "counts": [1, 1, 1, 0],
+                        "measures": {"count": {"data": [1, 1, 1, 0]}},
+                        "dimensions": [
+                            {
+                                "type": {
+                                    "class": "enum",
+                                    "elements": [
+                                        {"id": 0, "missing": False, "value": "A"},
+                                        {"id": 1, "missing": False, "value": "B"},
+                                        {"id": 2, "missing": False, "value": "C"},
+                                        {"id": -1, "missing": True, "value": {"?": -1}},
+                                    ],
+                                    "subtype": {
+                                        "class": "text",
+                                        "missing_reasons": {"No Data": -1},
+                                        "missing_rules": {},
+                                    },
+                                },
+                            }
+                        ],
+                    }
+                },
+                {
+                    "result": {
+                        "is_single_col_cube": True,
+                        "measures": {"count": {"data": [1, 1, 0]}},
+                        "counts": [1, 1, 0],
+                        "dimensions": [
+                            {
+                                "type": {
+                                    "class": "enum",
+                                    "elements": [
+                                        {"id": 0, "missing": False, "value": "A"},
+                                        {"id": 1, "missing": False, "value": "C"},
+                                        {"id": -1, "missing": True, "value": {"?": -1}},
+                                    ],
+                                    "subtype": {
+                                        "class": "text",
+                                        "missing_reasons": {"No Data": -1},
+                                        "missing_rules": {},
+                                    },
+                                },
+                            }
+                        ],
+                    }
+                },
+                {
+                    "result": {
+                        "is_single_col_cube": True,
+                        "counts": [1, 1, 1, 0],
+                        "measures": {"count": {"data": [1, 1, 1, 0]}},
+                        "dimensions": [
+                            {
+                                "type": {
+                                    "class": "enum",
+                                    "elements": [
+                                        {"id": 0, "missing": False, "value": "A"},
+                                        {"id": 1, "missing": False, "value": "B"},
+                                        {"id": 2, "missing": False, "value": "C"},
+                                        {"id": -1, "missing": True, "value": {"?": -1}},
+                                    ],
+                                    "subtype": {
+                                        "class": "text",
+                                        "missing_reasons": {"No Data": -1},
+                                        "missing_rules": {},
+                                    },
+                                },
+                            }
+                        ],
+                    }
+                },
+            ],
+            transforms=[{"xfrms": 1}, {"xfrms": 2}, {"xfrms": 3}],
+            population=1000,
+            min_base=10,
+        )
+
+        cubes = cube_set._cubes
+        summary_cube = cubes[0]
+        single_col_filter1 = cubes[1]
+        single_col_filter2 = cubes[2]
+
+        assert len(summary_cube.partitions) == len(single_col_filter1.partitions)
+        assert (
+            summary_cube.dimension_types
+            == single_col_filter1.dimension_types
+            == single_col_filter2.dimension_types
+        )
+        assert summary_cube.partitions[0].counts == pytest.approx(np.array([1, 1, 1]))
+        assert single_col_filter1.partitions[0].counts == pytest.approx(
+            np.array([1, 0, 1])
+        )
+        assert single_col_filter2.partitions[0].counts == pytest.approx(
+            np.array([1, 1, 1])
+        )
+        assert (
+            single_col_filter1._cube_response["result"]["dimensions"][0]["type"][
+                "elements"
+            ]
+            == summary_cube._cube_response["result"]["dimensions"][0]["type"][
+                "elements"
+            ]
+            == single_col_filter2._cube_response["result"]["dimensions"][0]["type"][
+                "elements"
+            ]
+        )
+
     @pytest.mark.parametrize(
         ("is_multi_cube", "cube_0_ndim", "expected_value"),
         ((False, 1, False), (False, 0, False), (True, 1, False), (True, 0, True)),
@@ -387,7 +502,7 @@ def it_knows_its_index_within_its_cube_set(self, cube_idx_arg, expected_value):
         assert Cube(None, cube_idx_arg).cube_index == expected_value
 
     @pytest.mark.parametrize(
-        ("dim_types", "cube_idx", "_is_single_filter_col_cube", "expected_value"),
+        ("dim_types", "cube_idx", "is_single_filter_col_cube", "expected_value"),
         (
             ((), 0, False, False),
             ((), 0, True, False),
@@ -404,15 +519,15 @@ def it_knows_ca_as_0th(
         request,
         dim_types,
         cube_idx,
-        _is_single_filter_col_cube,
+        is_single_filter_col_cube,
         expected_value,
         dimension_types_prop_,
     ):
         property_mock(
             request,
             Cube,
-            "_is_single_filter_col_cube",
-            return_value=_is_single_filter_col_cube,
+            "is_single_filter_col_cube",
+            return_value=is_single_filter_col_cube,
         )
         dimension_types_prop_.return_value = dim_types
         cube = Cube(
@@ -443,7 +558,7 @@ def it_knows_if_it_is_a_single_col_filter_cube(
         self, _cube_response_prop_, cube_response, expected_value
     ):
         _cube_response_prop_.return_value = cube_response
-        assert Cube(None)._is_single_filter_col_cube == expected_value
+        assert Cube(None).is_single_filter_col_cube == expected_value
 
     def it_provides_access_to_the_cube_response_dict_to_help(self):
         assert Cube({"cube": "dict"})._cube_response == {"cube": "dict"}