augment single col filter cubes

Crunch-io · Mar 18, 2024 · c8fe440 · c8fe440
1 parent 33948c7
commit c8fe440
Show file tree

Hide file tree

Showing 2 changed files with 156 additions and 7 deletions.
diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py
@@ -144,6 +144,39 @@ def valid_counts_summary_range(self) -> int:
         """The valid count summary values from first cube in this set."""
         return self._cubes[0].valid_counts_summary_range
 
+    def _augment_cube_response(self, cube_response, summary_cube_resp):
+        """Return `cube_response` expanded to the summary cube's element layout.
+
+        The first dimension's elements are replaced by those of
+        `summary_cube_resp`, and the counts are re-laid-out into a zero-filled
+        list as long as the summary counts. Each count is written at the
+        *position* its element value occupies in the summary element list.
+
+        Only elements whose "value" is an `int` or `str` are matched; elements
+        with any other value (e.g. a `{"?": -1}` missing marker) are skipped, so
+        their counts are not carried over.
+
+        `cube_response` is modified in place and also returned. The replaced
+        elements list is the same list object the summary response holds. Both
+        arguments may be given bare or wrapped as `{"value": {...}}`, the two
+        shapes `_cube_needs_reshaping` accepts.
+        """
+        # Unwrap the optional {"value": ...} envelope; without this a wrapped
+        # response that passed `_cube_needs_reshaping` raised KeyError here.
+        result = cube_response.get("value", cube_response)["result"]
+        summary_result = summary_cube_resp.get("value", summary_cube_resp)["result"]
+        all_elements = summary_result["dimensions"][0]["type"]["elements"]
+        dimension_type = result["dimensions"][0]["type"]
+
+        # Counts are aligned with elements by position, so index by position in
+        # the summary list rather than by element "id" (ids need not be 0..n-1).
+        position_by_value = {
+            element["value"]: position
+            for position, element in enumerate(all_elements)
+            if isinstance(element.get("value"), (int, str))
+        }
+
+        data = [0] * len(summary_result["counts"])
+        # Pair every element with its own count so a skipped (non-scalar)
+        # element in the middle cannot shift the counts that follow it.
+        for element, count in zip(dimension_type["elements"], result["counts"]):
+            value = element.get("value")
+            if not isinstance(value, (int, str)):
+                continue
+            position = position_by_value.get(value)
+            if position is not None:
+                data[position] = count
+
+        dimension_type["elements"] = all_elements
+        result["counts"] = data
+        result["measures"]["count"]["data"] = data
+        return cube_response
+
+    def _cube_needs_reshaping(self, cube_response, idx):
+        """Truthy when `cube_response` must be expanded to the summary cube shape.
+
+        All of the following have to hold: the response's "result" carries a
+        truthy "is_single_col_cube" flag, this is a multi-cube set, the response
+        is not the first one (`idx > 0`), and its "counts" length differs from
+        that of the first response in `self._cube_responses`.
+
+        Either response may be bare or wrapped as `{"value": {...}}`. The first
+        condition that fails is returned as-is, so the result can be a falsy
+        non-bool such as `None`.
+        """
+        response = cube_response.get("value", cube_response)
+        first_response = self._cube_responses[0]
+        summary_cube_response = first_response.get("value", first_response)
+
+        is_single_col = response["result"].get("is_single_col_cube")
+        if not is_single_col:
+            return is_single_col
+
+        is_multi_cube = self._is_multi_cube
+        if not is_multi_cube:
+            return is_multi_cube
+
+        is_not_first = idx > 0
+        if not is_not_first:
+            return is_not_first
+
+        own_length = len(response["result"]["counts"])
+        summary_length = len(summary_cube_response["result"]["counts"])
+        return own_length != summary_length
+
     @lazyproperty
     def _cubes(self) -> Tuple["Cube", ...]:
         """Sequence of Cube objects containing data for this analysis."""
@@ -155,8 +188,13 @@ def iter_cubes() -> Iterator[Cube]:
             missing row dimension.
             """
             for idx, cube_response in enumerate(self._cube_responses):
+                resp = (
+                    self._augment_cube_response(cube_response, self._cube_responses[0])
+                    if self._cube_needs_reshaping(cube_response, idx)
+                    else cube_response
+                )
                 cube = Cube(
-                    cube_response,
+                    resp,
                     cube_idx=idx if self._is_multi_cube else None,
                     transforms=self._transforms_dicts[idx],
                     population=self._population,
@@ -927,7 +965,8 @@ def _flat_values(self) -> Optional[np.ndarray]:
             return None
         return np.array(
             tuple(
-                np.nan if type(x) is dict else x for x in self._measure_payload["data"]
+                np.nan if isinstance(x, dict) else x
+                for x in self._measure_payload["data"]
             ),
             dtype=np.float64,
         ).flatten()
@@ -969,7 +1008,9 @@ def _flat_values(self) -> Optional[np.ndarray]:
         if measure_payload is None:
             return None
         return np.array(
-            tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]),
+            tuple(
+                np.nan if isinstance(x, dict) else x for x in measure_payload["data"]
+            ),
             dtype=np.float64,
         ).flatten()
 
@@ -989,7 +1030,8 @@ def _flat_values(self) -> Optional[np.ndarray]:
             return None
         return np.array(
             tuple(
-                np.nan if type(x) is dict else x for x in self._measure_payload["data"]
+                np.nan if isinstance(x, dict) else x
+                for x in self._measure_payload["data"]
             ),
             dtype=np.float64,
         ).flatten()
@@ -1028,7 +1070,9 @@ def _flat_values(self) -> Optional[np.ndarray]:
             return None
 
         return np.array(
-            tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]),
+            tuple(
+                np.nan if isinstance(x, dict) else x for x in measure_payload["data"]
+            ),
             dtype=np.float64,
         ).flatten()
 
@@ -1049,7 +1093,9 @@ def _flat_values(self) -> Optional[np.ndarray]:
             return None
 
         return np.array(
-            tuple(np.nan if type(x) is dict else x for x in measure_payload["data"]),
+            tuple(
+                np.nan if isinstance(x, dict) else x for x in measure_payload["data"]
+            ),
             dtype=np.float64,
         ).flatten()
 

diff --git a/tests/unit/test_cube.py b/tests/unit/test_cube.py
@@ -16,7 +16,7 @@
 from cr.cube.enums import DIMENSION_TYPE as DT
 
 from ..fixtures import CR  # ---mnemonic: CR = 'cube-response'---
-from ..unitutil import call, class_mock, instance_mock, property_mock
+from ..unitutil import call, class_mock, instance_mock, method_mock, property_mock
 
 
 class DescribeCubeSet:
@@ -172,6 +172,7 @@ def it_constructs_its_sequence_of_cube_objects_to_help(
         cubes_ = tuple(instance_mock(request, Cube) for _ in range(4))
         Cube_.side_effect = iter(cubes_)
         _is_numeric_measure_prop_.return_value = False
+        method_mock(request, CubeSet, "_cube_needs_reshaping", return_value=False)
         cube_set = CubeSet(
             cube_responses=[{"cube": "resp-1"}, {"cube": "resp-2"}, {"cube": "resp-3"}],
             transforms=[{"xfrms": 1}, {"xfrms": 2}, {"xfrms": 3}],
@@ -213,6 +214,7 @@ def but_it_inflates_the_cubes_in_special_case_of_numeric_mean_payload(
         cube_.inflate.side_effect = iter(cubes_)
         Cube_.return_value = cube_
         _is_numeric_measure_prop_.return_value = True
+        method_mock(request, CubeSet, "_cube_needs_reshaping", return_value=False)
         cube_set = CubeSet(
             cube_responses=[{"cube": "resp-1"}, {"cube": "resp-2"}, {"cube": "resp-3"}],
             transforms=[{"xfrms": 1}, {"xfrms": 2}, {"xfrms": 3}],
@@ -248,6 +250,107 @@ def but_it_inflates_the_cubes_in_special_case_of_numeric_mean_payload(
         assert cube_.inflate.call_args_list == [call(), call(), call()]
         assert cubes == cubes_[:3]
 
+    def it_constructs_its_sequence_of_augmented_cube_objects_to_help(self, request):
+        """Single-col-filter cube is padded out to the summary cube's elements."""
+
+        def datetime_response(days, counts):
+            """Build a 1D datetime-enum response: one element per day + a missing."""
+            elements = [
+                {"id": position, "missing": False, "value": day}
+                for position, day in enumerate(days)
+            ]
+            elements.append({"id": -1, "missing": True, "value": {"?": -1}})
+            return {
+                "result": {
+                    "counts": list(counts),
+                    "measures": {"count": {"data": list(counts)}},
+                    "dimensions": [
+                        {
+                            "type": {
+                                "class": "enum",
+                                "elements": elements,
+                                "subtype": {
+                                    "class": "datetime",
+                                    "missing_reasons": {"No Data": -1},
+                                    "missing_rules": {},
+                                    "resolution": "D",
+                                },
+                            },
+                        }
+                    ],
+                }
+            }
+
+        def elements_of(cube):
+            return cube._cube_response["result"]["dimensions"][0]["type"]["elements"]
+
+        # Force the reshaping path for every response, the summary one included.
+        method_mock(request, CubeSet, "_cube_needs_reshaping", return_value=True)
+        summary_days = ("1955-11-06", "1955-11-07", "1955-11-08")
+        cube_set = CubeSet(
+            cube_responses=[
+                datetime_response(summary_days, [1, 1, 1, 0]),
+                datetime_response(summary_days[:2], [1, 1, 0]),
+            ],
+            transforms=[{"xfrms": 1}, {"xfrms": 2}],
+            population=1000,
+            min_base=10,
+        )
+
+        cubes = cube_set._cubes
+        summary_cube, single_col_filter = cubes[0], cubes[1]
+
+        assert len(summary_cube.partitions) == len(single_col_filter.partitions)
+        assert summary_cube.dimension_types == single_col_filter.dimension_types
+        assert summary_cube.partitions[0].counts == pytest.approx(np.array([1, 1, 1]))
+        assert single_col_filter.partitions[0].counts == pytest.approx(
+            np.array([1, 1, 0])
+        )
+        assert elements_of(single_col_filter) == elements_of(summary_cube)
+
     @pytest.mark.parametrize(
         ("is_multi_cube", "cube_0_ndim", "expected_value"),
         ((False, 1, False), (False, 0, False), (True, 1, False), (True, 0, True)),