Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove deepcopy from cube response #394

Merged
merged 11 commits into from
Feb 4, 2024
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ repos:
- id: trailing-whitespace

- repo: https://github.com/ambv/black
rev: 22.6.0
rev: 24.1.1
hooks:
- id: black

Expand Down
47 changes: 21 additions & 26 deletions src/cr/cube/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
CubeSet is the main API class for manipulating Crunch.io JSON cube responses.
"""

import copy
import json
from typing import Dict, FrozenSet, Iterator, List, Optional, Tuple, Union

Expand Down Expand Up @@ -260,11 +259,15 @@ def counts_with_missings(self) -> np.ndarray:
return (
self._measures.weighted_valid_counts.raw_cube_array
if self._measures.weighted_valid_counts is not None
else self._measures.unweighted_valid_counts.raw_cube_array
if self._measures.unweighted_valid_counts is not None
else self._measures.weighted_counts.raw_cube_array
if self.has_weighted_counts
else self._measures.unweighted_counts.raw_cube_array
else (
self._measures.unweighted_valid_counts.raw_cube_array
if self._measures.unweighted_valid_counts is not None
else (
self._measures.weighted_counts.raw_cube_array
if self.has_weighted_counts
else self._measures.unweighted_counts.raw_cube_array
)
)
)

@lazyproperty
Expand Down Expand Up @@ -312,8 +315,10 @@ def inflate(self) -> "Cube":
A multi-cube (tabbook) response formed from a function (e.g. mean()) on
a numeric variable arrives without a rows-dimension.
"""
cube_dict = self._cube_dict
dimensions = cube_dict["result"]["dimensions"]
cube_dict = self._cube_response
num_array_dim = self._numeric_array_dimension or None
dims = cube_dict["result"]["dimensions"]
dimensions = [num_array_dim] + dims if num_array_dim else dims
default_name = "-".join([m.value for m in self._available_numeric_measures])
# --- The default value in case of numeric variable is the combination of all
# --- the measures expressed in the cube response.
Expand Down Expand Up @@ -435,7 +440,7 @@ def title(self) -> str:
use-case it is a stand-in for the columns-dimension name since a strand has no
columns dimension.
"""
return self._cube_dict["result"].get("title", "Untitled")
return self._cube_response["result"].get("title", "Untitled")

@lazyproperty
def unweighted_counts(self) -> np.ndarray:
Expand Down Expand Up @@ -533,9 +538,12 @@ def weighted_squared_counts(self) -> Optional[np.ndarray]:
].astype(np.float64)

@lazyproperty
def _all_dimensions(self) -> list:
def _all_dimensions(self) -> Dimensions:
"""List of all dimensions (not just user-apparent ones) for this cube."""
return Dimensions.from_dicts(self._cube_dict["result"]["dimensions"])
num_array_dim = self._numeric_array_dimension or None
dims = self._cube_response["result"]["dimensions"]
dimension_dicts = [num_array_dim] + dims if num_array_dim else dims
return Dimensions.from_dicts(dimension_dicts)

@lazyproperty
def _available_numeric_measures(self) -> Tuple[CUBE_MEASURE, ...]:
Expand All @@ -560,19 +568,6 @@ def _ca_as_0th(self) -> bool:
and self.dimension_types[0] == DT.CA
)

@lazyproperty
def _cube_dict(self) -> Dict:
"""dict containing raw cube response, parsed from JSON payload."""
cube_dict = copy.deepcopy(self._cube_response)
if self._numeric_measure_subvariables:
dimensions = cube_dict.get("result", {}).get("dimensions", [])
# ---dim inflation---
# ---In case of numeric arrays, we need to inflate the row dimension
# ---according to the mean subvariables. For each subvar the row dimension
# ---will have a new element related to the subvar metadata.
dimensions.insert(0, self._numeric_array_dimension)
return cube_dict

@lazyproperty
def _cube_response(self) -> Dict:
"""dict representing the parsed cube response arguments."""
Expand All @@ -593,7 +588,7 @@ def _cube_response(self) -> Dict:
@lazyproperty
def _is_single_filter_col_cube(self) -> float:
"""bool determines if it is a single column filter cube."""
return self._cube_dict["result"].get("is_single_col_cube", False)
return self._cube_response["result"].get("is_single_col_cube", False)

@lazyproperty
def _measures(self) -> "_Measures":
Expand All @@ -602,7 +597,7 @@ def _measures(self) -> "_Measures":
Provides access to count based measures and numeric measures (e.g. mean, sum)
when available.
"""
return _Measures(self._cube_dict, self._all_dimensions, self._cube_idx_arg)
return _Measures(self._cube_response, self._all_dimensions, self._cube_idx_arg)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we build `_Measures` without the "dimension inflation". Do we still need it?

Copy link
Contributor Author

@ernestoarbitrio ernestoarbitrio Feb 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, `self._all_dimensions` already includes the inflation, so having the inflation in the cube response is not needed.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok nice 👍


@lazyproperty
def _numeric_measure_references(self) -> Dict:
Expand Down
10 changes: 6 additions & 4 deletions src/cr/cube/cubepart.py
Original file line number Diff line number Diff line change
Expand Up @@ -1736,10 +1736,12 @@ def _assemble_vector(self, base_vector, subtotals, order, diffs_nan=False):
# values from a _BaseSubtotals subclass.
vector_subtotals = np.array(
[
np.nan
if diffs_nan and len(subtotal.subtrahend_idxs) > 0
else np.sum(base_vector[subtotal.addend_idxs])
- np.sum(base_vector[subtotal.subtrahend_idxs])
(
np.nan
if diffs_nan and len(subtotal.subtrahend_idxs) > 0
else np.sum(base_vector[subtotal.addend_idxs])
- np.sum(base_vector[subtotal.subtrahend_idxs])
)
for subtotal in subtotals
]
)
Expand Down
14 changes: 8 additions & 6 deletions src/cr/cube/dimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,11 @@ def format(x) -> str:
formatter = format

if dimension_type == DT.DATETIME:
orig_format = cls.datetime_formats.get(
typedef["subtype"].get("resolution")
orig_format: str = (
cls.datetime_formats.get(typedef["subtype"].get("resolution")) or ""
)
out_format = element_data_format
if orig_format is not None and out_format is not None:
if orig_format and out_format is not None:

def format_datetime(x) -> str:
try:
Expand Down Expand Up @@ -842,9 +842,11 @@ def _replaced_element_transforms(self, element_transforms) -> Dict:
if key == "subvar_id":
# --- translate from subvariable id
new_keys = tuple(
self._subvar_aliases[self._subvar_ids.index(_id)]
if _id in self._subvar_ids
else None
(
self._subvar_aliases[self._subvar_ids.index(_id)]
if _id in self._subvar_ids
else None
)
for _id in old_keys
)
else:
Expand Down
27 changes: 18 additions & 9 deletions src/cr/cube/matrix/assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,24 @@ def row_display_order(cls, dimensions, second_order_measures, format):
HelperCls = (
_SortRowsByBaseColumnHelper
if collation_method == CM.OPPOSING_ELEMENT
else _SortRowsByDerivedColumnHelper
if collation_method == CM.OPPOSING_INSERTION and dim_type in DT.ARRAY_TYPES
else _SortRowsByInsertedColumnHelper
if collation_method == CM.OPPOSING_INSERTION
else _SortRowsByLabelHelper
if collation_method == CM.LABEL
else _SortRowsByMarginalHelper
if collation_method == CM.MARGINAL
else _RowOrderHelper
else (
_SortRowsByDerivedColumnHelper
if collation_method == CM.OPPOSING_INSERTION
and dim_type in DT.ARRAY_TYPES
else (
_SortRowsByInsertedColumnHelper
if collation_method == CM.OPPOSING_INSERTION
else (
_SortRowsByLabelHelper
if collation_method == CM.LABEL
else (
_SortRowsByMarginalHelper
if collation_method == CM.MARGINAL
else _RowOrderHelper
)
)
)
)
)
return HelperCls(dimensions, second_order_measures, format)._display_order

Expand Down
64 changes: 39 additions & 25 deletions src/cr/cube/matrix/cubemeasure.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,11 @@ def factory(cls, counts, diff_nans, cube, dimensions, slice_idx):
Chooses between unweighted and weighted counts based on `type`.
"""
dimension_type_strings = tuple(
"MR"
if dim_type == DT.MR
else "ARR"
if dim_type in DT.ARRAY_TYPES
else "CAT"
(
"MR"
if dim_type == DT.MR
else "ARR" if dim_type in DT.ARRAY_TYPES else "CAT"
)
for dim_type in cube.dimension_types[-2:]
)
CubeCountsCls = {
Expand Down Expand Up @@ -812,11 +812,15 @@ def factory(cls, cube, dimensions, slice_idx):
CubeMeansCls = (
_MrXMrCubeMeans
if dimension_types == (DT.MR, DT.MR)
else _MrXCatCubeMeans
if dimension_types[0] == DT.MR
else _CatXMrCubeMeans
if dimension_types[1] == DT.MR
else _CatXCatCubeMeans
else (
_MrXCatCubeMeans
if dimension_types[0] == DT.MR
else (
_CatXMrCubeMeans
if dimension_types[1] == DT.MR
else _CatXCatCubeMeans
)
)
)
return CubeMeansCls(
dimensions, cube.means[cls._slice_idx_expr(cube, slice_idx)]
Expand Down Expand Up @@ -1027,11 +1031,15 @@ def factory(cls, cube, dimensions, slice_idx):
CubeSumsCls = (
_MrXMrCubeStdDev
if dimension_types == (DT.MR, DT.MR)
else _MrXCatCubeStdDev
if dimension_types[0] == DT.MR
else _CatXMrCubeStdDev
if dimension_types[1] == DT.MR
else _CatXCatCubeStdDev
else (
_MrXCatCubeStdDev
if dimension_types[0] == DT.MR
else (
_CatXMrCubeStdDev
if dimension_types[1] == DT.MR
else _CatXCatCubeStdDev
)
)
)
return CubeSumsCls(
dimensions, cube.stddev[cls._slice_idx_expr(cube, slice_idx)]
Expand Down Expand Up @@ -1110,11 +1118,13 @@ def factory(cls, cube, dimensions, slice_idx):
CubeSumsCls = (
_MrXMrCubeSums
if dimension_types == (DT.MR, DT.MR)
else _MrXCatCubeSums
if dimension_types[0] == DT.MR
else _CatXMrCubeSums
if dimension_types[1] == DT.MR
else _CatXCatCubeSums
else (
_MrXCatCubeSums
if dimension_types[0] == DT.MR
else (
_CatXMrCubeSums if dimension_types[1] == DT.MR else _CatXCatCubeSums
)
)
)
return CubeSumsCls(dimensions, cube.sums[cls._slice_idx_expr(cube, slice_idx)])

Expand Down Expand Up @@ -1202,11 +1212,15 @@ def factory(cls, cube, dimensions, slice_idx):
UnconditionalCubeCountsCls = (
_MrXMrUnconditionalCubeCounts
if dimension_types == (DT.MR, DT.MR)
else _MrXCatUnconditionalCubeCounts
if dimension_types[0] == DT.MR
else _CatXMrUnconditionalCubeCounts
if dimension_types[1] == DT.MR
else _CatXCatUnconditionalCubeCounts
else (
_MrXCatUnconditionalCubeCounts
if dimension_types[0] == DT.MR
else (
_CatXMrUnconditionalCubeCounts
if dimension_types[1] == DT.MR
else _CatXCatUnconditionalCubeCounts
)
)
)
return UnconditionalCubeCountsCls(
dimensions,
Expand Down
16 changes: 10 additions & 6 deletions src/cr/cube/matrix/measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -1712,9 +1712,11 @@ def blocks(self):
return (
self._second_order_measures.row_proportions.blocks
if self._dimensions[-2].dimension_type == DT.CAT_DATE
else self._second_order_measures.column_proportions.blocks
if self._dimensions[-1].dimension_type == DT.CAT_DATE
else self._second_order_measures.table_proportions.blocks
else (
self._second_order_measures.column_proportions.blocks
if self._dimensions[-1].dimension_type == DT.CAT_DATE
else self._second_order_measures.table_proportions.blocks
)
)


Expand Down Expand Up @@ -1744,9 +1746,11 @@ def blocks(self):
return (
self._second_order_measures.row_std_err.blocks
if self._dimensions[-2].dimension_type == DT.CAT_DATE
else self._second_order_measures.column_std_err.blocks
if self._dimensions[-1].dimension_type == DT.CAT_DATE
else self._second_order_measures.table_std_err.blocks
else (
self._second_order_measures.column_std_err.blocks
if self._dimensions[-1].dimension_type == DT.CAT_DATE
else self._second_order_measures.table_std_err.blocks
)
)


Expand Down
4 changes: 2 additions & 2 deletions src/cr/cube/measures/pairwise_significance.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from cr.cube.util import lazyproperty


class PairwiseSignificance(object):
class PairwiseSignificance:
"""Implementation of p-vals and t-tests for each column proportions comparison."""

def __init__(self, slice_, alpha=0.05, only_larger=True):
Expand Down Expand Up @@ -57,7 +57,7 @@ def _scale_mean_pairwise_indices(self):
return tuple(sig.scale_mean_pairwise_indices for sig in self.values)


class _ColumnPairwiseSignificance(object):
class _ColumnPairwiseSignificance:
"""Value object providing matrix of T-score based pairwise-comparison P-values"""

def __init__(self, slice_, col_idx, alpha=0.05, only_larger=True):
Expand Down
4 changes: 2 additions & 2 deletions src/cr/cube/smoothing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from cr.cube.util import lazyproperty


class Smoother(object):
class Smoother:
"""Base object class for Smoother variants."""

@classmethod
Expand All @@ -28,7 +28,7 @@ def factory(cls, dimension):
)


class _SingleSidedMovingAvgSmoother(object):
class _SingleSidedMovingAvgSmoother:
"""Create and configure smoothing function for one-sided moving average."""

def __init__(self, smoothing_dict, dimension_type):
Expand Down
8 changes: 5 additions & 3 deletions src/cr/cube/stripe/assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ def display_order(cls, rows_dimension, measures, format):
HelperCls = (
_SortByMeasureHelper
if order_spec.collation_method == CM.UNIVARIATE_MEASURE
else _SortByLabelHelper
if order_spec.collation_method == CM.LABEL
else _OrderHelper
else (
_SortByLabelHelper
if order_spec.collation_method == CM.LABEL
else _OrderHelper
)
)
return HelperCls(rows_dimension, measures, format)._display_order

Expand Down
2 changes: 1 addition & 1 deletion src/cr/cube/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import functools


class lazyproperty(object):
class lazyproperty:
"""Decorator like @property, but evaluated only on first access.
Like @property, this can only be used to decorate methods having only
a `self` parameter, and is accessed like an attribute on an instance,
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
CUBES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cubes")


class LazyCubeResponseLoader(object):
class LazyCubeResponseLoader:
"""Loads and caches cube-responses by name from fixture directory.

Provides access to all the cube-response fixtures in a directory by
Expand Down
Loading
Loading