diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1c94b7de1..265291826 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/ambv/black - rev: 22.6.0 + rev: 24.1.1 hooks: - id: black diff --git a/src/cr/cube/cube.py b/src/cr/cube/cube.py index e5374b393..03f30cd5b 100644 --- a/src/cr/cube/cube.py +++ b/src/cr/cube/cube.py @@ -5,7 +5,6 @@ CubeSet is the main API class for manipulating Crunch.io JSON cube responses. """ -import copy import json from typing import Dict, FrozenSet, Iterator, List, Optional, Tuple, Union @@ -260,11 +259,15 @@ def counts_with_missings(self) -> np.ndarray: return ( self._measures.weighted_valid_counts.raw_cube_array if self._measures.weighted_valid_counts is not None - else self._measures.unweighted_valid_counts.raw_cube_array - if self._measures.unweighted_valid_counts is not None - else self._measures.weighted_counts.raw_cube_array - if self.has_weighted_counts - else self._measures.unweighted_counts.raw_cube_array + else ( + self._measures.unweighted_valid_counts.raw_cube_array + if self._measures.unweighted_valid_counts is not None + else ( + self._measures.weighted_counts.raw_cube_array + if self.has_weighted_counts + else self._measures.unweighted_counts.raw_cube_array + ) + ) ) @lazyproperty @@ -312,8 +315,10 @@ def inflate(self) -> "Cube": A multi-cube (tabbook) response formed from a function (e.g. mean()) on a numeric variable arrives without a rows-dimension. """ - cube_dict = self._cube_dict - dimensions = cube_dict["result"]["dimensions"] + cube_dict = self._cube_response + num_array_dim = self._numeric_array_dimension or None + dims = cube_dict["result"]["dimensions"] + dimensions = [num_array_dim] + dims if num_array_dim else dims default_name = "-".join([m.value for m in self._available_numeric_measures]) # --- The default value in case of numeric variable is the combination of all # --- the measures expressed in the cube response. @@ -435,7 +440,7 @@ def title(self) -> str: use-case it is a stand-in for the columns-dimension name since a strand has no columns dimension. """ - return self._cube_dict["result"].get("title", "Untitled") + return self._cube_response["result"].get("title", "Untitled") @lazyproperty def unweighted_counts(self) -> np.ndarray: @@ -533,9 +538,12 @@ def weighted_squared_counts(self) -> Optional[np.ndarray]: ].astype(np.float64) @lazyproperty - def _all_dimensions(self) -> list: + def _all_dimensions(self) -> Dimensions: """List of all dimensions (not just user-apparent ones) for this cube.""" - return Dimensions.from_dicts(self._cube_dict["result"]["dimensions"]) + num_array_dim = self._numeric_array_dimension or None + dims = self._cube_response["result"]["dimensions"] + dimension_dicts = [num_array_dim] + dims if num_array_dim else dims + return Dimensions.from_dicts(dimension_dicts) @lazyproperty def _available_numeric_measures(self) -> Tuple[CUBE_MEASURE, ...]: @@ -560,19 +568,6 @@ def _ca_as_0th(self) -> bool: and self.dimension_types[0] == DT.CA ) - @lazyproperty - def _cube_dict(self) -> Dict: - """dict containing raw cube response, parsed from JSON payload.""" - cube_dict = copy.deepcopy(self._cube_response) - if self._numeric_measure_subvariables: - dimensions = cube_dict.get("result", {}).get("dimensions", []) - # ---dim inflation--- - # ---In case of numeric arrays, we need to inflate the row dimension - # ---according to the mean subvariables. For each subvar the row dimension - # ---will have a new element related to the subvar metadata. - dimensions.insert(0, self._numeric_array_dimension) - return cube_dict - @lazyproperty def _cube_response(self) -> Dict: """dict representing the parsed cube response arguments.""" @@ -593,7 +588,7 @@ def _cube_response(self) -> Dict: @lazyproperty def _is_single_filter_col_cube(self) -> float: """bool determines if it is a single column filter cube.""" - return self._cube_dict["result"].get("is_single_col_cube", False) + return self._cube_response["result"].get("is_single_col_cube", False) @lazyproperty def _measures(self) -> "_Measures": @@ -602,7 +597,7 @@ def _measures(self) -> "_Measures": Provides access to count based measures and numeric measures (e.g. mean, sum) when available. """ - return _Measures(self._cube_dict, self._all_dimensions, self._cube_idx_arg) + return _Measures(self._cube_response, self._all_dimensions, self._cube_idx_arg) @lazyproperty def _numeric_measure_references(self) -> Dict: diff --git a/src/cr/cube/cubepart.py b/src/cr/cube/cubepart.py index 77c435e27..00e7a241a 100644 --- a/src/cr/cube/cubepart.py +++ b/src/cr/cube/cubepart.py @@ -1736,10 +1736,12 @@ def _assemble_vector(self, base_vector, subtotals, order, diffs_nan=False): # values from a _BaseSubtotals subclass. vector_subtotals = np.array( [ - np.nan - if diffs_nan and len(subtotal.subtrahend_idxs) > 0 - else np.sum(base_vector[subtotal.addend_idxs]) - - np.sum(base_vector[subtotal.subtrahend_idxs]) + ( + np.nan + if diffs_nan and len(subtotal.subtrahend_idxs) > 0 + else np.sum(base_vector[subtotal.addend_idxs]) + - np.sum(base_vector[subtotal.subtrahend_idxs]) + ) for subtotal in subtotals ] ) diff --git a/src/cr/cube/dimension.py b/src/cr/cube/dimension.py index a7c30a4a0..fb1778829 100644 --- a/src/cr/cube/dimension.py +++ b/src/cr/cube/dimension.py @@ -4,8 +4,8 @@ import copy from collections.abc import Sequence -from datetime import datetime -from typing import Dict, List, Optional, Tuple, Union +from functools import partial +from typing import Callable, Dict, List, Optional, Tuple, Union import numpy as np @@ -17,6 +17,38 @@ ) from cr.cube.util import lazyproperty +from .util import format, format_datetime + +DATETIME_FORMATS = { + "Y": "%Y", + "Q": "%Y-%m", + "3M": "%Y-%m", + "M": "%Y-%m", + "W": "%Y-%m-%d", + "D": "%Y-%m-%d", + "h": "%Y-%m-%dT%H", + "m": "%Y-%m-%dT%H:%M", + "s": "%Y-%m-%dT%H:%M:%S", + "ms": "%Y-%m-%dT%H:%M:%S.%f", + "us": "%Y-%m-%dT%H:%M:%S.%f", +} + + +def _formatter(dimension_type, typedef, out_format) -> Union[Callable, partial]: + """Returns a formatting function according to the dimension type.""" + + if dimension_type != DT.DATETIME: + formatter: Union[Callable, partial] = format + else: + resolution = typedef["subtype"].get("resolution") + orig_format: str = DATETIME_FORMATS.get(resolution) or "" + formatter = ( + partial(format_datetime, orig_format=orig_format, out_format=out_format) + if orig_format and out_format + else format + ) + return formatter + class Dimensions(tuple): """Collection containing every dimension defined in cube response.""" @@ -460,20 +492,6 @@ class Elements(tuple): Each element is either a category or a subvariable. """ - datetime_formats = { - "Y": "%Y", - "Q": "%Y-%m", - "3M": "%Y-%m", - "M": "%Y-%m", - "W": "%Y-%m-%d", - "D": "%Y-%m-%d", - "h": "%Y-%m-%dT%H", - "m": "%Y-%m-%dT%H:%M", - "s": "%Y-%m-%dT%H:%M:%S", - "ms": "%Y-%m-%dT%H:%M:%S.%f", - "us": "%Y-%m-%dT%H:%M:%S.%f", - } - @classmethod def from_typedef( cls, typedef, dimension_transforms_dict, dimension_type, element_data_format @@ -509,31 +527,7 @@ def from_typedef( xforms = _ElementTransforms( all_xforms.get(element_id, all_xforms.get(str(element_id), {})) ) - - # This is so dumb that our type checker won't let us just - # write `formatter = str`. - def format(x) -> str: - return str(x) - - formatter = format - - if dimension_type == DT.DATETIME: - orig_format = cls.datetime_formats.get( - typedef["subtype"].get("resolution") - ) - out_format = element_data_format - if orig_format is not None and out_format is not None: - - def format_datetime(x) -> str: - try: - return datetime.strptime(x, orig_format).strftime( - out_format - ) - except ValueError: - return str(x) - - formatter = format_datetime - + formatter = _formatter(dimension_type, typedef, element_data_format) element = Element(element_dict, idx, xforms, formatter) elements.append(element) @@ -842,9 +836,11 @@ def _replaced_element_transforms(self, element_transforms) -> Dict: if key == "subvar_id": # --- translate from subvariable id new_keys = tuple( - self._subvar_aliases[self._subvar_ids.index(_id)] - if _id in self._subvar_ids - else None + ( + self._subvar_aliases[self._subvar_ids.index(_id)] + if _id in self._subvar_ids + else None + ) for _id in old_keys ) else: diff --git a/src/cr/cube/matrix/assembler.py b/src/cr/cube/matrix/assembler.py index 16fc6f719..7f3cf1abe 100644 --- a/src/cr/cube/matrix/assembler.py +++ b/src/cr/cube/matrix/assembler.py @@ -64,15 +64,24 @@ def row_display_order(cls, dimensions, second_order_measures, format): HelperCls = ( _SortRowsByBaseColumnHelper if collation_method == CM.OPPOSING_ELEMENT - else _SortRowsByDerivedColumnHelper - if collation_method == CM.OPPOSING_INSERTION and dim_type in DT.ARRAY_TYPES - else _SortRowsByInsertedColumnHelper - if collation_method == CM.OPPOSING_INSERTION - else _SortRowsByLabelHelper - if collation_method == CM.LABEL - else _SortRowsByMarginalHelper - if collation_method == CM.MARGINAL - else _RowOrderHelper + else ( + _SortRowsByDerivedColumnHelper + if collation_method == CM.OPPOSING_INSERTION + and dim_type in DT.ARRAY_TYPES + else ( + _SortRowsByInsertedColumnHelper + if collation_method == CM.OPPOSING_INSERTION + else ( + _SortRowsByLabelHelper + if collation_method == CM.LABEL + else ( + _SortRowsByMarginalHelper + if collation_method == CM.MARGINAL + else _RowOrderHelper + ) + ) + ) + ) ) return HelperCls(dimensions, second_order_measures, format)._display_order diff --git a/src/cr/cube/matrix/cubemeasure.py b/src/cr/cube/matrix/cubemeasure.py index a09c19494..c9269a91c 100644 --- a/src/cr/cube/matrix/cubemeasure.py +++ b/src/cr/cube/matrix/cubemeasure.py @@ -131,11 +131,11 @@ def factory(cls, counts, diff_nans, cube, dimensions, slice_idx): Chooses between unweighted and weighted counts based on `type`. """ dimension_type_strings = tuple( - "MR" - if dim_type == DT.MR - else "ARR" - if dim_type in DT.ARRAY_TYPES - else "CAT" + ( + "MR" + if dim_type == DT.MR + else "ARR" if dim_type in DT.ARRAY_TYPES else "CAT" + ) for dim_type in cube.dimension_types[-2:] ) CubeCountsCls = { @@ -812,11 +812,15 @@ def factory(cls, cube, dimensions, slice_idx): CubeMeansCls = ( _MrXMrCubeMeans if dimension_types == (DT.MR, DT.MR) - else _MrXCatCubeMeans - if dimension_types[0] == DT.MR - else _CatXMrCubeMeans - if dimension_types[1] == DT.MR - else _CatXCatCubeMeans + else ( + _MrXCatCubeMeans + if dimension_types[0] == DT.MR + else ( + _CatXMrCubeMeans + if dimension_types[1] == DT.MR + else _CatXCatCubeMeans + ) + ) ) return CubeMeansCls( dimensions, cube.means[cls._slice_idx_expr(cube, slice_idx)] @@ -1027,11 +1031,15 @@ def factory(cls, cube, dimensions, slice_idx): CubeSumsCls = ( _MrXMrCubeStdDev if dimension_types == (DT.MR, DT.MR) - else _MrXCatCubeStdDev - if dimension_types[0] == DT.MR - else _CatXMrCubeStdDev - if dimension_types[1] == DT.MR - else _CatXCatCubeStdDev + else ( + _MrXCatCubeStdDev + if dimension_types[0] == DT.MR + else ( + _CatXMrCubeStdDev + if dimension_types[1] == DT.MR + else _CatXCatCubeStdDev + ) + ) ) return CubeSumsCls( dimensions, cube.stddev[cls._slice_idx_expr(cube, slice_idx)] @@ -1110,11 +1118,13 @@ def factory(cls, cube, dimensions, slice_idx): CubeSumsCls = ( _MrXMrCubeSums if dimension_types == (DT.MR, DT.MR) - else _MrXCatCubeSums - if dimension_types[0] == DT.MR - else _CatXMrCubeSums - if dimension_types[1] == DT.MR - else _CatXCatCubeSums + else ( + _MrXCatCubeSums + if dimension_types[0] == DT.MR + else ( + _CatXMrCubeSums if dimension_types[1] == DT.MR else _CatXCatCubeSums + ) + ) ) return CubeSumsCls(dimensions, cube.sums[cls._slice_idx_expr(cube, slice_idx)]) @@ -1202,11 +1212,15 @@ def factory(cls, cube, dimensions, slice_idx): UnconditionalCubeCountsCls = ( _MrXMrUnconditionalCubeCounts if dimension_types == (DT.MR, DT.MR) - else _MrXCatUnconditionalCubeCounts - if dimension_types[0] == DT.MR - else _CatXMrUnconditionalCubeCounts - if dimension_types[1] == DT.MR - else _CatXCatUnconditionalCubeCounts + else ( + _MrXCatUnconditionalCubeCounts + if dimension_types[0] == DT.MR + else ( + _CatXMrUnconditionalCubeCounts + if dimension_types[1] == DT.MR + else _CatXCatUnconditionalCubeCounts + ) + ) ) return UnconditionalCubeCountsCls( dimensions, diff --git a/src/cr/cube/matrix/measure.py b/src/cr/cube/matrix/measure.py index 37392d37e..e8d4da563 100644 --- a/src/cr/cube/matrix/measure.py +++ b/src/cr/cube/matrix/measure.py @@ -1712,9 +1712,11 @@ def blocks(self): return ( self._second_order_measures.row_proportions.blocks if self._dimensions[-2].dimension_type == DT.CAT_DATE - else self._second_order_measures.column_proportions.blocks - if self._dimensions[-1].dimension_type == DT.CAT_DATE - else self._second_order_measures.table_proportions.blocks + else ( + self._second_order_measures.column_proportions.blocks + if self._dimensions[-1].dimension_type == DT.CAT_DATE + else self._second_order_measures.table_proportions.blocks + ) ) @@ -1744,9 +1746,11 @@ def blocks(self): return ( self._second_order_measures.row_std_err.blocks if self._dimensions[-2].dimension_type == DT.CAT_DATE - else self._second_order_measures.column_std_err.blocks - if self._dimensions[-1].dimension_type == DT.CAT_DATE - else self._second_order_measures.table_std_err.blocks + else ( + self._second_order_measures.column_std_err.blocks + if self._dimensions[-1].dimension_type == DT.CAT_DATE + else self._second_order_measures.table_std_err.blocks + ) ) diff --git a/src/cr/cube/measures/pairwise_significance.py b/src/cr/cube/measures/pairwise_significance.py index 6417319a2..625969055 100644 --- a/src/cr/cube/measures/pairwise_significance.py +++ b/src/cr/cube/measures/pairwise_significance.py @@ -8,7 +8,7 @@ from cr.cube.util import lazyproperty -class PairwiseSignificance(object): +class PairwiseSignificance: """Implementation of p-vals and t-tests for each column proportions comparison.""" def __init__(self, slice_, alpha=0.05, only_larger=True): @@ -57,7 +57,7 @@ def _scale_mean_pairwise_indices(self): return tuple(sig.scale_mean_pairwise_indices for sig in self.values) -class _ColumnPairwiseSignificance(object): +class _ColumnPairwiseSignificance: """Value object providing matrix of T-score based pairwise-comparison P-values""" def __init__(self, slice_, col_idx, alpha=0.05, only_larger=True): diff --git a/src/cr/cube/smoothing.py b/src/cr/cube/smoothing.py index ab38645d5..b8ae3e49f 100644 --- a/src/cr/cube/smoothing.py +++ b/src/cr/cube/smoothing.py @@ -8,7 +8,7 @@ from cr.cube.util import lazyproperty -class Smoother(object): +class Smoother: """Base object class for Smoother variants.""" @classmethod @@ -28,7 +28,7 @@ def factory(cls, dimension): ) -class _SingleSidedMovingAvgSmoother(object): +class _SingleSidedMovingAvgSmoother: """Create and configure smoothing function for one-sided moving average.""" def __init__(self, smoothing_dict, dimension_type): diff --git a/src/cr/cube/stripe/assembler.py b/src/cr/cube/stripe/assembler.py index 9ba9d530b..60170e084 100644 --- a/src/cr/cube/stripe/assembler.py +++ b/src/cr/cube/stripe/assembler.py @@ -42,9 +42,11 @@ def display_order(cls, rows_dimension, measures, format): HelperCls = ( _SortByMeasureHelper if order_spec.collation_method == CM.UNIVARIATE_MEASURE - else _SortByLabelHelper - if order_spec.collation_method == CM.LABEL - else _OrderHelper + else ( + _SortByLabelHelper + if order_spec.collation_method == CM.LABEL + else _OrderHelper + ) ) return HelperCls(rows_dimension, measures, format)._display_order diff --git a/src/cr/cube/util.py b/src/cr/cube/util.py index 830633e98..e0e43601e 100644 --- a/src/cr/cube/util.py +++ b/src/cr/cube/util.py @@ -2,10 +2,22 @@ """Utility functions for crunch cube, as well as other modules.""" +from datetime import datetime import functools -class lazyproperty(object): +def format(x) -> str: + return str(x) + + +def format_datetime(x, orig_format, out_format) -> str: + try: + return datetime.strptime(x, orig_format).strftime(out_format) + except ValueError: + return str(x) + + +class lazyproperty: """Decorator like @property, but evaluated only on first access. Like @property, this can only be used to decorate methods having only a `self` parameter, and is accessed like an attribute on an instance, diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 09ae63824..fe4c8b51a 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -8,7 +8,7 @@ CUBES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cubes") -class LazyCubeResponseLoader(object): +class LazyCubeResponseLoader: """Loads and caches cube-responses by name from fixture directory. Provides access to all the cube-response fixtures in a directory by diff --git a/tests/integration/test_headers_and_subtotals.py b/tests/integration/test_headers_and_subtotals.py index deccc7374..82cba8c53 100644 --- a/tests/integration/test_headers_and_subtotals.py +++ b/tests/integration/test_headers_and_subtotals.py @@ -11,7 +11,7 @@ from ..util import load_python_expression -class TestHeadersAndSubtotals(object): +class TestHeadersAndSubtotals: """Legacy unit-test suite for inserted rows and columns.""" def test_headings_econ_blame_one_subtotal(self): @@ -2761,7 +2761,7 @@ def it_provide_residual_test_stats_including_hs(self): assert slice_.residual_test_stats.shape == (2, 6, 7) -class DescribeIntegrated_SubtotalDifferences(object): +class DescribeIntegrated_SubtotalDifferences: """TDD driver(s) for Subtotal Difference insertions.""" def it_computes_measures_for_1D_cat_with_subdiffs(self): diff --git a/tests/unit/test_cube.py b/tests/unit/test_cube.py index 6381cff76..ab0156a6e 100644 --- a/tests/unit/test_cube.py +++ b/tests/unit/test_cube.py @@ -428,25 +428,25 @@ def it_knows_ca_as_0th( assert _ca_as_0th is expected_value @pytest.mark.parametrize( - ("cube_dict", "expected_value"), + ("cube_response", "expected_value"), (({"result": {}}, "Untitled"), ({"result": {"title": "Hipsters"}}, "Hipsters")), ) - def it_knows_its_title(self, _cube_dict_prop_, cube_dict, expected_value): - _cube_dict_prop_.return_value = cube_dict + def it_knows_its_title(self, _cube_response_prop_, cube_response, expected_value): + _cube_response_prop_.return_value = cube_response assert Cube(None).title == expected_value @pytest.mark.parametrize( - ("cube_dict", "expected_value"), + ("cube_response", "expected_value"), (({"result": {}}, False), ({"result": {"is_single_col_cube": True}}, True)), ) def it_knows_if_it_is_a_single_col_filter_cube( - self, _cube_dict_prop_, cube_dict, expected_value + self, _cube_response_prop_, cube_response, expected_value ): - _cube_dict_prop_.return_value = cube_dict + _cube_response_prop_.return_value = cube_response assert Cube(None)._is_single_filter_col_cube == expected_value def it_provides_access_to_the_cube_response_dict_to_help(self): - assert Cube({"cube": "dict"})._cube_dict == {"cube": "dict"} + assert Cube({"cube": "dict"})._cube_response == {"cube": "dict"} @pytest.mark.parametrize( ("cube_response", "expected_value"), @@ -476,7 +476,7 @@ def but_it_raises_on_other_cube_response_types( expected_value, ): with pytest.raises(TypeError) as e: - Cube(cube_response)._cube_dict + Cube(cube_response)._cube_response assert str(e.value) == expected_value @@ -604,58 +604,8 @@ def it_knows_its_numeric_references( assert numeric_references == expected_value - @pytest.mark.parametrize( - "cube_response, cube_idx_arg, numeric_subvars, num_array_dim, expected_value", - ( - ({}, None, [], {}, {}), - ({"result": {"foo": "bar"}}, None, [], {}, {"result": {"foo": "bar"}}), - ( - {"result": {"foo": "bar"}}, - None, - ["A", "B"], - {}, - {"result": {"foo": "bar"}}, - ), - ( - {"result": {"dimensions": []}}, - None, - ["A", "B"], - {"A": "B"}, - {"result": {"dimensions": [{"A": "B"}]}}, - ), - ( - {"result": {"dimensions": ["A", "B"]}}, - 1, - ["A", "B"], - {"A": "B"}, - {"result": {"dimensions": [{"A": "B"}, "A", "B"]}}, - ), - ), - ) - def it_knows_its_cube_dict( - self, - cube_response, - cube_idx_arg, - numeric_subvars, - num_array_dim, - expected_value, - _cube_response_prop_, - _numeric_subvariables_prop_, - _numeric_array_dimension_prop_, - ): - _cube_response_prop_.return_value = cube_response - _numeric_subvariables_prop_.return_value = numeric_subvars - _numeric_array_dimension_prop_.return_value = num_array_dim - cube = Cube(None, cube_idx=cube_idx_arg) - - assert cube._cube_dict == expected_value - # fixture components --------------------------------------------- - @pytest.fixture - def _cube_dict_prop_(self, request): - return property_mock(request, Cube, "_cube_dict") - @pytest.fixture def _cube_response_prop_(self, request): return property_mock(request, Cube, "_cube_response") diff --git a/tests/unit/test_smoothing.py b/tests/unit/test_smoothing.py index 3bf7ecb34..c3bbcfd9d 100644 --- a/tests/unit/test_smoothing.py +++ b/tests/unit/test_smoothing.py @@ -39,7 +39,7 @@ def but_it_raises_an_exception_when_function_is_not_implemented(self, request): assert str(e.value) == "Function foo is not available." -class Describe_SingleSideMovingAvgSmoother(object): +class Describe_SingleSideMovingAvgSmoother: def it_constructs_single_sided_moving_avg_to_help(self, request): _init_ = initializer_mock(request, _SingleSidedMovingAvgSmoother) single_sided_miving_avg = _SingleSidedMovingAvgSmoother(