Skip to content

Commit

Permalink
add maximum_seed_size_mib to global config
Browse files Browse the repository at this point in the history
  • Loading branch information
noppaz committed Apr 10, 2023
1 parent a1afca5 commit fa836bb
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 25 deletions.
1 change: 1 addition & 0 deletions core/dbt/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def invoke(self, args: List[str]) -> Tuple[Optional[List], bool]:
@p.warn_error
@p.warn_error_options
@p.write_json
@p.maximum_seed_size_mib
def cli(ctx, **kwargs):
"""An ELT tool for managing your SQL transformations and data models.
For more documentation on these commands, visit: docs.getdbt.com
Expand Down
10 changes: 9 additions & 1 deletion core/dbt/cli/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def _version_callback(ctx, _param, value):
envvar="DBT_WARN_ERROR_OPTIONS",
default="{}",
help="""If dbt would normally warn, instead raise an exception based on include/exclude configuration. Examples include --select that selects nothing, deprecations, configurations with no associated models, invalid test configurations,
and missing sources/refs in tests. This argument should be a YAML string, with keys 'include' or 'exclude'. eg. '{"include": "all", "exclude": ["NoNodesForSelectionCriteria"]}'""",
and missing sources/refs in tests. This argument should be a JSON string, with keys 'include' or 'exclude'. eg. '{"include": "all", "exclude": ["NoNodesForSelectionCriteria"]}'""",

This comment has been minimized.

Copy link
@noppaz

noppaz Apr 11, 2023

Author

This was just a small drive-by ("boy scout") cleanup: I happened to notice that the help text says YAML when the value is actually JSON. I can remove this change if you'd like.

type=WarnErrorOptionsType(),
)

Expand All @@ -479,3 +479,11 @@ def _version_callback(ctx, _param, value):
help="TODO: No help text currently available",
default=True,
)

# Global CLI option: upper bound, in MiB, on the size of seed files that are
# hashed for state comparison. It can also be supplied through the
# DBT_MAXIMUM_SEED_SIZE_MIB environment variable and defaults to 1.
# NOTE(review): the seed loader treats a value of 0 as "no limit"; the help
# text does not mention this -- consider documenting it for users.
maximum_seed_size_mib = click.option(
"--maximum-seed-size-mib",
envvar="DBT_MAXIMUM_SEED_SIZE_MIB",
help="Specify max size (MiB) for seed files that will be hashed for state comparison.",
type=click.INT,
default=1,
)
11 changes: 0 additions & 11 deletions core/dbt/constants.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,8 @@
import os

SECRET_ENV_PREFIX = "DBT_ENV_SECRET_"
DEFAULT_ENV_PLACEHOLDER = "DBT_DEFAULT_PLACEHOLDER"
METADATA_ENV_PREFIX = "DBT_ENV_CUSTOM_ENV_"


def get_max_seed_size():
    """Return the seed size limit multiplier from ``DBT_MAXIMUM_SEED_SIZE``.

    The value is a whole number of MiB; module-level constants multiply it
    by ``DEFAULT_MAXIMUM_SEED_SIZE`` to obtain the limit itself.

    Returns:
        The integer configured in the environment, or ``1`` when the
        variable is unset, empty, or contains only whitespace.

    Raises:
        ValueError: If the variable is set to something that is not an
            integer. The message names the offending variable, because this
            function runs at import time where a bare ``int()`` failure is
            hard to trace back to its cause.
    """
    raw = os.getenv("DBT_MAXIMUM_SEED_SIZE", "1").strip()
    # An exported-but-empty variable is treated the same as an unset one.
    if not raw:
        return 1
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"DBT_MAXIMUM_SEED_SIZE must be an integer number of MiB, got {raw!r}"
        ) from err


DEFAULT_MAXIMUM_SEED_SIZE = 1 * 1024 * 1024
MAXIMUM_SEED_SIZE = get_max_seed_size() * DEFAULT_MAXIMUM_SEED_SIZE
MAXIMUM_SEED_SIZE_NAME = str(get_max_seed_size()) + "MiB"

PIN_PACKAGE_URL = (
"https://docs.getdbt.com/docs/package-management#section-specifying-package-versions"
)
1 change: 1 addition & 0 deletions core/dbt/contracts/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ class UserConfig(ExtensibleDbtClassMixin, Replaceable, UserConfigContract):
static_parser: Optional[bool] = None
indirect_selection: Optional[str] = None
cache_selected_only: Optional[bool] = None
maximum_seed_size_mib: Optional[int] = None


@dataclass
Expand Down
18 changes: 11 additions & 7 deletions core/dbt/events/types.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from dbt.ui import line_wrap_message, warning_tag, red, green, yellow
from dbt.constants import MAXIMUM_SEED_SIZE_NAME, PIN_PACKAGE_URL
from dbt.constants import PIN_PACKAGE_URL
from dbt.events.base_types import (
DynamicLevel,
NoFile,
Expand All @@ -19,7 +19,7 @@
from dbt.events.proto_types import EventInfo, RunResultMsg, ListOfStrings # noqa
from dbt.events.proto_types import NodeInfo, ReferenceKeyMsg, TimingInfoMsg # noqa
from dbt.events import proto_types as pt

from dbt.flags import get_flags
from dbt.node_types import NodeType


Expand Down Expand Up @@ -54,6 +54,10 @@ def format_adapter_message(name, base_msg, args) -> str:
return f"{name} adapter: {msg}"


def get_maximum_seed_size_name() -> str:
    """Render the configured seed size limit as a label, e.g. ``1MiB``.

    The number is read from the global flags on every call, so event
    messages reflect the current ``MAXIMUM_SEED_SIZE_MIB`` setting.
    """
    limit_mib = get_flags().MAXIMUM_SEED_SIZE_MIB
    return f"{limit_mib}MiB"


# =======================================================
# A - Pre-project loading
# =======================================================
Expand Down Expand Up @@ -972,8 +976,8 @@ def code(self):
def message(self) -> str:
msg = (
f"Found a seed ({self.package_name}.{self.name}) "
f">{MAXIMUM_SEED_SIZE_NAME} in size. The previous file was "
f"<={MAXIMUM_SEED_SIZE_NAME}, so it has changed"
f">{get_maximum_seed_size_name()} in size. The previous file was "
f"<={get_maximum_seed_size_name()}, so it has changed"
)
return msg

Expand All @@ -986,7 +990,7 @@ def code(self):
def message(self) -> str:
msg = (
f"Found a seed ({self.package_name}.{self.name}) "
f">{MAXIMUM_SEED_SIZE_NAME} in size at the same path, dbt "
f">{get_maximum_seed_size_name()} in size at the same path, dbt "
f"cannot tell if it has changed: assuming they are the same"
)
return msg
Expand All @@ -1000,7 +1004,7 @@ def code(self):
def message(self) -> str:
msg = (
f"Found a seed ({self.package_name}.{self.name}) "
f">{MAXIMUM_SEED_SIZE_NAME} in size. The previous file was in "
f">{get_maximum_seed_size_name()} in size. The previous file was in "
f"a different location, assuming it has changed"
)
return msg
Expand All @@ -1014,7 +1018,7 @@ def code(self):
def message(self) -> str:
msg = (
f"Found a seed ({self.package_name}.{self.name}) "
f">{MAXIMUM_SEED_SIZE_NAME} in size. The previous file had a "
f">{get_maximum_seed_size_name()} in size. The previous file had a "
f"checksum type of {self.checksum_name}, so it has changed"
)
return msg
Expand Down
1 change: 1 addition & 0 deletions core/dbt/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def get_flag_dict():
"cache_selected_only",
"target_path",
"log_path",
"maximum_seed_size_mib",
}
return {key: getattr(GLOBAL_FLAGS, key.upper(), None) for key in flag_attr}

Expand Down
15 changes: 12 additions & 3 deletions core/dbt/parser/read_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import Optional, Dict, List, Mapping
from dbt.events.types import InputFileDiffError
from dbt.events.functions import fire_event
from dbt.flags import get_flags


@dataclass
Expand All @@ -36,7 +37,14 @@ class FileDiff(dbtClassMixin):
changed: List[InputFile]
added: List[InputFile]

from dbt.constants import MAXIMUM_SEED_SIZE, DEFAULT_MAXIMUM_SEED_SIZE

DEFAULT_MAXIMUM_SEED_SIZE = 1 * 1024 * 1024


def get_max_seed_size() -> int:
    """Return the largest seed file size that will be hashed for state comparison.

    The ``MAXIMUM_SEED_SIZE_MIB`` flag holds a count of MiB; it is scaled by
    ``DEFAULT_MAXIMUM_SEED_SIZE`` (the size of one MiB) so the result can be
    compared directly against a file size. The seed loader treats a result
    of 0 as "no limit".
    """
    limit_mib = get_flags().MAXIMUM_SEED_SIZE_MIB
    return limit_mib * DEFAULT_MAXIMUM_SEED_SIZE


# This loads the files contents and creates the SourceFile object
Expand Down Expand Up @@ -116,8 +124,9 @@ def validate_yaml(file_path, dct):

# Special processing for big seed files
def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
# MAXIMUM_SEED_SIZE = 0 means no limit
if match.file_size() > MAXIMUM_SEED_SIZE and MAXIMUM_SEED_SIZE != 0:
maximum_seed_size = get_max_seed_size()
# maximum_seed_size = 0 means no limit
if match.file_size() > maximum_seed_size and maximum_seed_size != 0:
# We don't want to calculate a hash of this file. Use the path.
source_file = SourceFile.big_seed(match)
elif match.file_size() <= DEFAULT_MAXIMUM_SEED_SIZE:
Expand Down
4 changes: 2 additions & 2 deletions test/unit/test_graph_selector_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -1036,7 +1036,7 @@ def test_select_state_changed_seed_checksum_path_to_path(manifest, previous_stat
event = warn_or_error_patch.call_args[0][0]
assert type(event).__name__ == 'SeedExceedsLimitSamePath'
msg = event.message()
assert msg.startswith('Found a seed (pkg.seed) >1MB in size')
assert msg.startswith('Found a seed (pkg.seed) >1MiB in size')
with mock.patch('dbt.contracts.graph.nodes.warn_or_error') as warn_or_error_patch:
assert not search_manifest_using_method(manifest, method, 'new')
warn_or_error_patch.assert_not_called()
Expand All @@ -1053,7 +1053,7 @@ def test_select_state_changed_seed_checksum_sha_to_path(manifest, previous_state
event = warn_or_error_patch.call_args[0][0]
assert type(event).__name__ == 'SeedIncreased'
msg = event.message()
assert msg.startswith('Found a seed (pkg.seed) >1MB in size')
assert msg.startswith('Found a seed (pkg.seed) >1MiB in size')
with mock.patch('dbt.contracts.graph.nodes.warn_or_error') as warn_or_error_patch:
assert not search_manifest_using_method(manifest, method, 'new')
warn_or_error_patch.assert_not_called()
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/defer_state/test_modified_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def test_changed_seed_contents_state(self, project):
"./state",
]
)
assert ">1MB" in str(exc.value)
assert ">1MiB" in str(exc.value)

shutil.rmtree("./state")
self.copy_state()
Expand Down

0 comments on commit fa836bb

Please sign in to comment.