Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make MAXIMUM_SEED_SIZE configurable #7116

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230303-144700.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Make MAXIMUM_SEED_SIZE configurable
time: 2023-03-03T14:47:00.079887-05:00
custom:
Author: acurtis-evi
Issue: "7117"
11 changes: 9 additions & 2 deletions core/dbt/constants.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
import os

# Environment-variable naming constants.
# NOTE(review): presumably these are prefixes/placeholders matched against
# environment variable names elsewhere in the project — confirm at the use sites.
SECRET_ENV_PREFIX = "DBT_ENV_SECRET_"
DEFAULT_ENV_PLACEHOLDER = "DBT_DEFAULT_PLACEHOLDER"
METADATA_ENV_PREFIX = "DBT_ENV_CUSTOM_ENV_"

# Fixed seed size limit of 1 * 1024 * 1024 bytes and its human-readable name.
# NOTE(review): both names are assigned again further down in this file from
# get_max_seed_size(), so these two values are overwritten at import time.
MAXIMUM_SEED_SIZE = 1 * 1024 * 1024
MAXIMUM_SEED_SIZE_NAME = "1MB"
def get_max_seed_size():
    """Return the seed size multiplier read from ``DBT_MAXIMUM_SEED_SIZE``.

    The environment variable holds a whole number that the module-level
    constants multiply by the 1 MiB default limit. When the variable is
    unset the multiplier is 1, which keeps the historical limit.

    Returns:
        The configured multiplier as a positive ``int``.

    Raises:
        ValueError: If the variable is set to something that is not a
            positive integer. A zero or negative multiplier would yield a
            non-positive size limit, so it is rejected here rather than
            silently accepted.
    """
    raw = os.getenv("DBT_MAXIMUM_SEED_SIZE", "1")
    try:
        multiplier = int(raw)
    except ValueError:
        # Re-raise with a message that names the offending variable; the bare
        # int() error gives the user no hint about where the value came from.
        raise ValueError(
            f"DBT_MAXIMUM_SEED_SIZE must be a positive integer, got {raw!r}"
        ) from None
    if multiplier < 1:
        raise ValueError(
            f"DBT_MAXIMUM_SEED_SIZE must be a positive integer, got {raw!r}"
        )
    return multiplier

# Baseline seed size limit: 1 MiB, expressed in bytes.
DEFAULT_MAXIMUM_SEED_SIZE = 1024 * 1024
# Effective limit: the baseline scaled by the multiplier from get_max_seed_size().
MAXIMUM_SEED_SIZE = DEFAULT_MAXIMUM_SEED_SIZE * get_max_seed_size()
# Name for the effective limit, built from the multiplier (e.g. "1MB" for 1).
MAXIMUM_SEED_SIZE_NAME = f"{get_max_seed_size()}MB"
acurtis-evi marked this conversation as resolved.
Show resolved Hide resolved

PIN_PACKAGE_URL = (
"https://docs.getdbt.com/docs/package-management#section-specifying-package-versions"
Expand Down
7 changes: 2 additions & 5 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from mashumaro.types import SerializableType
from typing import List, Optional, Union, Dict, Any

from dbt.constants import MAXIMUM_SEED_SIZE
from dbt.dataclass_schema import dbtClassMixin, StrEnum

from .util import SourceKey
Expand Down Expand Up @@ -65,10 +64,8 @@ def original_file_path(self) -> str:
# name, should it?
return os.path.join(self.searched_path, self.relative_path)

def seed_too_large(self) -> bool:
    """Report whether the file at ``self.full_path`` exceeds ``MAXIMUM_SEED_SIZE``.

    The size is taken from ``os.stat``; a file whose size equals the limit
    is not considered too large.
    """
    size_on_disk = os.stat(self.full_path).st_size
    return size_on_disk > MAXIMUM_SEED_SIZE

def file_size(self) -> int:
    """Return the size, as reported by ``os.stat``, of the file at ``self.full_path``."""
    stat_result = os.stat(self.full_path)
    return stat_result.st_size

@dataclass
class FileHash(dbtClassMixin):
Expand Down
8 changes: 6 additions & 2 deletions core/dbt/parser/read_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from dbt.parser.search import filesystem_search
from typing import Optional

from dbt.constants import MAXIMUM_SEED_SIZE, DEFAULT_MAXIMUM_SEED_SIZE

# This loads the files contents and creates the SourceFile object
def load_source_file(
Expand Down Expand Up @@ -94,14 +95,17 @@ def validate_yaml(file_path, dct):

# Special processing for big seed files
def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
    """Build the ``SourceFile`` for a seed file.

    Seeds larger than ``MAXIMUM_SEED_SIZE`` are created through
    ``SourceFile.big_seed`` so their contents are never read for hashing.
    Every other seed is read and checksummed from its contents, after which
    the contents are dropped from the returned object.

    Args:
        match: The matched seed file path.
        project_name: Name of the project the seed belongs to; stored on the
            returned source file.

    Returns:
        A ``SourceFile`` with ``parse_file_type`` set to ``ParseFileType.Seed``
        and ``project_name`` set.
    """
    if match.file_size() > MAXIMUM_SEED_SIZE:
        # We don't want to calculate a hash of this file. Use the path.
        source_file = SourceFile.big_seed(match)
    else:
        # NOTE(review): seeds between DEFAULT_MAXIMUM_SEED_SIZE and the
        # configured limit are hashed by content exactly like smaller seeds.
        # A separate hashing strategy for that range was sketched but never
        # implemented; falling through without one left `source_file` unbound.
        file_contents = load_file_contents(match.absolute_path, strip=False)
        checksum = FileHash.from_contents(file_contents)
        source_file = SourceFile(path=match, checksum=checksum)
        # The contents were only needed for the checksum; do not keep them.
        source_file.contents = ""
    source_file.parse_file_type = ParseFileType.Seed
    source_file.project_name = project_name
    return source_file
Expand Down