From 2eec48bfb2bb69198eebeb74569621cdb1afb56a Mon Sep 17 00:00:00 2001 From: winnie <91998347+gwenwindflower@users.noreply.github.com> Date: Wed, 20 Mar 2024 14:46:28 -0500 Subject: [PATCH] [WT-7] Improve Taskfile and development flow (#17) This PR seeks to improve the Taskfile easy setup path. In doing so, it removes SQLFluff temporarily until a path forward for using SQLFluff and the dbt Cloud CLI can be resolved. SQLFluff's dbt templater needs dbt-core to run, which eats the dbt bin in the virtual environment that would otherwise be the Cloud CLI. It's more important that this repo supports the Cloud CLI for the time being, so we're removing SQLFluff. SQL linting is still performed by pre-commit, as it installs isolated environments in which to run all its hooks, so SQL linting on commit as well as on demand via pre-commit commands is still available. --- .gitignore | 2 + .pre-commit-config.yaml | 14 +-- README.md | 34 ++++--- Taskfile.yml | 20 ++-- dbt_project.yml | 5 +- profiles-example.yml | 19 ---- profiles.yml | 25 ----- requirements.in | 3 - requirements.txt | 200 +--------------------------------------- 9 files changed, 47 insertions(+), 275 deletions(-) delete mode 100644 profiles-example.yml delete mode 100644 profiles.yml diff --git a/.gitignore b/.gitignore index dc7d127b..4d4b1afc 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ dbt_packages/ logs/ .DS_Store + +.user.yml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 38f042f6..6431b73f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,26 +7,26 @@ repos: - id: trailing-whitespace - id: requirements-txt-fixer - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.2.1 + rev: v0.3.3 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - repo: https://github.com/sqlfluff/sqlfluff - rev: "2.3.5" + rev: "3.0.1" hooks: - id: sqlfluff-lint additional_dependencies: [ - "dbt-metricflow[duckdb,snowflake,postgres]~=0.5.0", -
"sqlfluff-templater-dbt~=2.3.5", + "dbt-metricflow[duckdb,snowflake,postgres]~=0.6.0", + "sqlfluff-templater-dbt~=3.0.1", ] - id: sqlfluff-fix additional_dependencies: [ - "dbt-metricflow[duckdb,snowflake,postgres]~=0.5.0", - "sqlfluff-templater-dbt~=2.3.5", + "dbt-metricflow[duckdb,snowflake,postgres]~=0.6.0", + "sqlfluff-templater-dbt~=3.0.1", ] - repo: https://github.com/psf/black - rev: "24.2.0" + rev: "24.3.0" hooks: - id: black diff --git a/README.md b/README.md index 93f96bdf..53db11ea 100644 --- a/README.md +++ b/README.md @@ -16,21 +16,24 @@ This is a sandbox project for exploring the basic functionality and latest featu ## Platform setup -### dbt Cloud IDE (most beginner friendly) +1. Set up a dbt Cloud account and follow Step 4 in the [Quickstart instructions for your data platform](https://docs.getdbt.com/quickstarts), to connect your platform to dbt Cloud, then follow one of the two paths below to set up your development environment. -1. Set up a dbt Cloud account and follow Step 4 in the [Quickstart instructions for your data platform](https://docs.getdbt.com/quickstarts), to connect your platform to dbt Cloud. +### dbt Cloud IDE (most beginner friendly) -2. Choose the repo you created in Step 1 as the repository for your dbt Project code. +1. Choose the repo you created in Step 1 as the repository for your dbt Project code. -3. Click `Develop` in the top nav, you should be prompted to run a `dbt deps`, which you should do. +2. Click `Develop` in the top nav, you should be prompted to run a `dbt deps`, which you should do. ### dbt Cloud CLI (if you prefer to work locally) -> If you'd like to use the dbt Cloud CLI, but are a little intimidated by the terminal, we've included a task runner called, fittingly, `task`. It's a simple way to run the commands you need to get started with dbt. You can install it by following the instructions [here](https://taskfile.dev/#/installation). We'll call out the `task` based alternative to each command below. 
+> [!NOTE] +> If you'd like to use the dbt Cloud CLI, but are a little intimidated by the terminal, we've included a task runner called, fittingly, `task`. It's a simple way to run the commands you need to get started with dbt. You can install it by following the instructions [here](https://taskfile.dev/#/installation). We'll call out the `task` based alternative to each command below. You can also run `task setup` to perform all the setup commands at once. 1. Run `git clone [new repo name]` to clone your new repo to your local machine. -2. Set up a virtual environment and activate it. I like to call my virtual environment `.venv` and add it to my `.gitignore` file (we've already done this if you name your virtual environment '.venv') so that I don't accidentally commit it to the repository, but you can call it whatever you want. +2. [Follow Step 1 on this page](https://cloud.getdbt.com/cloud-cli) to install the dbt Cloud CLI, we'll do the other steps in a second. + +3. Set up a virtual environment and activate it. I like to call my virtual environment `.venv` and add it to my `.gitignore` file (we've already done this if you name your virtual environment '.venv') so that I don't accidentally commit it to the repository, but you can call it whatever you want. ```shell python3 -m venv .venv # create a virtual environment @@ -40,7 +43,7 @@ This is a sandbox project for exploring the basic functionality and latest featu source .venv/bin/activate # activate the virtual environment ``` -3. Install the project's requirements into your virtual environment. +4. Install the project's requirements into your virtual environment. ```shell python3 -m pip install -r requirements.txt # install the project's requirements @@ -48,26 +51,27 @@ This is a sandbox project for exploring the basic functionality and latest featu task install # install the project's requirements ``` -4. 
[Follow steps 2 and 3 on this page](https://cloud.getdbt.com/cloud-cli) to setup dbt Cloud CLI's connection to dbt Cloud, only if you haven't already done so (we handled step 1 above and will do step 4 together next). +5. [Follow steps 2 and 3 on this page](https://cloud.getdbt.com/cloud-cli) to setup dbt Cloud CLI's connection to dbt Cloud, only if you haven't already done so (we handled step 1 above and will do step 4 together next). -5. Double check that your `dbt_project.yml` is set up correctly by running `dbt list`. You should get back a list of models and tests in your project. +6. Double check that your `dbt_project.yml` is set up correctly by running `dbt list`. You should get back a list of models and tests in your project. ## Project setup Once your development platform of choice is set up, use the following steps to get the project ready for whatever you'd like to do with it. -1. Run `task setup`. - -#### OR - 1. Run `dbt build` to load the sample data into your raw schema, build your models, and test your project. 2. Delete the `jaffle-data` directory now that the raw data is loaded into the warehouse. It will be loaded into a `raw_jaffle_shop` schema in your warehouse. That both `dev` and `prod` targets are set up to use. Take a look at the `generate_schema_name` macro in the `macros` directory to if you're curious how this is done. +#### OR + +1. Run `task build`. + ## Pre-commit and linting with SQLFluff This project uses a tool called [pre-commit](https://pre-commit.com/) to automatically run a suite of of processes on your code, like linters and formatters, when you commit. If it finds an issue and updates a file, you'll need to stage the changes and commit them again (the first commit will not have gone through because pre-commit found and fixed an issue). The outcome of this is that your code will be more consistent automatically, and everybody's changes will be running through the same set of processes. We recommend it for any project. 
You can see the configuration for pre-commit in the `.pre-commit-config.yaml` file. You can run the checks manually with `pre-commit run --all-files` to see what it does without making a commit. -The most important pre-commit hook that runs in this project is [SQLFluff](https://sqlfluff.com/), which will lint your SQL code. It's configured with the `.sqlfluff` file in the root of the project. You can also run this manually, either to lint your code or to fix it automatically (which also functions loosely as a fairly relaxed formatter), with `sqlfluff lint` and `sqlfluff fix` respectively, but if you don't, it will still run whenever you commit to ensure the committed code is consistent. +The most important pre-commit hook that runs in this project is [SQLFluff](https://sqlfluff.com/), which will lint your SQL code. It's configured with the `.sqlfluff` file in the root of the project. You can also run this manually, either to lint your code or to fix it automatically (which also functions loosely as a fairly relaxed formatter), with `pre-commit run sqlfluff-lint` or `pre-commit run sqlfluff-fix` respectively, but if you don't, it will still run whenever you commit to ensure the committed code is consistent. -SQLFluff offers a few different templating options to deal with Jinja in SQL, this project uses the `dbt` templater, which actually compiles your code with your dbt Core, to ensure maximum correctness. While dbt Cloud does not make use of a `profiles.yml`, dbt Core does, to define how it connects to your warehouse. Therefore, this project needs a `profiles.yml` file for SQLFluff to run. We've included a functioning `profiles.yml` file in the project, as well as a heavily commented `profiles-example.yml` for you to fill out and replace it with. Make sure to do this if you want to run SQLFluff. If you don't, you can remove the `.sqlfluff` and `.sqlfluffignore` files and the `sqlfluff` commands from the `.pre-commit-config.yaml` file to get rid of these checks. 
The same is true for any other pre-commit hooks you don't want to run. +> [!NOTE] +> SQLFluff's dbt templater relies on dbt Core, which conflicts with dbt Cloud CLI for the time being. Thankfully, pre-commit installs its hooks into isolated environments, so you can still use SQLFluff with dbt Cloud CLI via pre-commit, but you can't call SQLFluff directly. The dbt Labs team is actively working on a solution for this issue. diff --git a/Taskfile.yml b/Taskfile.yml index 16b65dd5..dc47af8c 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -7,13 +7,19 @@ tasks: install: cmds: - - python3 -m pip install --upgrade pip - - python3 -m pip install --progress-bar off -- -r requirements.txt + - source .venv/bin/activate && python3 -m pip install --upgrade pip + - source .venv/bin/activate && python3 -m pip install -r requirements.txt --progress-bar off - setup: + build: cmds: - - dbt deps - - dbt seed + - source .venv/bin/activate && dbt deps + - source .venv/bin/activate && dbt seed - rm -rf jaffle-data - - dbt run - - dbt test + - source .venv/bin/activate && dbt run + - source .venv/bin/activate && dbt test + + setup: + cmds: + - task: venv + - task: install + - task: build diff --git a/dbt_project.yml b/dbt_project.yml index 17fec684..db8bb68d 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -6,13 +6,10 @@ require-dbt-version: ">=1.7.1" dbt-cloud: project-id: 283328 # Put your project id here -# dbt Cloud CLI does not use a profiles.yml, this is for -# SQLFluff's dbt templater -profile: snowflake #Put your profile name here model-paths: ["models"] analysis-paths: ["analyses"] -test-paths: ["tests"] +test-paths: ["data-tests"] seed-paths: ["jaffle-data"] macro-paths: ["macros"] snapshot-paths: ["snapshots"] diff --git a/profiles-example.yml b/profiles-example.yml deleted file mode 100644 index 15aa5bc4..00000000 --- a/profiles-example.yml +++ /dev/null @@ -1,19 +0,0 @@ -# dbt Cloud CLI does not use a profiles.yml file. 
-# We need one for sqlfluff to run the dbt templater in CI, -# which utilizes dbt-core internally. - -your-profile-name: - target: dev # this is the default if no target is specified - # since this is just for the linter we'll just have one default dev target - outputs: - dev: - type: # snowflake, bigquery, etc - account: # if you're going to commit your profile make this an env var like below - user: #"{{ env_var('SNOWFLAKE_SANDBOX_USER') }}" - authenticator: externalbrowser # prefer not using a password, if you do make it an env var - database: analytics # database to target - warehouse: transforming # the compute if needed for your platform - # you can find more about the configs for your platform here: - # https://docs.getdbt.com/docs/trusted-adapters - schema: # give this a descriptive name like dbt_your_name - threads: 8 # generally go as high as your platform will support, this would be fine much higher diff --git a/profiles.yml b/profiles.yml deleted file mode 100644 index 6f05870a..00000000 --- a/profiles.yml +++ /dev/null @@ -1,25 +0,0 @@ -# dbt Cloud CLI does not use a profiles.yml file. -# We need one for sqlfluff to run the dbt templater in CI, -# which utilizes dbt-core internally. 
- -snowflake: - target: dev - outputs: - dev: - type: snowflake - account: "{{ env_var('SNOWFLAKE_SANDBOX_ACCOUNT') }}" - user: "{{ env_var('SNOWFLAKE_SANDBOX_USER') }}" - authenticator: externalbrowser - database: analytics - warehouse: transforming - schema: dbt_winnie - threads: 8 - prod: - type: snowflake - account: "{{ env_var('SNOWFLAKE_SANDBOX_ACCOUNT') }}" - user: "{{ env_var('SNOWFLAKE_SANDBOX_USER') }}" - authenticator: externalbrowser - database: analytics - warehouse: transforming - schema: dx_sandbox_prod - threads: 8 diff --git a/requirements.in b/requirements.in index b2f24ff1..9f247ade 100644 --- a/requirements.in +++ b/requirements.in @@ -1,4 +1 @@ -dbt~=1.0.0.36.0 pre-commit~=3.6.0 -sqlfluff~=2.3.5 -sqlfluff-templater-dbt~=2.3.5 diff --git a/requirements.txt b/requirements.txt index dfb6bd70..4e4a353e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,211 +1,21 @@ -# This file was autogenerated by uv v0.1.5 via the following command: +# This file was autogenerated by uv via the following command: # uv pip compile requirements.in -o requirements.txt -agate==1.7.1 - # via dbt-core -annotated-types==0.6.0 - # via pydantic -appdirs==1.4.4 - # via sqlfluff -attrs==23.2.0 - # via - # jsonschema - # referencing -babel==2.14.0 - # via agate -certifi==2024.2.2 - # via requests -cffi==1.16.0 - # via dbt-core cfgv==3.4.0 # via pre-commit -chardet==5.2.0 - # via - # diff-cover - # sqlfluff -charset-normalizer==3.3.2 - # via requests -click==8.1.7 - # via - # dbt-core - # dbt-semantic-interfaces - # sqlfluff -colorama==0.4.6 - # via - # dbt-core - # sqlfluff -dbt==1.0.0.36.4 -dbt-core==1.7.8 - # via sqlfluff-templater-dbt -dbt-extractor==0.5.1 - # via dbt-core -dbt-semantic-interfaces==0.4.3 - # via dbt-core -diff-cover==8.0.3 - # via sqlfluff distlib==0.3.8 # via virtualenv -exceptiongroup==1.2.0 - # via pytest filelock==3.13.1 # via virtualenv identify==2.5.35 # via pre-commit -idna==3.6 - # via - # dbt-core - # requests -importlib-metadata==6.11.0 - 
# via dbt-semantic-interfaces -iniconfig==2.0.0 - # via pytest -isodate==0.6.1 - # via - # agate - # dbt-core -jinja2==3.1.3 - # via - # dbt-core - # dbt-semantic-interfaces - # diff-cover - # jinja2-simple-tags - # sqlfluff -jinja2-simple-tags==0.5.0 - # via sqlfluff-templater-dbt -jsonschema==4.21.1 - # via - # dbt-core - # dbt-semantic-interfaces -jsonschema-specifications==2023.12.1 - # via jsonschema -leather==0.3.4 - # via agate -logbook==1.5.3 - # via dbt-core -markdown-it-py==3.0.0 - # via rich -markupsafe==2.1.5 - # via - # jinja2 - # sqlfluff-templater-dbt -mashumaro==3.12 - # via dbt-core -mdurl==0.1.2 - # via markdown-it-py -minimal-snowplow-tracker==0.0.2 - # via dbt-core -more-itertools==10.2.0 - # via dbt-semantic-interfaces -msgpack==1.0.7 - # via mashumaro -networkx==3.2.1 - # via dbt-core nodeenv==1.8.0 # via pre-commit -packaging==23.2 - # via - # dbt-core - # pytest -parsedatetime==2.6 - # via agate -pathspec==0.11.2 - # via - # dbt-core - # sqlfluff platformdirs==4.2.0 # via virtualenv -pluggy==1.4.0 - # via - # diff-cover - # pytest pre-commit==3.6.2 -protobuf==4.25.3 - # via dbt-core -pycparser==2.21 - # via cffi -pydantic==2.6.1 - # via - # dbt-semantic-interfaces - # sqlfluff-templater-dbt -pydantic-core==2.16.2 - # via pydantic -pygments==2.17.2 - # via - # diff-cover - # rich -pytest==8.0.1 - # via sqlfluff -python-dateutil==2.8.2 - # via dbt-semantic-interfaces -python-slugify==8.0.4 - # via agate -pytimeparse==1.1.8 - # via agate -pytz==2024.1 - # via dbt-core pyyaml==6.0.1 - # via - # dbt-core - # dbt-semantic-interfaces - # pre-commit - # sqlfluff -referencing==0.33.0 - # via - # jsonschema - # jsonschema-specifications -regex==2023.12.25 - # via sqlfluff -requests==2.31.0 - # via - # dbt-core - # minimal-snowplow-tracker -rich==13.7.0 - # via sqlfluff-templater-dbt -rpds-py==0.18.0 - # via - # jsonschema - # referencing -ruamel-yaml==0.18.6 - # via sqlfluff-templater-dbt -ruamel-yaml-clib==0.2.8 - # via ruamel-yaml 
-setuptools==69.1.0 - # via - # dbt - # nodeenv -six==1.16.0 - # via - # isodate - # leather - # minimal-snowplow-tracker - # python-dateutil -sqlfluff==2.3.5 - # via sqlfluff-templater-dbt -sqlfluff-templater-dbt==2.3.5 -sqlparse==0.4.4 - # via dbt-core -tblib==3.0.0 - # via sqlfluff -text-unidecode==1.3 - # via python-slugify -toml==0.10.2 - # via sqlfluff -tomli==2.0.1 - # via pytest -tqdm==4.66.2 - # via sqlfluff -typing-extensions==4.9.0 - # via - # dbt-core - # dbt-semantic-interfaces - # mashumaro - # pydantic - # pydantic-core - # sqlfluff -urllib3==1.26.18 - # via - # dbt-core - # requests -virtualenv==20.25.0 # via pre-commit -zipp==3.17.0 - # via importlib-metadata +setuptools==69.2.0 + # via nodeenv +virtualenv==20.25.1 + # via pre-commit