From e3db437378dcd42e2d450a4a955e0e2fd5397e21 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Thu, 11 Aug 2022 18:00:46 -0400 Subject: [PATCH 01/20] add generate_model_import_ctes macro and integration test --- dbt_project.yml | 2 +- .../models/model_without_import_ctes.sql | 30 ++++++ .../tests/test_generate_model_import_ctes.sql | 79 +++++++++++++++ ...erate_model_import_ctes_case_sensitive.sql | 80 ++++++++++++++++ .../test_generate_source_some_tables.sql | 2 +- ...est_generate_source_table_descriptions.sql | 2 +- .../tests/test_generate_source_table_name.sql | 2 +- macros/generate_model_import_ctes.sql | 95 +++++++++++++++++++ 8 files changed, 288 insertions(+), 4 deletions(-) create mode 100644 integration_tests/models/model_without_import_ctes.sql create mode 100644 integration_tests/tests/test_generate_model_import_ctes.sql create mode 100644 integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql create mode 100644 macros/generate_model_import_ctes.sql diff --git a/dbt_project.yml b/dbt_project.yml index 05cc6d7..b3358d0 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,7 +1,7 @@ name: 'codegen' version: '0.5.0' -require-dbt-version: [">=1.0.0", "<2.0.0"] +require-dbt-version: [">=1.2.0", "<2.0.0"] config-version: 2 target-path: "target" diff --git a/integration_tests/models/model_without_import_ctes.sql b/integration_tests/models/model_without_import_ctes.sql new file mode 100644 index 0000000..9bdee44 --- /dev/null +++ b/integration_tests/models/model_without_import_ctes.sql @@ -0,0 +1,30 @@ +with my_first_cte as ( + select + a.col_a, + b.col_b + from {{ ref('data__a_relation') }} as a + left join {{ ref('data__b_relation') }} as b + on a.col_a = b.col_b + left join {{ ref('data__a_relation') }} as aa + on a.col_a = aa.col_b +), +my_second_cte as ( + select + 1 as id + from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table + union all + select + 2 as id + from {{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table_case_sensitive') }} +) +-- my_third_cte as ( +-- select +-- a.col_a, +-- b.col_b +-- from `raw_relation_1` as a +-- left join "raw_relation_2" as b +-- on a.col_a = b.col_b +-- left join [raw_relation_3] as aa +-- on a.col_a = aa.col_b +-- ) +select * from my_second_cte \ No newline at end of file diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql new file mode 100644 index 0000000..2c51a1d --- /dev/null +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -0,0 +1,79 @@ +{% set actual_model_with_import_ctes = codegen.generate_model_import_ctes( + model_name = 'model_without_import_ctes', + ) +%} + +{% set expected_model_with_import_ctes %} +with codegen_integration_tests__data_source_table as ( + + select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + +), +codegen_integration_tests__data_source_table_case_sensitive as ( + + select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table_case_sensitive') }}{% endraw %} + -- CAUTION: It's best practice to create staging layer for raw sources + +), +data__a_relation as ( + + select * from {% raw %}{{ ref('data__a_relation') }}{% endraw %} + +), +data__b_relation as ( + + select * from {% raw %}{{ ref('data__b_relation') }}{% endraw %} + +), +raw_relation_1 as ( + + select * from `raw_relation_1` + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + +), +raw_relation_2 as ( + + select * from "raw_relation_2" + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + +), +raw_relation_3 as ( + + select * from [raw_relation_3] + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + +), +my_first_cte as ( + select + a.col_a, + b.col_b + from data__a_relation as a + left join data__b_relation as b + on a.col_a = b.col_b + left join data__a_relation as aa + on a.col_a = aa.col_b +), +my_second_cte as ( + select + 1 as id + from codegen_integration_tests__data_source_table + union all + select + 2 as id + from codegen_integration_tests__data_source_table_case_sensitive +) +-- my_third_cte as ( +-- select +-- a.col_a, +-- b.col_b +-- from raw_relation_1 as a +-- left join raw_relation_2 as b +-- on a.col_a = b.col_b +-- left join raw_relation_3 as aa +-- on a.col_a = aa.col_b +-- ) +select * from my_second_cte +{% endset %} + +{{ assert_equal (actual_model_with_import_ctes | trim, expected_model_with_import_ctes | trim) }} \ No newline at end of file diff --git a/integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql b/integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql new file mode 100644 index 0000000..072e715 --- /dev/null +++ b/integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql @@ -0,0 +1,80 @@ +{% set actual_model_with_import_ctes = codegen.generate_model_import_ctes( + model_name = 'model_without_import_ctes', + leading_commas = true + ) +%} + +{% set expected_model_with_import_ctes %} +with codegen_integration_tests__data_source_table as ( + + select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + +) +,codegen_integration_tests__data_source_table_case_sensitive as ( + + select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table_case_sensitive') }}{% endraw %} + -- CAUTION: It's best practice to create staging layer for raw sources + +) +,data__a_relation as ( + + select * from {% raw %}{{ ref('data__a_relation') }}{% endraw %} + +) +,data__b_relation as ( + + select * from {% raw %}{{ ref('data__b_relation') }}{% endraw %} + +) +,raw_relation_1 as ( + + select * from `raw_relation_1` + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + +) +,raw_relation_2 as ( + + select * from "raw_relation_2" + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + +) +,raw_relation_3 as ( + + select * from [raw_relation_3] + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + +) +,my_first_cte as ( + select + a.col_a, + b.col_b + from data__a_relation as a + left join data__b_relation as b + on a.col_a = b.col_b + left join data__a_relation as aa + on a.col_a = aa.col_b +), +my_second_cte as ( + select + 1 as id + from codegen_integration_tests__data_source_table + union all + select + 2 as id + from codegen_integration_tests__data_source_table_case_sensitive +) +-- my_third_cte as ( +-- select +-- a.col_a, +-- b.col_b +-- from raw_relation_1 as a +-- left join raw_relation_2 as b +-- on a.col_a = b.col_b +-- left join raw_relation_3 as aa +-- on a.col_a = aa.col_b +-- ) +select * from my_second_cte +{% endset %} + +{{ assert_equal (actual_model_with_import_ctes | trim, expected_model_with_import_ctes | trim) }} \ No newline at end of file diff --git a/integration_tests/tests/test_generate_source_some_tables.sql b/integration_tests/tests/test_generate_source_some_tables.sql index 3d6fae6..525f712 100644 --- a/integration_tests/tests/test_generate_source_some_tables.sql +++ b/integration_tests/tests/test_generate_source_some_tables.sql @@ -14,7 +14,7 @@ version: 2 sources: - - name: {{ raw_schema | trim }} + - name: {{ raw_schema | trim | lower }} description: "" tables: - name: data__a_relation diff --git a/integration_tests/tests/test_generate_source_table_descriptions.sql b/integration_tests/tests/test_generate_source_table_descriptions.sql index a8a1922..772c637 100644 --- a/integration_tests/tests/test_generate_source_table_descriptions.sql +++ b/integration_tests/tests/test_generate_source_table_descriptions.sql @@ -8,7 +8,7 @@ version: 2 sources: - - name: {{ raw_schema | trim }} + - name: {{ raw_schema | trim | lower }} description: "" tables: - name: data__a_relation diff --git a/integration_tests/tests/test_generate_source_table_name.sql b/integration_tests/tests/test_generate_source_table_name.sql index 968f7e3..a636ac0 100644 --- a/integration_tests/tests/test_generate_source_table_name.sql +++ b/integration_tests/tests/test_generate_source_table_name.sql @@ -9,7 +9,7 @@ version: 2 sources: - name: raw - schema: {{ raw_schema | trim }} + schema: {{ raw_schema | trim | lower }} tables: - name: data__a_relation - name: data__b_relation diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql new file mode 100644 index 0000000..2051a0f --- /dev/null +++ b/macros/generate_model_import_ctes.sql @@ -0,0 +1,95 @@ +{% macro generate_model_import_ctes(model_name, leading_commas = false) %} + + {%- if execute -%} + {%- set nodes = graph.nodes.values() -%} + + {%- set model = (nodes + | selectattr('name', 'equalto', model_name) + | selectattr('resource_type', 'equalto', 'model') + | list).pop() -%} + + {%- set model_raw_sql = model.raw_sql -%} + {%- else -%} + {%- set model_raw_sql = '' -%} + {%- endif -%} + + {#- + + REGEX Explanations + + # from_ref + - matches (from or join) followed by some spaces and then {{ref()}} + + # from_source + - matches (from or join) followed by some spaces and then {{source(,)}} + + # from_table_1 + - matches (from or join) followed by some spaces and then . + + # from_table_2 + - matches (from or join) followed by some spaces and then (` or [ or ")(` or ] or ") + + -#} + + {%- set from_regexes = { + 'from_ref':'(?i)(from|join)\s+({{\s*ref\s*\()([^)]+)(\)\s*}})', + 'from_source':'(?i)(from|join)\s+({{\s*source\s*\([^)]+,)([^)]+)(\)\s*}})', + 'from_table_1':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?\.)([\[`\"]?\w+[\]`\"]?)', + 'from_table_2':'(?i)(from|join)\s+([\[`\"])([\w ]+)([\]`\"])' + } -%} + + {%- set re = modules.re -%} + + {%- set from_list = [] -%} + + {%- for regex_name, regex_pattern in from_regexes.items() -%} + + {%- set all_regex_matches = re.findall(regex_pattern, model_raw_sql) -%} + + {%- for match in all_regex_matches -%} + {%- set full_from_clause = match[1:]|join|trim -%} + {%- set cte_name = match[2]|replace("'","")|trim|lower -%} + {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%} + {%- do from_list.append(match_tuple) -%} + + {%- endfor -%} + + {%- endfor -%} + + {%- set ns = namespace(model_sql = model_raw_sql) -%} + {%- set unique_from_list = set(from_list) -%} + +{%- set model_import_ctes -%} + {%- for from_obj in unique_from_list|sort -%} + + {%- set ns.model_sql = ns.model_sql|replace(from_obj[1], from_obj[0]) %} + +{%- if loop.first -%}with {% else -%}{%- if leading_commas -%},{%- endif -%}{%- endif -%}{{ from_obj[0] }} as ( + + select * from {{ from_obj[1] }} + {%- if from_obj[2] == 'from_source' and from_list|length > 1 %} + -- CAUTION: It's best practice to create staging layer for raw sources + {%- elif from_obj[2] == 'from_table_1' or from_obj[2] == 'from_table_2' %} + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + {%- endif %} + +){%- if not leading_commas -%},{%- endif %} +{% endfor -%} + +{%- if leading_commas -%} +{%- set replace_with = ',' -%} +{%- else -%} +{%- set replace_with = '' -%} +{%- endif -%} + +{{ re.sub('(?i)with\s', replace_with, ns.model_sql, 1) }} +{%- endset -%} + +{%- if execute -%} + +{{ log(model_import_ctes, info=True) }} +{% do return(model_import_ctes) %} + +{% endif %} + +{% endmacro %} \ No newline at end of file From 8d27fbc132341e3e35d8d84febc6b4f749e12574 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Thu, 11 Aug 2022 18:09:03 -0400 Subject: [PATCH 02/20] update readme --- README.md | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d31f437..49e5d3f 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,22 @@ Macros that generate dbt code, and log it to the command line. # Contents -* [generate_source](#generate_source-source) -* [generate_base_model](#generate_base_model-source) -* [generate_model_yaml](#generate_model_yaml-source) +- [dbt-codegen](#dbt-codegen) +- [Contents](#contents) +- [Installation instructions](#installation-instructions) +- [Macros](#macros) + - [generate_source (source)](#generate_source-source) + - [Arguments](#arguments) + - [Usage:](#usage) + - [generate_base_model (source)](#generate_base_model-source) + - [Arguments:](#arguments-1) + - [Usage:](#usage-1) + - [generate_model_yaml (source)](#generate_model_yaml-source) + - [Arguments:](#arguments-2) + - [Usage:](#usage-2) + - [generate_model_import_ctes (source)](#generate_model_import_ctes-source) + - [Arguments:](#arguments-3) + - [Usage:](#usage-3) # Installation instructions New to dbt packages? Read more about them [here](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/). @@ -164,3 +177,27 @@ models: ``` 4. Paste the output in to a schema.yml file, and refactor as required. + +## generate_model_import_ctes ([source](macros/generate_model_import_ctes.sql)) +This macro generates the SQL for a given model with all references pulled up into import CTEs, which you can then paste back into the model. + +### Arguments: +* `model_name` (required): The model table you wish to generate SQL with import CTEs for. +* `leading_commas` (optional, default = false): Whether you want your commas to be leading (vs trailing). + +### Usage: +1. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code. + +``` +{{ generate_model_import_ctes( + model_name = 'my_dbt_model' +) }} +``` + +Alternatively, call the macro as an [operation](https://docs.getdbt.com/docs/using-operations): + +``` +$ dbt run-operation generate_model_import_ctes --args '{"model_name": "my_dbt_model"}' +``` + +2. Replace the contents of the model's current SQL file with the compiled or logged code \ No newline at end of file From 32063a68351a9aae4aac9a05a8f28d3ec8294aea Mon Sep 17 00:00:00 2001 From: Grace Goheen <53586774+graciegoheen@users.noreply.github.com> Date: Thu, 11 Aug 2022 18:12:50 -0400 Subject: [PATCH 03/20] Update README.md --- README.md | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 49e5d3f..7801a35 100644 --- a/README.md +++ b/README.md @@ -3,22 +3,10 @@ Macros that generate dbt code, and log it to the command line. # Contents -- [dbt-codegen](#dbt-codegen) -- [Contents](#contents) -- [Installation instructions](#installation-instructions) -- [Macros](#macros) - - [generate_source (source)](#generate_source-source) - - [Arguments](#arguments) - - [Usage:](#usage) - - [generate_base_model (source)](#generate_base_model-source) - - [Arguments:](#arguments-1) - - [Usage:](#usage-1) - - [generate_model_yaml (source)](#generate_model_yaml-source) - - [Arguments:](#arguments-2) - - [Usage:](#usage-2) - - [generate_model_import_ctes (source)](#generate_model_import_ctes-source) - - [Arguments:](#arguments-3) - - [Usage:](#usage-3) +* [generate_source](#generate_source-source) +* [generate_base_model](#generate_base_model-source) +* [generate_model_yaml](#generate_model_yaml-source) +* [generate_model_import_ctes](#generate_model_import_ctes-source) # Installation instructions New to dbt packages? Read more about them [here](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/). From c5d0f825100c744fe4dcb96e8c95e9db51ec495f Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Thu, 11 Aug 2022 18:20:36 -0400 Subject: [PATCH 04/20] adjusted test example for postgres --- integration_tests/models/model_without_import_ctes.sql | 4 ++-- integration_tests/tests/test_generate_model_import_ctes.sql | 4 ++-- .../tests/test_generate_model_import_ctes_case_sensitive.sql | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/integration_tests/models/model_without_import_ctes.sql b/integration_tests/models/model_without_import_ctes.sql index 9bdee44..fd725b0 100644 --- a/integration_tests/models/model_without_import_ctes.sql +++ b/integration_tests/models/model_without_import_ctes.sql @@ -4,9 +4,9 @@ with my_first_cte as ( b.col_b from {{ ref('data__a_relation') }} as a left join {{ ref('data__b_relation') }} as b - on a.col_a = b.col_b + on a.col_a = b.col_a left join {{ ref('data__a_relation') }} as aa - on a.col_a = aa.col_b + on a.col_a = aa.col_a ), my_second_cte as ( select diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index 2c51a1d..df59402 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -50,9 +50,9 @@ my_first_cte as ( b.col_b from data__a_relation as a left join data__b_relation as b - on a.col_a = b.col_b + on a.col_a = b.col_a left join data__a_relation as aa - on a.col_a = aa.col_b + on a.col_a = aa.col_a ), my_second_cte as ( select diff --git a/integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql b/integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql index 072e715..1bfba3c 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql @@ -51,9 +51,9 @@ with codegen_integration_tests__data_source_table as ( b.col_b from data__a_relation as a left join data__b_relation as b - on a.col_a = b.col_b + on a.col_a = b.col_a left join data__a_relation as aa - on a.col_a = aa.col_b + on a.col_a = aa.col_a ), my_second_cte as ( select From 6634b98bdb16a9ad72a49e07c4c322f621b552d3 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Fri, 12 Aug 2022 09:27:07 -0400 Subject: [PATCH 05/20] added support for config blocks --- .../models/model_without_import_ctes.sql | 4 ++ .../tests/test_generate_model_import_ctes.sql | 4 ++ ...st_generate_model_import_ctes_leading.sql} | 4 ++ macros/generate_model_import_ctes.sql | 46 +++++++++++++++---- 4 files changed, 50 insertions(+), 8 deletions(-) rename integration_tests/tests/{test_generate_model_import_ctes_case_sensitive.sql => test_generate_model_import_ctes_leading.sql} (97%) diff --git a/integration_tests/models/model_without_import_ctes.sql b/integration_tests/models/model_without_import_ctes.sql index fd725b0..a20889b 100644 --- a/integration_tests/models/model_without_import_ctes.sql +++ b/integration_tests/models/model_without_import_ctes.sql @@ -1,3 +1,7 @@ +{{ config( + materialized='table', +) }} + with my_first_cte as ( select a.col_a, diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index df59402..9fa09d9 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -4,6 +4,10 @@ %} {% set expected_model_with_import_ctes %} +{% raw %}{{ config( + materialized='table', +) }}{% endraw %} + with codegen_integration_tests__data_source_table as ( select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table diff --git a/integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql b/integration_tests/tests/test_generate_model_import_ctes_leading.sql similarity index 97% rename from integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql rename to integration_tests/tests/test_generate_model_import_ctes_leading.sql index 1bfba3c..3406a1f 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_case_sensitive.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_leading.sql @@ -5,6 +5,10 @@ %} {% set expected_model_with_import_ctes %} +{% raw %}{{ config( + materialized='table', +) }}{% endraw %} + with codegen_integration_tests__data_source_table as ( select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index 2051a0f..3b94bbb 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -29,40 +29,63 @@ # from_table_2 - matches (from or join) followed by some spaces and then (` or [ or ")(` or ] or ") + # config block + - matches some spaces followed by {{config()}} + -#} {%- set from_regexes = { 'from_ref':'(?i)(from|join)\s+({{\s*ref\s*\()([^)]+)(\)\s*}})', 'from_source':'(?i)(from|join)\s+({{\s*source\s*\([^)]+,)([^)]+)(\)\s*}})', 'from_table_1':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?\.)([\[`\"]?\w+[\]`\"]?)', - 'from_table_2':'(?i)(from|join)\s+([\[`\"])([\w ]+)([\]`\"])' + 'from_table_2':'(?i)(from|join)\s+([\[`\"])([\w ]+)([\]`\"])', + 'config_block':'(?i)\s*{{\s*config\s*\([^)]+\)\s*}}' } -%} {%- set re = modules.re -%} {%- set from_list = [] -%} + {%- set config_list = [] -%} {%- for regex_name, regex_pattern in from_regexes.items() -%} {%- set all_regex_matches = re.findall(regex_pattern, model_raw_sql) -%} {%- for match in all_regex_matches -%} - {%- set full_from_clause = match[1:]|join|trim -%} - {%- set cte_name = match[2]|replace("'","")|trim|lower -%} - {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%} - {%- do from_list.append(match_tuple) -%} + + {%- if regex_name == 'config_block' -%} + {%- set match_tuple = (match|trim, regex_name) -%} + {%- do config_list.append(match_tuple) -%} + {%- else -%} + {%- set full_from_clause = match[1:]|join|trim -%} + {%- set cte_name = match[2]|replace("'","")|trim|lower -%} + {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%} + {%- do from_list.append(match_tuple) -%} + {%- endif -%} {%- endfor -%} {%- endfor -%} - {%- set ns = namespace(model_sql = model_raw_sql) -%} {%- set unique_from_list = set(from_list) -%} +{%- if unique_from_list|length > 0 -%} + +{%- set ns = namespace(model_sql = model_raw_sql) -%} + {%- set model_import_ctes -%} + + {%- for config_obj in config_list -%} + + {%- set ns.model_sql = ns.model_sql|replace(config_obj[0], '') -%} + +{{ config_obj[0] }} + +{% endfor -%} + {%- for from_obj in unique_from_list|sort -%} - {%- set ns.model_sql = ns.model_sql|replace(from_obj[1], from_obj[0]) %} + {%- set ns.model_sql = ns.model_sql|replace(from_obj[1], from_obj[0]) -%} {%- if loop.first -%}with {% else -%}{%- if leading_commas -%},{%- endif -%}{%- endif -%}{{ from_obj[0] }} as ( @@ -82,9 +105,16 @@ {%- set replace_with = '' -%} {%- endif -%} -{{ re.sub('(?i)with\s', replace_with, ns.model_sql, 1) }} +{{ re.sub('(?i)with\s', replace_with, ns.model_sql, 1)|trim }} + {%- endset -%} +{%- else -%} + +{% set model_import_ctes = model_raw_sql %} + +{%- endif -%} + {%- if execute -%} {{ log(model_import_ctes, info=True) }} From 08142a0618c8ceee027146cca94c6c03b653686f Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Fri, 12 Aug 2022 09:50:22 -0400 Subject: [PATCH 06/20] added support for comments --- integration_tests/models/model_without_import_ctes.sql | 4 ++++ integration_tests/tests/test_generate_model_import_ctes.sql | 4 ++++ .../tests/test_generate_model_import_ctes_leading.sql | 4 ++++ macros/generate_model_import_ctes.sql | 4 ++-- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/integration_tests/models/model_without_import_ctes.sql b/integration_tests/models/model_without_import_ctes.sql index a20889b..d3d446d 100644 --- a/integration_tests/models/model_without_import_ctes.sql +++ b/integration_tests/models/model_without_import_ctes.sql @@ -1,3 +1,7 @@ +/* + This is my model! +*/ + {{ config( materialized='table', ) }} diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index 9fa09d9..e43d78c 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -4,6 +4,10 @@ %} {% set expected_model_with_import_ctes %} +/* + This is my model! +*/ + {% raw %}{{ config( materialized='table', ) }}{% endraw %} diff --git a/integration_tests/tests/test_generate_model_import_ctes_leading.sql b/integration_tests/tests/test_generate_model_import_ctes_leading.sql index 3406a1f..b889c52 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_leading.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_leading.sql @@ -5,6 +5,10 @@ %} {% set expected_model_with_import_ctes %} +/* + This is my model! +*/ + {% raw %}{{ config( materialized='table', ) }}{% endraw %} diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index 3b94bbb..a99d4ab 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -30,7 +30,7 @@ - matches (from or join) followed by some spaces and then (` or [ or ")(` or ] or ") # config block - - matches some spaces followed by {{config()}} + - matches the start of the file followed by anything and then {{config()}} -#} @@ -39,7 +39,7 @@ 'from_source':'(?i)(from|join)\s+({{\s*source\s*\([^)]+,)([^)]+)(\)\s*}})', 'from_table_1':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?\.)([\[`\"]?\w+[\]`\"]?)', 'from_table_2':'(?i)(from|join)\s+([\[`\"])([\w ]+)([\]`\"])', - 'config_block':'(?i)\s*{{\s*config\s*\([^)]+\)\s*}}' + 'config_block':'(?i)(?s)^.*{{\s*config\s*\([^)]+\)\s*}}' } -%} {%- set re = modules.re -%} From 9ac5782f3a5eba98d584fc9b80dd3a4e89a47ef8 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Fri, 12 Aug 2022 10:52:21 -0400 Subject: [PATCH 07/20] added comma handling for sql with and without any CTEs --- .../models/model_without_any_ctes.sql | 3 +++ .../tests/test_generate_model_import_ctes.sql | 2 +- ...est_generate_model_import_ctes_no_ctes.sql | 17 +++++++++++++ macros/generate_model_import_ctes.sql | 24 ++++++++++++++----- 4 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 integration_tests/models/model_without_any_ctes.sql create mode 100644 integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql diff --git a/integration_tests/models/model_without_any_ctes.sql b/integration_tests/models/model_without_any_ctes.sql new file mode 100644 index 0000000..8a19c8a --- /dev/null +++ b/integration_tests/models/model_without_any_ctes.sql @@ -0,0 +1,3 @@ +select *, 2 as col2 +from {{ ref('model_without_import_ctes') }} +where id = 1 \ No newline at end of file diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index e43d78c..3717c3b 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -1,5 +1,5 @@ {% set actual_model_with_import_ctes = codegen.generate_model_import_ctes( - model_name = 'model_without_import_ctes', + model_name = 'model_without_import_ctes' ) %} diff --git a/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql new file mode 100644 index 0000000..e2e17da --- /dev/null +++ b/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql @@ -0,0 +1,17 @@ +{% set actual_model_with_import_ctes = codegen.generate_model_import_ctes( + model_name = 'model_without_any_ctes' + ) +%} + +{% set expected_model_with_import_ctes %} +with model_without_import_ctes as ( + + select * from {% raw %}{{ ref('model_without_import_ctes') }}{% endraw %} + +) +select *, 2 as col2 +from model_without_import_ctes +where id = 1 +{% endset %} + +{{ assert_equal (actual_model_with_import_ctes | trim, expected_model_with_import_ctes | trim) }} \ No newline at end of file diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index a99d4ab..4734141 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -17,6 +17,11 @@ REGEX Explanations + # with_regex + - matches (start of file followed by anything then whitespace + or whitespace + or a comma) followed by the word with then a space + # from_ref - matches (from or join) followed by some spaces and then {{ref()}} @@ -33,7 +38,12 @@ - matches the start of the file followed by anything and then {{config()}} -#} - + + {%- set re = modules.re -%} + + {%- set with_regex = '(?i)(?s)(^.*\s+|\s+|,)with\s' -%} + {%- set does_raw_sql_contain_cte = re.search(with_regex, model_raw_sql) -%} + {%- set from_regexes = { 'from_ref':'(?i)(from|join)\s+({{\s*ref\s*\()([^)]+)(\)\s*}})', 'from_source':'(?i)(from|join)\s+({{\s*source\s*\([^)]+,)([^)]+)(\)\s*}})', @@ -42,8 +52,6 @@ 'config_block':'(?i)(?s)^.*{{\s*config\s*\([^)]+\)\s*}}' } -%} - {%- set re = modules.re -%} - {%- set from_list = [] -%} {%- set config_list = [] -%} @@ -96,16 +104,20 @@ -- CAUTION: It's best practice to use the ref or source function instead of a direct reference {%- endif %} -){%- if not leading_commas -%},{%- endif %} +){%- if does_raw_sql_contain_cte and not leading_commas -%},{%- endif %} {% endfor -%} {%- if leading_commas -%} {%- set replace_with = ',' -%} {%- else -%} -{%- set replace_with = '' -%} +{%- set replace_with = '\g<1>' -%} {%- endif -%} -{{ re.sub('(?i)with\s', replace_with, ns.model_sql, 1)|trim }} +{%- if does_raw_sql_contain_cte -%} +{{ re.sub(with_regex, replace_with, ns.model_sql, 1)|trim }} +{%- else -%} +{{ ns.model_sql|trim }} +{%- endif -%} {%- endset -%} From e9d6555ccfdf2f92c81366375094b11a6f1f2870 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Fri, 12 Aug 2022 11:03:32 -0400 Subject: [PATCH 08/20] Use unique instead of set to be compatible with more versions of dbt --- dbt_project.yml | 2 +- macros/generate_model_import_ctes.sql | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/dbt_project.yml b/dbt_project.yml index b3358d0..05cc6d7 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,7 +1,7 @@ name: 'codegen' version: '0.5.0' -require-dbt-version: [">=1.2.0", "<2.0.0"] +require-dbt-version: [">=1.0.0", "<2.0.0"] config-version: 2 target-path: "target" diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index 4734141..1c2853a 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -75,9 +75,7 @@ {%- endfor -%} - {%- set unique_from_list = set(from_list) -%} - -{%- if unique_from_list|length > 0 -%} +{%- if from_list|length > 0 -%} {%- set ns = namespace(model_sql = model_raw_sql) -%} @@ -91,7 +89,7 @@ {% endfor -%} - {%- for from_obj in unique_from_list|sort -%} + {%- for from_obj in from_list|unique|sort -%} {%- set ns.model_sql = ns.model_sql|replace(from_obj[1], from_obj[0]) -%} From 6029dfc2b145a95631c404104a6acbf1bdf38706 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Fri, 12 Aug 2022 12:26:50 -0400 Subject: [PATCH 09/20] change CTE name to avoid duplicates --- .../models/model_without_import_ctes.sql | 2 +- .../tests/test_generate_model_import_ctes.sql | 10 +++++----- .../test_generate_model_import_ctes_leading.sql | 10 +++++----- macros/generate_model_import_ctes.sql | 17 +++++++++++++---- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/integration_tests/models/model_without_import_ctes.sql b/integration_tests/models/model_without_import_ctes.sql index d3d446d..ab5fee4 100644 --- a/integration_tests/models/model_without_import_ctes.sql +++ b/integration_tests/models/model_without_import_ctes.sql @@ -23,7 +23,7 @@ my_second_cte as ( union all select 2 as id - from {{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table_case_sensitive') }} + from {{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }} ) -- my_third_cte as ( -- select diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index 3717c3b..1817a8a 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -12,15 +12,15 @@ materialized='table', ) }}{% endraw %} -with codegen_integration_tests__data_source_table as ( +with codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as ( select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ), -codegen_integration_tests__data_source_table_case_sensitive as ( +codegen_integration_tests__data_source_table as ( - select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table_case_sensitive') }}{% endraw %} + select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }}{% endraw %} -- CAUTION: It's best practice to create staging layer for raw sources ), @@ -65,11 +65,11 @@ my_first_cte as ( my_second_cte as ( select 1 as id - from codegen_integration_tests__data_source_table + from codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table union all select 2 as id - from codegen_integration_tests__data_source_table_case_sensitive + from codegen_integration_tests__data_source_table ) -- my_third_cte as ( -- select diff --git a/integration_tests/tests/test_generate_model_import_ctes_leading.sql b/integration_tests/tests/test_generate_model_import_ctes_leading.sql index b889c52..1601cd4 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_leading.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_leading.sql @@ -13,15 +13,15 @@ materialized='table', ) }}{% endraw %} -with codegen_integration_tests__data_source_table as ( +with codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as ( select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ) -,codegen_integration_tests__data_source_table_case_sensitive as ( +,codegen_integration_tests__data_source_table as ( - select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table_case_sensitive') }}{% endraw %} + select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }}{% endraw %} -- CAUTION: It's best practice to create staging layer for raw sources ) @@ -66,11 +66,11 @@ with codegen_integration_tests__data_source_table as ( my_second_cte as ( select 1 as id - from codegen_integration_tests__data_source_table + from codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table union all select 2 as id - from codegen_integration_tests__data_source_table_case_sensitive + from codegen_integration_tests__data_source_table ) -- my_third_cte as ( -- select diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index 1c2853a..0eb9cc6 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -32,6 +32,9 @@ - matches (from or join) followed by some spaces and then . # from_table_2 + - matches (from or join) followed by some spaces and then .. + + # from_table_3 - matches (from or join) followed by some spaces and then (` or [ or ")(` or ] or ") # config block @@ -41,14 +44,15 @@ {%- set re = modules.re -%} - {%- set with_regex = '(?i)(?s)(^.*\s+|\s+|,)with\s' -%} + {%- set with_regex = '(?i)(?s)(^.*\s*|\s+|,)with\s' -%} {%- set does_raw_sql_contain_cte = re.search(with_regex, model_raw_sql) -%} {%- set from_regexes = { 'from_ref':'(?i)(from|join)\s+({{\s*ref\s*\()([^)]+)(\)\s*}})', 'from_source':'(?i)(from|join)\s+({{\s*source\s*\([^)]+,)([^)]+)(\)\s*}})', - 'from_table_1':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?\.)([\[`\"]?\w+[\]`\"]?)', - 'from_table_2':'(?i)(from|join)\s+([\[`\"])([\w ]+)([\]`\"])', + 'from_table_1':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?)\.([\[`\"]?\w+[\]`\"]?)', + 'from_table_2':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?)\.([\[`\"]?\w+[\]`\"]?)\.([\[`\"]?\w+[\]`\"]?)', + 'from_table_3':'(?i)(from|join)\s+([\[`\"])([\w ]+)([\]`\"])', 'config_block':'(?i)(?s)^.*{{\s*config\s*\([^)]+\)\s*}}' } -%} @@ -64,6 +68,11 @@ {%- if regex_name == 'config_block' -%} {%- set match_tuple = (match|trim, regex_name) -%} {%- do config_list.append(match_tuple) -%} + {%- elif regex_name == 'from_table_1' or regex_name == 'from_table_2' -%} + {%- set full_from_clause = match[1:]|join('.')|trim -%} + {%- set cte_name = match[1:]|join('_')|trim|lower -%} + {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%} + {%- do from_list.append(match_tuple) -%} {%- else -%} {%- set full_from_clause = match[1:]|join|trim -%} {%- set cte_name = match[2]|replace("'","")|trim|lower -%} @@ -98,7 +107,7 @@ select * from {{ from_obj[1] }} {%- if from_obj[2] == 'from_source' and from_list|length > 1 %} -- CAUTION: It's best practice to create staging layer for raw sources - {%- elif from_obj[2] == 'from_table_1' or from_obj[2] == 'from_table_2' %} + {%- elif from_obj[2] == 'from_table_1' or from_obj[2] == 'from_table_2' or from_obj[2] == 'from_table_3' %} -- CAUTION: It's best practice to use the ref or source function instead of a direct reference {%- endif %} From 654e521e0b97bb1c49e0a4c0c8de92847216812c Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Fri, 12 Aug 2022 17:11:27 -0400 Subject: [PATCH 10/20] fix for replacing without reliance on abc --- .../models/model_without_import_ctes.sql | 7 ++- .../tests/test_generate_model_import_ctes.sql | 27 ++++++--- ...est_generate_model_import_ctes_leading.sql | 27 ++++++--- macros/generate_model_import_ctes.sql | 55 ++++++++++++------- 4 files changed, 80 insertions(+), 36 deletions(-) diff --git a/integration_tests/models/model_without_import_ctes.sql b/integration_tests/models/model_without_import_ctes.sql index ab5fee4..3cdb888 100644 --- a/integration_tests/models/model_without_import_ctes.sql +++ b/integration_tests/models/model_without_import_ctes.sql @@ -6,6 +6,7 @@ materialized='table', ) }} +-- I love this cte with my_first_cte as ( select a.col_a, @@ -23,7 +24,11 @@ my_second_cte as ( union all select 2 as id - from {{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }} + from {{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }} + union all + select + 3 as id + from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table ) -- my_third_cte as ( -- select diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index 1817a8a..343723d 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -17,12 +17,6 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table -- CAUTION: It's best practice to use the ref or source function instead of a direct reference -), -codegen_integration_tests__data_source_table as ( - - select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }}{% endraw %} - -- CAUTION: It's best practice to create staging layer for raw sources - ), data__a_relation as ( @@ -33,6 +27,12 @@ data__b_relation as ( select * from {% raw %}{{ ref('data__b_relation') }}{% endraw %} +), +development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as ( + + select * from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + ), raw_relation_1 as ( @@ -52,12 +52,19 @@ raw_relation_3 as ( -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ), +source_codegen_integration_tests__data_source_table as ( + + select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }}{% endraw %} + -- CAUTION: It's best practice to create staging layer for raw sources + +), +-- I love this cte my_first_cte as ( select a.col_a, b.col_b from data__a_relation as a - left join data__b_relation as b + left join data__b_relation as b on a.col_a = b.col_a left join data__a_relation as aa on a.col_a = aa.col_a @@ -69,7 +76,11 @@ my_second_cte as ( union all select 2 as id - from codegen_integration_tests__data_source_table + from source_codegen_integration_tests__data_source_table + union all + select + 3 as id + from development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table ) -- my_third_cte as ( -- select diff --git a/integration_tests/tests/test_generate_model_import_ctes_leading.sql b/integration_tests/tests/test_generate_model_import_ctes_leading.sql index 1601cd4..06174a0 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_leading.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_leading.sql @@ -18,12 +18,6 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table -- CAUTION: It's best practice to use the ref or source function instead of a direct reference -) -,codegen_integration_tests__data_source_table as ( - - select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }}{% endraw %} - -- CAUTION: It's best practice to create staging layer for raw sources - ) ,data__a_relation as ( @@ -34,6 +28,12 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da select * from {% raw %}{{ ref('data__b_relation') }}{% endraw %} +) +,development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as ( + + select * from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + ) ,raw_relation_1 as ( @@ -53,12 +53,19 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ) +,source_codegen_integration_tests__data_source_table as ( + + select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }}{% endraw %} + -- CAUTION: It's best practice to create staging layer for raw sources + +) +-- I love this cte ,my_first_cte as ( select a.col_a, b.col_b from data__a_relation as a - left join data__b_relation as b + left join data__b_relation as b on a.col_a = b.col_a left join data__a_relation as aa on a.col_a = aa.col_a @@ -70,7 +77,11 @@ my_second_cte as ( union all select 2 as id - from codegen_integration_tests__data_source_table + from source_codegen_integration_tests__data_source_table + union all + select + 3 as id + from development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table ) -- my_third_cte as ( -- select diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index 0eb9cc6..2c0e4eb 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -48,16 +48,17 @@ {%- set does_raw_sql_contain_cte = re.search(with_regex, model_raw_sql) -%} {%- set from_regexes = { - 'from_ref':'(?i)(from|join)\s+({{\s*ref\s*\()([^)]+)(\)\s*}})', - 'from_source':'(?i)(from|join)\s+({{\s*source\s*\([^)]+,)([^)]+)(\)\s*}})', - 'from_table_1':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?)\.([\[`\"]?\w+[\]`\"]?)', - 'from_table_2':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?)\.([\[`\"]?\w+[\]`\"]?)\.([\[`\"]?\w+[\]`\"]?)', + 'from_ref':'(?i)(from|join)\s+({{\s*ref\s*\(\s*\'|\")([^)\'\"]+)(\'|\"\s*)(\)\s*}})', + 'from_source':'(?i)(from|join)\s+({{\s*source\s*\(\s*\'|\")([^)\'\"]+)(\'|\"\s*)(,)(\s*\'|\")([^)\'\"]+)(\'|\"\s*)(\)\s*}})', + 'from_table_1':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?)(\.)([\[`\"]?\w+[\]`\"]?)(?=\s|$)', + 'from_table_2':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?)(\.)([\[`\"]?\w+[\]`\"]?)(\.)([\[`\"]?\w+[\]`\"]?)(?=\s|$)', 'from_table_3':'(?i)(from|join)\s+([\[`\"])([\w ]+)([\]`\"])', 'config_block':'(?i)(?s)^.*{{\s*config\s*\([^)]+\)\s*}}' } -%} {%- set from_list = [] -%} {%- set config_list = [] -%} + {%- set ns = namespace(model_sql = model_raw_sql) -%} {%- for regex_name, regex_pattern in from_regexes.items() -%} @@ -68,26 +69,45 @@ {%- if regex_name == 'config_block' -%} {%- set match_tuple = (match|trim, regex_name) -%} {%- do config_list.append(match_tuple) -%} - {%- elif regex_name == 'from_table_1' or regex_name == 'from_table_2' -%} - {%- set full_from_clause = match[1:]|join('.')|trim -%} - {%- set cte_name = match[1:]|join('_')|trim|lower -%} + {%- elif regex_name == 'from_source' -%} + {%- set full_from_clause = match[1:]|join|trim -%} + {%- set cte_name = 'source_' + match[6]|lower -%} + {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%} + {%- do from_list.append(match_tuple) -%} + {%- elif regex_name == 'from_table_1' -%} + {%- set full_from_clause = match[1:]|join()|trim -%} + {%- set cte_name = match[1]|lower + '_' + match[3]|lower -%} + {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%} + {%- do from_list.append(match_tuple) -%} + {%- elif regex_name == 'from_table_2' -%} + {%- set full_from_clause = match[1:]|join()|trim -%} + {%- set cte_name = match[1]|lower + '_' + match[3]|lower + '_' + match[5]|lower -%} {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%} - {%- do from_list.append(match_tuple) -%} + {%- do from_list.append(match_tuple) -%} {%- else -%} {%- set full_from_clause = match[1:]|join|trim -%} - {%- set cte_name = match[2]|replace("'","")|trim|lower -%} + {%- set cte_name = match[2]|trim|lower -%} {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%} {%- do from_list.append(match_tuple) -%} {%- endif -%} {%- endfor -%} + {%- if regex_name == 'config_block' -%} + {%- elif regex_name == 'from_source' -%} + {%- set ns.model_sql = re.sub(regex_pattern, '\g<1> source_\g<7>', ns.model_sql) -%} + {%- elif regex_name == 'from_table_1' -%} + {%- set ns.model_sql = re.sub(regex_pattern, '\g<1> \g<2>_\g<4>', ns.model_sql) -%} + {%- elif regex_name == 'from_table_2' -%} + {%- set ns.model_sql = re.sub(regex_pattern, '\g<1> \g<2>_\g<4>_\g<6>', ns.model_sql) -%} + {%- else -%} + {%- set ns.model_sql = re.sub(regex_pattern, '\g<1> \g<3>', ns.model_sql) -%} + {% endif %} + {%- endfor -%} {%- if from_list|length > 0 -%} -{%- set ns = namespace(model_sql = model_raw_sql) -%} - {%- set model_import_ctes -%} {%- for config_obj in config_list -%} @@ -99,8 +119,6 @@ {% endfor -%} {%- for from_obj in from_list|unique|sort -%} - - {%- set ns.model_sql = ns.model_sql|replace(from_obj[1], from_obj[0]) -%} {%- if loop.first -%}with {% else -%}{%- if leading_commas -%},{%- endif -%}{%- endif -%}{{ from_obj[0] }} as ( @@ -114,13 +132,12 @@ ){%- if does_raw_sql_contain_cte and not leading_commas -%},{%- endif %} {% endfor -%} -{%- if leading_commas -%} -{%- set replace_with = ',' -%} -{%- else -%} -{%- set replace_with = '\g<1>' -%} -{%- endif -%} - {%- if does_raw_sql_contain_cte -%} + {%- if leading_commas -%} + {%- set replace_with = '\g<1>,' -%} + {%- else -%} + {%- set replace_with = '\g<1>' -%} + {%- endif -%} {{ re.sub(with_regex, replace_with, ns.model_sql, 1)|trim }} {%- else -%} {{ ns.model_sql|trim }} From ea0f86fc082166cf7909f5079fa7a024a4fa51eb Mon Sep 17 00:00:00 2001 From: Grace Goheen <53586774+graciegoheen@users.noreply.github.com> Date: Fri, 12 Aug 2022 17:12:40 -0400 Subject: [PATCH 11/20] option to use raw_sql or raw_code depending on dbt version Co-authored-by: Benoit Perigaud <8754100+b-per@users.noreply.github.com> --- macros/generate_model_import_ctes.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index 2c0e4eb..b6903d8 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -8,7 +8,7 @@ | selectattr('resource_type', 'equalto', 'model') | list).pop() -%} - {%- set model_raw_sql = model.raw_sql -%} + {%- set model_raw_sql = model.raw_sql or model.raw_code -%} {%- else -%} {%- set model_raw_sql = '' -%} {%- endif -%} From 9e833c78914ce45b21111201ca3d59119d1495f6 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Fri, 12 Aug 2022 17:24:45 -0400 Subject: [PATCH 12/20] commented out database example for integration tests --- integration_tests/models/model_without_import_ctes.sql | 8 ++++---- .../tests/test_generate_model_import_ctes.sql | 8 ++++---- .../tests/test_generate_model_import_ctes_leading.sql | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/integration_tests/models/model_without_import_ctes.sql b/integration_tests/models/model_without_import_ctes.sql index 3cdb888..7bb569b 100644 --- a/integration_tests/models/model_without_import_ctes.sql +++ b/integration_tests/models/model_without_import_ctes.sql @@ -25,10 +25,10 @@ my_second_cte as ( select 2 as id from {{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }} - union all - select - 3 as id - from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table + -- union all + -- select + -- 3 as id + -- from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table ) -- my_third_cte as ( -- select diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index 343723d..261bb32 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -77,10 +77,10 @@ my_second_cte as ( select 2 as id from source_codegen_integration_tests__data_source_table - union all - select - 3 as id - from development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table + -- union all + -- select + -- 3 as id + -- from development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table ) -- my_third_cte as ( -- select diff --git a/integration_tests/tests/test_generate_model_import_ctes_leading.sql b/integration_tests/tests/test_generate_model_import_ctes_leading.sql index 06174a0..fee5d6d 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_leading.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_leading.sql @@ -78,10 +78,10 @@ my_second_cte as ( select 2 as id from source_codegen_integration_tests__data_source_table - union all - select - 3 as id - from development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table + -- union all + -- select + -- 3 as id + -- from development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table ) -- my_third_cte as ( -- select From f7bc2899a77cf7ab6ebe1501cdb4b792ba273808 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Mon, 15 Aug 2022 11:12:53 -0400 Subject: [PATCH 13/20] Added regex matching for from var() --- integration_tests/dbt_project.yml | 3 +++ .../models/model_without_import_ctes.sql | 10 ++++++++- .../tests/test_generate_model_import_ctes.sql | 22 ++++++++++++++++++- ...est_generate_model_import_ctes_leading.sql | 22 ++++++++++++++++++- macros/generate_model_import_ctes.sql | 14 ++++++++++-- 5 files changed, 66 insertions(+), 5 deletions(-) diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 2b9d8ea..22a90e8 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -18,3 +18,6 @@ clean-targets: seeds: +schema: raw_data +quote_columns: false + +vars: + my_table_reference: table_c \ No newline at end of file diff --git a/integration_tests/models/model_without_import_ctes.sql b/integration_tests/models/model_without_import_ctes.sql index 7bb569b..cba6662 100644 --- a/integration_tests/models/model_without_import_ctes.sql +++ b/integration_tests/models/model_without_import_ctes.sql @@ -12,7 +12,7 @@ with my_first_cte as ( a.col_a, b.col_b from {{ ref('data__a_relation') }} as a - left join {{ ref('data__b_relation') }} as b + left join {{ ref("data__b_relation") }} as b on a.col_a = b.col_a left join {{ ref('data__a_relation') }} as aa on a.col_a = aa.col_a @@ -29,6 +29,14 @@ my_second_cte as ( -- select -- 3 as id -- from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table + -- union all + -- select + -- 4 as id + -- from {{ var("my_table_reference") }} + -- union all + -- select + -- 5 as id + -- from {{ var("my_other_table_reference", "table_d") }} ) -- my_third_cte as ( -- select diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index 261bb32..94f8006 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -25,7 +25,7 @@ data__a_relation as ( ), data__b_relation as ( - select * from {% raw %}{{ ref('data__b_relation') }}{% endraw %} + select * from {% raw %}{{ ref("data__b_relation") }}{% endraw %} ), development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as ( @@ -33,6 +33,18 @@ development_codegen_integration_tests__data_source_schema_codegen_integration_te select * from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table -- CAUTION: It's best practice to use the ref or source function instead of a direct reference +), +my_other_table_reference as ( + + select * from {% raw %}{{ var("my_other_table_reference", "table_d") }}{% endraw %} + -- CAUTION: It's best practice to use the ref or source function instead of a var + +), +my_table_reference as ( + + select * from {% raw %}{{ var("my_table_reference") }}{% endraw %} + -- CAUTION: It's best practice to use the ref or source function instead of a var + ), raw_relation_1 as ( @@ -81,6 +93,14 @@ my_second_cte as ( -- select -- 3 as id -- from development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table + -- union all + -- select + -- 4 as id + -- from my_table_reference + -- union all + -- select + -- 5 as id + -- from my_other_table_reference ) -- my_third_cte as ( -- select diff --git a/integration_tests/tests/test_generate_model_import_ctes_leading.sql b/integration_tests/tests/test_generate_model_import_ctes_leading.sql index fee5d6d..3b17e7a 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_leading.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_leading.sql @@ -26,7 +26,7 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da ) ,data__b_relation as ( - select * from {% raw %}{{ ref('data__b_relation') }}{% endraw %} + select * from {% raw %}{{ ref("data__b_relation") }}{% endraw %} ) ,development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as ( @@ -34,6 +34,18 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da select * from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table -- CAUTION: It's best practice to use the ref or source function instead of a direct reference +) +,my_other_table_reference as ( + + select * from {% raw %}{{ var("my_other_table_reference", "table_d") }}{% endraw %} + -- CAUTION: It's best practice to use the ref or source function instead of a var + +) +,my_table_reference as ( + + select * from {% raw %}{{ var("my_table_reference") }}{% endraw %} + -- CAUTION: It's best practice to use the ref or source function instead of a var + ) ,raw_relation_1 as ( @@ -82,6 +94,14 @@ my_second_cte as ( -- select -- 3 as id -- from development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table + -- union all + -- select + -- 4 as id + -- from my_table_reference + -- union all + -- select + -- 5 as id + -- from my_other_table_reference ) -- my_third_cte as ( -- select diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index b6903d8..7000845 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -28,6 +28,12 @@ # from_source - matches (from or join) followed by some spaces and then {{source(,)}} + # from_var_1 + - matches (from or join) followed by some spaces and then {{var()}} + + # from_var_2 + - matches (from or join) followed by some spaces and then {{var(,)}} + # from_table_1 - matches (from or join) followed by some spaces and then . @@ -48,8 +54,10 @@ {%- set does_raw_sql_contain_cte = re.search(with_regex, model_raw_sql) -%} {%- set from_regexes = { - 'from_ref':'(?i)(from|join)\s+({{\s*ref\s*\(\s*\'|\")([^)\'\"]+)(\'|\"\s*)(\)\s*}})', - 'from_source':'(?i)(from|join)\s+({{\s*source\s*\(\s*\'|\")([^)\'\"]+)(\'|\"\s*)(,)(\s*\'|\")([^)\'\"]+)(\'|\"\s*)(\)\s*}})', + 'from_ref':'(?i)(from|join)\s+({{\s*ref\s*\(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(\)\s*}})', + 'from_source':'(?i)(from|join)\s+({{\s*source\s*\(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(,)(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(\)\s*}})', + 'from_var_1':'(?i)(from|join)\s+({{\s*var\s*\(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(\)\s*}})', + 'from_var_2':'(?i)(from|join)\s+({{\s*var\s*\(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(,)(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(\)\s*}})', 'from_table_1':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?)(\.)([\[`\"]?\w+[\]`\"]?)(?=\s|$)', 'from_table_2':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?)(\.)([\[`\"]?\w+[\]`\"]?)(\.)([\[`\"]?\w+[\]`\"]?)(?=\s|$)', 'from_table_3':'(?i)(from|join)\s+([\[`\"])([\w ]+)([\]`\"])', @@ -127,6 +135,8 @@ -- CAUTION: It's best practice to create staging layer for raw sources {%- elif from_obj[2] == 'from_table_1' or from_obj[2] == 'from_table_2' or from_obj[2] == 'from_table_3' %} -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + {%- elif from_obj[2] == 'from_var_1' or from_obj[2] == 'from_var_2' %} + -- CAUTION: It's best practice to use the ref or source function instead of a var {%- endif %} ){%- if does_raw_sql_contain_cte and not leading_commas -%},{%- endif %} From e46d9e664c706a28af211a112b6ea9efb85ffa5d Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Mon, 15 Aug 2022 13:30:06 -0400 Subject: [PATCH 14/20] Pickup raw references enclosed by single quote --- .../models/model_without_import_ctes.sql | 4 +++ .../tests/test_generate_model_import_ctes.sql | 16 +++++++++++ ...est_generate_model_import_ctes_leading.sql | 16 +++++++++++ macros/generate_model_import_ctes.sql | 27 ++++++++++--------- 4 files changed, 51 insertions(+), 12 deletions(-) diff --git a/integration_tests/models/model_without_import_ctes.sql b/integration_tests/models/model_without_import_ctes.sql index cba6662..94ace74 100644 --- a/integration_tests/models/model_without_import_ctes.sql +++ b/integration_tests/models/model_without_import_ctes.sql @@ -47,5 +47,9 @@ my_second_cte as ( -- on a.col_a = b.col_b -- left join [raw_relation_3] as aa -- on a.col_a = aa.col_b +-- left join 'raw_relation_4' as ab +-- on a.col_a = ab.col_b +-- left join 'my_schema'.'raw_relation_5' as ac +-- on a.col_a = ac.col_b -- ) select * from my_second_cte \ No newline at end of file diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index 94f8006..18e42aa 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -39,6 +39,12 @@ my_other_table_reference as ( select * from {% raw %}{{ var("my_other_table_reference", "table_d") }}{% endraw %} -- CAUTION: It's best practice to use the ref or source function instead of a var +), +my_schema_raw_relation_5 as ( + + select * from 'my_schema'.'raw_relation_5' + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + ), my_table_reference as ( @@ -63,6 +69,12 @@ raw_relation_3 as ( select * from [raw_relation_3] -- CAUTION: It's best practice to use the ref or source function instead of a direct reference +), +raw_relation_4 as ( + + select * from 'raw_relation_4' + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + ), source_codegen_integration_tests__data_source_table as ( @@ -111,6 +123,10 @@ my_second_cte as ( -- on a.col_a = b.col_b -- left join raw_relation_3 as aa -- on a.col_a = aa.col_b +-- left join raw_relation_4 as ab +-- on a.col_a = ab.col_b +-- left join my_schema_raw_relation_5 as ac +-- on a.col_a = ac.col_b -- ) select * from my_second_cte {% endset %} diff --git a/integration_tests/tests/test_generate_model_import_ctes_leading.sql b/integration_tests/tests/test_generate_model_import_ctes_leading.sql index 3b17e7a..e2e8b7d 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_leading.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_leading.sql @@ -40,6 +40,12 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da select * from {% raw %}{{ var("my_other_table_reference", "table_d") }}{% endraw %} -- CAUTION: It's best practice to use the ref or source function instead of a var +) +,my_schema_raw_relation_5 as ( + + select * from 'my_schema'.'raw_relation_5' + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + ) ,my_table_reference as ( @@ -64,6 +70,12 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da select * from [raw_relation_3] -- CAUTION: It's best practice to use the ref or source function instead of a direct reference +) +,raw_relation_4 as ( + + select * from 'raw_relation_4' + -- CAUTION: It's best practice to use the ref or source function instead of a direct reference + ) ,source_codegen_integration_tests__data_source_table as ( @@ -112,6 +124,10 @@ my_second_cte as ( -- on a.col_a = b.col_b -- left join raw_relation_3 as aa -- on a.col_a = aa.col_b +-- left join raw_relation_4 as ab +-- on a.col_a = ab.col_b +-- left join my_schema_raw_relation_5 as ac +-- on a.col_a = ac.col_b -- ) select * from my_second_cte {% endset %} diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index 7000845..07d1f5d 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -36,12 +36,15 @@ # from_table_1 - matches (from or join) followed by some spaces and then . + where each is enclosed by (` or [ or " or ' or nothing) # from_table_2 - matches (from or join) followed by some spaces and then .. + where each is enclosed by (` or [ or " or ' or nothing) # from_table_3 - - matches (from or join) followed by some spaces and then (` or [ or ")(` or ] or ") + - matches (from or join) followed by some spaces and then + where is enclosed by (` or [ or " or ') # config block - matches the start of the file followed by anything and then {{config()}} @@ -54,13 +57,13 @@ {%- set does_raw_sql_contain_cte = re.search(with_regex, model_raw_sql) -%} {%- set from_regexes = { - 'from_ref':'(?i)(from|join)\s+({{\s*ref\s*\(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(\)\s*}})', - 'from_source':'(?i)(from|join)\s+({{\s*source\s*\(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(,)(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(\)\s*}})', - 'from_var_1':'(?i)(from|join)\s+({{\s*var\s*\(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(\)\s*}})', - 'from_var_2':'(?i)(from|join)\s+({{\s*var\s*\(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(,)(\s*(?:\'|\"))([^)\'\"]+)((?:\'|\")\s*)(\)\s*}})', - 'from_table_1':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?)(\.)([\[`\"]?\w+[\]`\"]?)(?=\s|$)', - 'from_table_2':'(?i)(from|join)\s+([\[`\"]?\w+[\]`\"]?)(\.)([\[`\"]?\w+[\]`\"]?)(\.)([\[`\"]?\w+[\]`\"]?)(?=\s|$)', - 'from_table_3':'(?i)(from|join)\s+([\[`\"])([\w ]+)([\]`\"])', + 'from_ref':'(?i)(from|join)\s+({{\s*ref\s*\(\s*[\'\"]?)([^)\'\"]+)([\'\"]?\s*)(\)\s*}})', + 'from_source':'(?i)(from|join)\s+({{\s*source\s*\(\s*[\'\"]?)([^)\'\"]+)([\'\"]?\s*)(,)(\s*[\'\"]?)([^)\'\"]+)([\'\"]?\s*)(\)\s*}})', + 'from_var_1':'(?i)(from|join)\s+({{\s*var\s*\(\s*[\'\"]?)([^)\'\"]+)([\'\"]?\s*)(\)\s*}})', + 'from_var_2':'(?i)(from|join)\s+({{\s*var\s*\(\s*[\'\"]?)([^)\'\"]+)([\'\"]?\s*)(,)(\s*[\'\"]?)([^)\'\"]+)([\'\"]?\s*)(\)\s*}})', + 'from_table_1':'(?i)(from|join)\s+([\[`\"\']?)(\w+)([\]`\"\']?)(\.)([\[`\"\']?)(\w+)([\]`\"\']?)(?=\s|$)', + 'from_table_2':'(?i)(from|join)\s+([\[`\"\']?)(\w+)([\]`\"\']?)(\.)([\[`\"\']?)(\w+)([\]`\"\']?)(\.)([\[`\"\']?)(\w+)([\]`\"\']?)(?=\s|$)', + 'from_table_3':'(?i)(from|join)\s+([\[`\"\'])([\w ]+)([\]`\"\'])(?=\s|$)', 'config_block':'(?i)(?s)^.*{{\s*config\s*\([^)]+\)\s*}}' } -%} @@ -84,12 +87,12 @@ {%- do from_list.append(match_tuple) -%} {%- elif regex_name == 'from_table_1' -%} {%- set full_from_clause = match[1:]|join()|trim -%} - {%- set cte_name = match[1]|lower + '_' + match[3]|lower -%} + {%- set cte_name = match[2]|lower + '_' + match[6]|lower -%} {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%} {%- do from_list.append(match_tuple) -%} {%- elif regex_name == 'from_table_2' -%} {%- set full_from_clause = match[1:]|join()|trim -%} - {%- set cte_name = match[1]|lower + '_' + match[3]|lower + '_' + match[5]|lower -%} + {%- set cte_name = match[2]|lower + '_' + match[6]|lower + '_' + match[10]|lower -%} {%- set match_tuple = (cte_name, full_from_clause, regex_name) -%} {%- do from_list.append(match_tuple) -%} {%- else -%} @@ -105,9 +108,9 @@ {%- elif regex_name == 'from_source' -%} {%- set ns.model_sql = re.sub(regex_pattern, '\g<1> source_\g<7>', ns.model_sql) -%} {%- elif regex_name == 'from_table_1' -%} - {%- set ns.model_sql = re.sub(regex_pattern, '\g<1> \g<2>_\g<4>', ns.model_sql) -%} + {%- set ns.model_sql = re.sub(regex_pattern, '\g<1> \g<3>_\g<7>', ns.model_sql) -%} {%- elif regex_name == 'from_table_2' -%} - {%- set ns.model_sql = re.sub(regex_pattern, '\g<1> \g<2>_\g<4>_\g<6>', ns.model_sql) -%} + {%- set ns.model_sql = re.sub(regex_pattern, '\g<1> \g<3>_\g<7>_\g<11>', ns.model_sql) -%} {%- else -%} {%- set ns.model_sql = re.sub(regex_pattern, '\g<1> \g<3>', ns.model_sql) -%} {% endif %} From 4e2fe31bfbb440a20768b06c6022a062ae7f2972 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Fri, 9 Sep 2022 13:54:29 -0400 Subject: [PATCH 15/20] Fixed missing commas issue --- macros/generate_model_import_ctes.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index 07d1f5d..605c478 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -142,7 +142,7 @@ -- CAUTION: It's best practice to use the ref or source function instead of a var {%- endif %} -){%- if does_raw_sql_contain_cte and not leading_commas -%},{%- endif %} +){%- if (loop.last and does_raw_sql_contain_cte) or (not loop.last) and (not leading_commas) -%},{%- endif %} {% endfor -%} {%- if does_raw_sql_contain_cte -%} From c592471509ca22e80d7333bbeefaa902c85d28fb Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Fri, 9 Sep 2022 14:06:39 -0400 Subject: [PATCH 16/20] comma fix --- macros/generate_model_import_ctes.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index 605c478..cff103f 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -142,7 +142,7 @@ -- CAUTION: It's best practice to use the ref or source function instead of a var {%- endif %} -){%- if (loop.last and does_raw_sql_contain_cte) or (not loop.last) and (not leading_commas) -%},{%- endif %} +){%- if ((loop.last and does_raw_sql_contain_cte) or (not loop.last)) and not leading_commas -%},{%- endif %} {% endfor -%} {%- if does_raw_sql_contain_cte -%} From d3e4b4f3bbeb9a43331d4edd6e58852989da7ede Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Wed, 28 Sep 2022 15:42:50 -0400 Subject: [PATCH 17/20] Added integration test to check for commas on sql without import ctes --- integration_tests/models/model_without_any_ctes.sql | 3 ++- .../tests/test_generate_model_import_ctes_no_ctes.sql | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/integration_tests/models/model_without_any_ctes.sql b/integration_tests/models/model_without_any_ctes.sql index 8a19c8a..aa152ff 100644 --- a/integration_tests/models/model_without_any_ctes.sql +++ b/integration_tests/models/model_without_any_ctes.sql @@ -1,3 +1,4 @@ select *, 2 as col2 -from {{ ref('model_without_import_ctes') }} +from {{ ref('model_without_import_ctes') }} as m +left join (select 2 as col_a from {{ ref('data__a_relation') }}) as a on a.col_a = m.id where id = 1 \ No newline at end of file diff --git a/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql index e2e17da..6e54a2e 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql @@ -4,13 +4,19 @@ %} {% set expected_model_with_import_ctes %} -with model_without_import_ctes as ( +with data__a_relation as ( + + select * from {% raw %}{{ ref('data__a_relation') }}{% endraw %} + +), +model_without_import_ctes as ( select * from {% raw %}{{ ref('model_without_import_ctes') }}{% endraw %} ) select *, 2 as col2 -from model_without_import_ctes +from model_without_import_ctes as m +left join (select 2 as col_a from data__a_relation) as a on a.col_a = m.id where id = 1 {% endset %} From a2c6029e1d3457f15784ee37ab80d4740baaf594 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Thu, 29 Sep 2022 09:23:02 -0400 Subject: [PATCH 18/20] update readme --- README.md | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7801a35..94c6e39 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,22 @@ Macros that generate dbt code, and log it to the command line. # Contents -* [generate_source](#generate_source-source) -* [generate_base_model](#generate_base_model-source) -* [generate_model_yaml](#generate_model_yaml-source) -* [generate_model_import_ctes](#generate_model_import_ctes-source) +- [dbt-codegen](#dbt-codegen) +- [Contents](#contents) +- [Installation instructions](#installation-instructions) +- [Macros](#macros) + - [generate_source (source)](#generate_source-source) + - [Arguments](#arguments) + - [Usage:](#usage) + - [generate_base_model (source)](#generate_base_model-source) + - [Arguments:](#arguments-1) + - [Usage:](#usage-1) + - [generate_model_yaml (source)](#generate_model_yaml-source) + - [Arguments:](#arguments-2) + - [Usage:](#usage-2) + - [generate_model_import_ctes (source)](#generate_model_import_ctes-source) + - [Arguments:](#arguments-3) + - [Usage:](#usage-3) # Installation instructions New to dbt packages? Read more about them [here](https://docs.getdbt.com/docs/building-a-dbt-project/package-management/). @@ -177,7 +189,7 @@ This macro generates the SQL for a given model with all references pulled up int 1. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code. ``` -{{ generate_model_import_ctes( +{{ codegen.generate_model_import_ctes( model_name = 'my_dbt_model' ) }} ``` From 6d0d6ff78f80eecc16791824129b9d18ef77de77 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Fri, 30 Sep 2022 13:35:26 -0400 Subject: [PATCH 19/20] Added space between import CTEs --- .../tests/test_generate_model_import_ctes.sql | 12 ++++++++++++ .../test_generate_model_import_ctes_leading.sql | 12 ++++++++++++ .../test_generate_model_import_ctes_no_ctes.sql | 2 ++ macros/generate_model_import_ctes.sql | 1 + 4 files changed, 27 insertions(+) diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index 18e42aa..8975550 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -18,70 +18,82 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ), + data__a_relation as ( select * from {% raw %}{{ ref('data__a_relation') }}{% endraw %} ), + data__b_relation as ( select * from {% raw %}{{ ref("data__b_relation") }}{% endraw %} ), + development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as ( select * from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ), + my_other_table_reference as ( select * from {% raw %}{{ var("my_other_table_reference", "table_d") }}{% endraw %} -- CAUTION: It's best practice to use the ref or source function instead of a var ), + my_schema_raw_relation_5 as ( select * from 'my_schema'.'raw_relation_5' -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ), + my_table_reference as ( select * from {% raw %}{{ var("my_table_reference") }}{% endraw %} -- CAUTION: It's best practice to use the ref or source function instead of a var ), + raw_relation_1 as ( select * from `raw_relation_1` -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ), + raw_relation_2 as ( select * from "raw_relation_2" -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ), + raw_relation_3 as ( select * from [raw_relation_3] -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ), + raw_relation_4 as ( select * from 'raw_relation_4' -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ), + source_codegen_integration_tests__data_source_table as ( select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }}{% endraw %} -- CAUTION: It's best practice to create staging layer for raw sources ), + -- I love this cte my_first_cte as ( select diff --git a/integration_tests/tests/test_generate_model_import_ctes_leading.sql b/integration_tests/tests/test_generate_model_import_ctes_leading.sql index e2e8b7d..fd4d3e2 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_leading.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_leading.sql @@ -19,70 +19,82 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ) + ,data__a_relation as ( select * from {% raw %}{{ ref('data__a_relation') }}{% endraw %} ) + ,data__b_relation as ( select * from {% raw %}{{ ref("data__b_relation") }}{% endraw %} ) + ,development_codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as ( select * from development.codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ) + ,my_other_table_reference as ( select * from {% raw %}{{ var("my_other_table_reference", "table_d") }}{% endraw %} -- CAUTION: It's best practice to use the ref or source function instead of a var ) + ,my_schema_raw_relation_5 as ( select * from 'my_schema'.'raw_relation_5' -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ) + ,my_table_reference as ( select * from {% raw %}{{ var("my_table_reference") }}{% endraw %} -- CAUTION: It's best practice to use the ref or source function instead of a var ) + ,raw_relation_1 as ( select * from `raw_relation_1` -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ) + ,raw_relation_2 as ( select * from "raw_relation_2" -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ) + ,raw_relation_3 as ( select * from [raw_relation_3] -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ) + ,raw_relation_4 as ( select * from 'raw_relation_4' -- CAUTION: It's best practice to use the ref or source function instead of a direct reference ) + ,source_codegen_integration_tests__data_source_table as ( select * from {% raw %}{{ source('codegen_integration_tests__data_source_schema', 'codegen_integration_tests__data_source_table') }}{% endraw %} -- CAUTION: It's best practice to create staging layer for raw sources ) + -- I love this cte ,my_first_cte as ( select diff --git a/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql index 6e54a2e..c72e212 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes_no_ctes.sql @@ -9,11 +9,13 @@ with data__a_relation as ( select * from {% raw %}{{ ref('data__a_relation') }}{% endraw %} ), + model_without_import_ctes as ( select * from {% raw %}{{ ref('model_without_import_ctes') }}{% endraw %} ) + select *, 2 as col2 from model_without_import_ctes as m left join (select 2 as col_a from data__a_relation) as a on a.col_a = m.id diff --git a/macros/generate_model_import_ctes.sql b/macros/generate_model_import_ctes.sql index cff103f..414f1ba 100644 --- a/macros/generate_model_import_ctes.sql +++ b/macros/generate_model_import_ctes.sql @@ -143,6 +143,7 @@ {%- endif %} ){%- if ((loop.last and does_raw_sql_contain_cte) or (not loop.last)) and not leading_commas -%},{%- endif %} + {% endfor -%} {%- if does_raw_sql_contain_cte -%} From 8dd50911465f9a3540959e6639577d4a36e1bae3 Mon Sep 17 00:00:00 2001 From: Grace Goheen Date: Wed, 5 Oct 2022 10:19:35 -0400 Subject: [PATCH 20/20] update README --- README.md | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 94c6e39..0b553b0 100644 --- a/README.md +++ b/README.md @@ -182,11 +182,12 @@ models: This macro generates the SQL for a given model with all references pulled up into import CTEs, which you can then paste back into the model. ### Arguments: -* `model_name` (required): The model table you wish to generate SQL with import CTEs for. +* `model_name` (required): The model you wish to generate SQL with import CTEs for. * `leading_commas` (optional, default = false): Whether you want your commas to be leading (vs trailing). ### Usage: -1. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code. +1. Create a model with your original SQL query +2. Copy the macro into a statement tab in the dbt Cloud IDE, or into an analysis file, and compile your code ``` {{ codegen.generate_model_import_ctes( @@ -200,4 +201,70 @@ Alternatively, call the macro as an [operation](https://docs.getdbt.com/docs/usi $ dbt run-operation generate_model_import_ctes --args '{"model_name": "my_dbt_model"}' ``` -2. Replace the contents of the model's current SQL file with the compiled or logged code \ No newline at end of file +3. The new SQL - with all references pulled up into import CTEs - will be logged to the command line + +``` +with customers as ( + + select * from {{ ref('stg_customers') }} + +), + +orders as ( + + select * from {{ ref('stg_orders') }} + +), + +payments as ( + + select * from {{ ref('stg_payments') }} + +), + +customer_orders as ( + + select + customer_id, + min(order_date) as first_order, + max(order_date) as most_recent_order, + count(order_id) as number_of_orders + from orders + group by customer_id + +), + +customer_payments as ( + + select + orders.customer_id, + sum(amount) as total_amount + from payments + left join orders on + payments.order_id = orders.order_id + group by orders.customer_id + +), + +final as ( + + select + customers.customer_id, + customers.first_name, + customers.last_name, + customer_orders.first_order, + customer_orders.most_recent_order, + customer_orders.number_of_orders, + customer_payments.total_amount as customer_lifetime_value + from customers + left join customer_orders + on customers.customer_id = customer_orders.customer_id + left join customer_payments + on customers.customer_id = customer_payments.customer_id + +) + +select * from final +``` + +4. Replace the contents of the model's current SQL file with the compiled or logged code \ No newline at end of file