Skip to content

Commit

Permalink
Added partition by in lag tests (calogica#146)
Browse files Browse the repository at this point in the history
* Added partition by parameter

* Added documentation

* Modified the test to accept list instead of string

* created data and implemented tests

* corrected test in yml

* Update formatting, test data and test macro

Co-authored-by: clausherther <[email protected]>
  • Loading branch information
Lucasthenoob and clausherther authored Mar 7, 2022
1 parent b5a022d commit 13eb72c
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 17 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -479,23 +479,25 @@ tests:
- dbt_expectations.expect_column_values_to_be_increasing:
sort_column: date_day
row_condition: "id is not null" # (Optional)
strictly: true # (Optional for comparison operator. Default is 'true', and it uses '>'. If set to 'flase' it uses '>='.)
strictly: true # (Optional for comparison operator. Default is 'true', and it uses '>'. If set to 'false' it uses '>='.)
group_by: [group_id, other_group_id, ...] # (Optional)
```

### [expect_column_values_to_be_decreasing](macros/schema_tests/column_values_basic/expect_column_values_to_be_decreasing.sql)

Expect column values to be decreasing.

If strictly=True, then this expectation is only satisfied if each consecutive value is strictly increasing–equal values are treated as failures.
If `strictly=True`, then this expectation is only satisfied if each consecutive value is strictly decreasing – equal values are treated as failures.

*Applies to:* Column

```yaml
tests:
- dbt_expectations.expect_column_values_to_be_decreasing:
sort_column: col_numeric_a
strictly: true # (Optional for comparison operator. Default is 'true' and it uses '<'. If set to 'false', it uses '<='.)
row_condition: "id is not null" # (Optional)
strictly: true # (Optional for comparison operator. Default is 'true' and it uses '<'. If set to 'false', it uses '<='.)
group_by: [group_id, other_group_id, ...] # (Optional)
```

### [expect_column_value_lengths_to_be_between](macros/schema_tests/string_matching/expect_column_value_lengths_to_be_between.sql)
Expand Down
14 changes: 7 additions & 7 deletions integration_tests/models/schema_tests/data_test.sql
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ select
union all

select
4 as idx,
'2020-10-23' as date_col,
0.5 as col_numeric_a,
0.5 as col_numeric_b,
'c' as col_string_a,
'abcd' as col_string_b,
null as col_null
4 as idx,
'2020-10-23' as date_col,
0.5 as col_numeric_a,
0.5 as col_numeric_b,
'c' as col_string_a,
'abcd' as col_string_b,
null as col_null
17 changes: 17 additions & 0 deletions integration_tests/models/schema_tests/emails.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
-- Preamble: when the execute flag is set, look up the relation citibike_trips in
-- schema new_york_citibike of database bigquery-public-data through the adapter
-- and write the result to the log (info=true).
-- NOTE(review): nothing in the visible part of this model reads source_relation;
-- this looks like leftover debugging output - confirm before keeping it.
-- NOTE(review): the database name is hard-coded to a BigQuery public project;
-- presumably this lookup does not resolve on other adapters - TODO confirm.
{% if execute %}
{%- set source_relation = adapter.get_relation(
database="bigquery-public-data",
schema="new_york_citibike",
identifier="citibike_trips") -%}

{{ log("Source Relation: " ~ source_relation, info=true) }}
{% endif %}

-- Second lookup: same pattern, but for this model's own database, schema and
-- name (taken from `this`); the result is logged under the same
-- "Source Relation: " prefix as the lookup above.
-- NOTE(review): source_relation_2 is likewise unused in the visible code - confirm.
{% if execute %}
{%- set source_relation_2 = adapter.get_relation(
database=this.database,
schema=this.schema,
identifier=this.name) -%}

{{ log("Source Relation: " ~ source_relation_2, info=true) }}
{% endif %}
select
'[email protected]' as email_address,
'@[^.]*' as reg_exp
Expand Down
16 changes: 16 additions & 0 deletions integration_tests/models/schema_tests/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,19 @@ models:
datepart: day
interval: 1
row_condition: group_id = 4

- name: window_function_test
columns:
- name: rolling_sum_increasing
tests :
- dbt_expectations.expect_column_values_to_be_increasing:
group_by: ['idx']
strictly: true
sort_column: date_col

- name: rolling_sum_decreasing
tests :
- dbt_expectations.expect_column_values_to_be_decreasing:
group_by: ['idx']
strictly: true
sort_column: date_col
48 changes: 48 additions & 0 deletions integration_tests/models/schema_tests/window_function_test.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-- Fixture model: a handful of hand-written rows keyed by idx, plus two
-- per-idx running totals of col_numeric_a:
--   rolling_sum_increasing - accumulated in ascending date_col order
--   rolling_sum_decreasing - accumulated in descending date_col order
-- Every col_numeric_a value inside a multi-row idx group is positive, so read
-- in ascending date_col order the first total grows and the second shrinks
-- within each idx.
with data_example as (

    select
        1 as idx,
        '2020-10-21' as date_col,
        -- presumably cast so the unioned column resolves to the float type
        -- on every adapter - TODO confirm
        cast(0 as {{ dbt_utils.type_float() }}) as col_numeric_a

    union all

    select
        2 as idx,
        '2020-10-22' as date_col,
        1 as col_numeric_a

    union all

    select
        2 as idx,
        '2020-10-23' as date_col,
        2 as col_numeric_a

    union all

    select
        2 as idx,
        '2020-10-24' as date_col,
        1 as col_numeric_a

    union all

    select
        3 as idx,
        '2020-10-23' as date_col,
        0.5 as col_numeric_a

    union all

    select
        4 as idx,
        '2020-10-23' as date_col,
        0.5 as col_numeric_a

)

select
    -- explicit column list (same columns and order as the CTE) instead of *
    idx,
    date_col,
    col_numeric_a,
    -- An explicit ROWS frame makes each total a true row-by-row running sum.
    -- The implicit default frame is RANGE-based (rows tied on date_col would
    -- share one total) and some engines reject an ordered aggregate window
    -- that has no frame clause at all.
    sum(col_numeric_a) over (
        partition by idx
        order by date_col
        rows between unbounded preceding and current row
    ) as rolling_sum_increasing,
    sum(col_numeric_a) over (
        partition by idx
        order by date_col desc
        rows between unbounded preceding and current row
    ) as rolling_sum_decreasing
from
    data_example
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
{% test expect_column_values_to_be_decreasing(model, column_name,
sort_column=None,
strictly=True,
row_condition=None) %}
row_condition=None,
group_by=None) %}

{%- set sort_column = column_name if not sort_column else sort_column -%}
{%- set operator = "<" if strictly else "<=" %}
with all_values as (

select
{{ sort_column }} as sort_column,
{%- if group_by -%}
{{ group_by | join(", ") }},
{%- endif %}
{{ column_name }} as value_field

from {{ model }}
{% if row_condition %}
where {{ row_condition }}
Expand All @@ -22,7 +25,12 @@ add_lag_values as (
select
sort_column,
value_field,
lag(value_field) over(order by sort_column) as value_field_lag
lag(value_field) over
{%- if not group_by -%}
(order by sort_column)
{%- else -%}
(partition by {{ group_by | join(", ") }} order by sort_column)
{%- endif %} as value_field_lag
from
all_values

Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
{% test expect_column_values_to_be_increasing(model, column_name,
sort_column=None,
strictly=True,
row_condition=None) %}
row_condition=None,
group_by=None) %}

{%- set sort_column = column_name if not sort_column else sort_column -%}
{%- set operator = ">" if strictly else ">=" %}
with all_values as (

select
{{ sort_column }} as sort_column,
{%- if group_by -%}
{{ group_by | join(", ") }},
{%- endif %}
{{ column_name }} as value_field

from {{ model }}
{% if row_condition %}
where {{ row_condition }}
Expand All @@ -22,13 +25,17 @@ add_lag_values as (
select
sort_column,
value_field,
lag(value_field) over(order by sort_column) as value_field_lag
lag(value_field) over
{%- if not group_by -%}
(order by sort_column)
{%- else -%}
(partition by {{ group_by | join(", ") }} order by sort_column)
{%- endif %} as value_field_lag
from
all_values

),
validation_errors as (

select
*
from
Expand Down

0 comments on commit 13eb72c

Please sign in to comment.