Skip to content

Effectivity satellite handles incorrectly values that are updated to history values. #244

@koillinengit

Description

@koillinengit

Describe the bug
If we update for example sales group (SALES_GROUP_HK) from 100 --> 200 --> 100. Only the two first values are recorded in effectivity satellite because the latest records in effectivity satellites is checked over the partition of linked key (l_SALES_DOCUMENT_HK), when it is supposed to be done with the driving key.

Environment

dbt version: 1.8.5
automate_dv version: 0.11.0
Database/Platform: DataBricks

To Reproduce
Steps to reproduce the behavior:

{{ config(materialized='incremental', schema='rdv')  }}
 
{%- set source_model = "v_stg__vbak_sales_document" -%}
{%- set src_pk = "l_SALES_DOCUMENT_HK" -%}
{%- set src_dfk = "SALES_DOCUMENT_HK"       -%}
{%- set src_sfk = ["SALES_ORGANIZATION_HK","SOLDTO_PARTY_HK","SALES_OFFICE_HK","SALES_GROUP_HK"]         -%}
{%- set src_start_date = "START_DATE"  -%}
{%- set src_end_date = "END_DATE"     -%}
{%- set src_eff = "EFFECTIVE_FROM"     -%}
{%- set src_ldts = "LOAD_DATE"    -%}
{%- set src_source = "RECORD_SOURCE"  -%}
 
 
 

{{ automate_dv.eff_sat(src_pk=src_pk, src_dfk=src_dfk, src_sfk=src_sfk,
src_start_date=src_start_date,
src_end_date=src_end_date,
src_eff=src_eff, src_ldts=src_ldts,
src_source=src_source,
source_model=source_model) }}

-- Generated by AutomateDV (formerly known as dbtvault)
 
   
 
WITH source_data AS (
    SELECT a.l_SALES_DOCUMENT_HK, a.SALES_DOCUMENT_HK, a.SALES_ORGANIZATION_HK, a.SOLDTO_PARTY_HK, a.SALES_OFFICE_HK, a.SALES_GROUP_HK, a.START_DATE, a.END_DATE, a.EFFECTIVE_FROM, a.LOAD_DATE, a.RECORD_SOURCE
    FROM `dbw_7ense_databricks_demo`.`staging_sh4100`.`v_stg__vbak_sales_document` AS a
    WHERE a.SALES_DOCUMENT_HK IS NOT NULL
    AND a.SALES_ORGANIZATION_HK IS NOT NULL
    AND a.SOLDTO_PARTY_HK IS NOT NULL
    AND a.SALES_OFFICE_HK IS NOT NULL
    AND a.SALES_GROUP_HK IS NOT NULL
),
 
latest_records AS (
    SELECT * FROM (
        SELECT b.l_SALES_DOCUMENT_HK, b.SALES_DOCUMENT_HK, b.SALES_ORGANIZATION_HK, b.SOLDTO_PARTY_HK, b.SALES_OFFICE_HK, b.SALES_GROUP_HK, b.START_DATE, b.END_DATE, b.EFFECTIVE_FROM, b.LOAD_DATE, b.RECORD_SOURCE
        FROM `dbw_7ense_databricks_demo`.`rdv`.`es__sales_document_vbak_sh4100` AS b
        QUALIFY ROW_NUMBER() OVER (
            PARTITION BY **b.l_SALES_DOCUMENT_HK** **<-- Here is the error! This should be the driving key!**
            ORDER BY b.LOAD_DATE DESC
        ) = 1
        ) AS inner_select
   
),
 
latest_open AS (
    SELECT c.l_SALES_DOCUMENT_HK, c.SALES_DOCUMENT_HK, c.SALES_ORGANIZATION_HK, c.SOLDTO_PARTY_HK, c.SALES_OFFICE_HK, c.SALES_GROUP_HK, c.START_DATE, c.END_DATE, c.EFFECTIVE_FROM, c.LOAD_DATE, c.RECORD_SOURCE
    FROM latest_records AS c
    WHERE TO_DATE(c.END_DATE) = TO_DATE(TO_TIMESTAMP('9999-12-31 23:59:59.999999'))
),
 
latest_closed AS (
    SELECT d.l_SALES_DOCUMENT_HK, d.SALES_DOCUMENT_HK, d.SALES_ORGANIZATION_HK, d.SOLDTO_PARTY_HK, d.SALES_OFFICE_HK, d.SALES_GROUP_HK, d.START_DATE, d.END_DATE, d.EFFECTIVE_FROM, d.LOAD_DATE, d.RECORD_SOURCE
    FROM latest_records AS d
    WHERE TO_DATE(d.END_DATE) != TO_DATE(TO_TIMESTAMP('9999-12-31 23:59:59.999999'))
),
 
new_open_records AS (
    SELECT DISTINCT
        f.l_SALES_DOCUMENT_HK, f.SALES_DOCUMENT_HK, f.SALES_ORGANIZATION_HK, f.SOLDTO_PARTY_HK, f.SALES_OFFICE_HK, f.SALES_GROUP_HK,
        f.START_DATE AS START_DATE,
        f.END_DATE AS END_DATE,
        f.EFFECTIVE_FROM AS EFFECTIVE_FROM,
        f.LOAD_DATE,
        f.RECORD_SOURCE
    FROM source_data AS f
    LEFT JOIN latest_records AS lr
    ON f.l_SALES_DOCUMENT_HK = lr.l_SALES_DOCUMENT_HK
    WHERE lr.l_SALES_DOCUMENT_HK IS NULL
),
 
new_reopened_records AS (
    SELECT DISTINCT
        lc.l_SALES_DOCUMENT_HK, lc.SALES_DOCUMENT_HK, lc.SALES_ORGANIZATION_HK, lc.SOLDTO_PARTY_HK, lc.SALES_OFFICE_HK, lc.SALES_GROUP_HK,
        g.START_DATE AS START_DATE,
        g.END_DATE AS END_DATE,
        g.EFFECTIVE_FROM AS EFFECTIVE_FROM,
        g.LOAD_DATE,
        g.RECORD_SOURCE
    FROM source_data AS g
    INNER JOIN latest_closed AS lc
    ON g.l_SALES_DOCUMENT_HK = lc.l_SALES_DOCUMENT_HK
    WHERE TO_DATE(g.END_DATE) = TO_DATE(TO_TIMESTAMP('9999-12-31 23:59:59.999999'))
),
 
 
 
new_closed_records AS (
    SELECT DISTINCT
        lo.l_SALES_DOCUMENT_HK, lo.SALES_DOCUMENT_HK, lo.SALES_ORGANIZATION_HK, lo.SOLDTO_PARTY_HK, lo.SALES_OFFICE_HK, lo.SALES_GROUP_HK,
        h.START_DATE AS START_DATE,
        h.END_DATE AS END_DATE,
        h.EFFECTIVE_FROM AS EFFECTIVE_FROM,
        h.LOAD_DATE,
        lo.RECORD_SOURCE
    FROM source_data AS h
    LEFT JOIN latest_open AS lo
    ON lo.l_SALES_DOCUMENT_HK = h.l_SALES_DOCUMENT_HK
    LEFT JOIN latest_closed AS lc
    ON lc.l_SALES_DOCUMENT_HK = h.l_SALES_DOCUMENT_HK
    WHERE TO_DATE(h.END_DATE) != TO_DATE(TO_TIMESTAMP('9999-12-31 23:59:59.999999'))
    AND lo.l_SALES_DOCUMENT_HK IS NOT NULL
    AND lc.l_SALES_DOCUMENT_HK IS NULL
),
 
records_to_insert AS (
    SELECT * FROM new_open_records
    UNION
    SELECT * FROM new_reopened_records
    UNION
    SELECT * FROM new_closed_records
)
 
SELECT * FROM records_to_insert

Expected behavior
Now there is only two records in the effectivity satellite and the middle values (or row) are effective when the latest values should be

Screenshots

Log files

Additional context

AB#5602

Metadata

Metadata

Assignees

Labels

bugSomething isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions