Skip to content

Commit 5951f5b

Browse files
Merge pull request #52 from DataKitchen/release/4.32.5
Release/4.32.5
2 parents: e9c7bb2 + a630c1b; commit: 5951f5b

File tree

230 files changed

+15196
-3361
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

230 files changed

+15196
-3361
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
88

99
[project]
1010
name = "dataops-testgen"
11-
version = "4.26.1"
11+
version = "4.32.5"
1212
description = "DataKitchen's Data Quality DataOps TestGen"
1313
authors = [
1414
{ "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },
@@ -33,6 +33,7 @@ dependencies = [
3333
"sqlalchemy==1.4.46",
3434
"databricks-sql-connector==2.9.3",
3535
"snowflake-sqlalchemy==1.6.1",
36+
"sqlalchemy-bigquery==1.14.1",
3637
"pyodbc==5.0.0",
3738
"psycopg2-binary==2.9.9",
3839
"pycryptodome==3.21",

testgen/__main__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from testgen.commands.run_observability_exporter import run_observability_exporter
3232
from testgen.commands.run_profiling_bridge import run_profiling_queries
3333
from testgen.commands.run_quick_start import run_quick_start, run_quick_start_increment
34+
from testgen.commands.run_test_metadata_exporter import run_test_metadata_exporter
3435
from testgen.commands.run_upgrade_db_config import get_schema_revision, is_db_revision_up_to_date, run_upgrade_db_config
3536
from testgen.common import (
3637
configure_logging,
@@ -503,6 +504,25 @@ def export_data(configuration: Configuration, project_key: str, test_suite_key:
503504
click.echo("\nexport-observability completed successfully.\n")
504505

505506

507+
@click.option(
508+
"--path",
509+
help="Path to the templates folder. Defaults to path from project root.",
510+
required=False,
511+
default="testgen/template",
512+
)
513+
@cli.command("export-test-metadata", help="Exports current test metadata records to yaml files.")
514+
@pass_configuration
515+
def export_test_metadata(configuration: Configuration, path: str):
516+
click.echo("export-test-metadata")
517+
LOG.info("CurrentStep: Main Program - Test Metadata Export")
518+
if not os.path.isdir(path):
519+
LOG.error(f"Provided path {path} is not a directory. Please correct the --path option.")
520+
return
521+
run_test_metadata_exporter(path)
522+
LOG.info("CurrentStep: Main Program - Test Metadata Export - DONE")
523+
click.echo("\nexport-test-metadata completed successfully.\n")
524+
525+
506526
@cli.command("list-test-types", help="Lists all available TestGen test types.")
507527
@click.option("-d", "--display", help="Show command output in the terminal.", is_flag=True, default=False)
508528
@pass_configuration

testgen/commands/queries/execute_cat_tests_query.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ class CATTestParams(TypedDict):
1717
class CCATExecutionSQL:
1818
project_code = ""
1919
flavor = ""
20-
concat_operator = ""
2120
test_suite = ""
2221
run_date = ""
2322
test_run_id = ""
@@ -35,8 +34,7 @@ def __init__(self, strProjectCode, strTestSuiteId, strTestSuite, strSQLFlavor, m
3534
self.test_suite_id = strTestSuiteId
3635
self.test_suite = strTestSuite
3736
self.project_code = strProjectCode
38-
flavor_service = get_flavor_service(strSQLFlavor)
39-
self.concat_operator = flavor_service.get_concat_operator()
37+
self.flavor_service = get_flavor_service(strSQLFlavor)
4038
self.flavor = strSQLFlavor
4139
self.max_query_chars = max_query_chars
4240
self.today = date_service.get_now_as_string_with_offset(minutes_offset)
@@ -47,7 +45,7 @@ def _get_rollup_scores_sql(self) -> CRollupScoresSQL:
4745
self._rollup_scores_sql = CRollupScoresSQL(self.test_run_id, self.table_groups_id)
4846

4947
return self._rollup_scores_sql
50-
48+
5149
def _get_query(self, template_file_name: str, sub_directory: str | None = "exec_cat_tests", no_bind: bool = False) -> tuple[str, dict | None]:
5250
query = read_template_sql_file(template_file_name, sub_directory)
5351
params = {
@@ -58,8 +56,9 @@ def _get_query(self, template_file_name: str, sub_directory: str | None = "exec_
5856
"TEST_SUITE_ID": self.test_suite_id,
5957
"TABLE_GROUPS_ID": self.table_groups_id,
6058
"SQL_FLAVOR": self.flavor,
61-
"ID_SEPARATOR": "`" if self.flavor == "databricks" else '"',
62-
"CONCAT_OPERATOR": self.concat_operator,
59+
"QUOTE": self.flavor_service.quote_character,
60+
"VARCHAR_TYPE": self.flavor_service.varchar_type,
61+
"CONCAT_OPERATOR": self.flavor_service.concat_operator,
6362
"SCHEMA_NAME": self.target_schema,
6463
"TABLE_NAME": self.target_table,
6564
"NOW_DATE": "GETDATE()",
@@ -73,7 +72,7 @@ def _get_query(self, template_file_name: str, sub_directory: str | None = "exec_
7372
query = replace_params(query, params)
7473
query = replace_templated_functions(query, self.flavor)
7574

76-
if no_bind and self.flavor != "databricks":
75+
if no_bind:
7776
# Adding escape character where ':' is referenced
7877
query = query.replace(":", "\\:")
7978

testgen/commands/queries/execute_tests_query.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from typing import ClassVar, TypedDict
22

3-
from testgen.common import AddQuotesToIdentifierCSV, CleanSQL, ConcatColumnList, date_service, read_template_sql_file
4-
from testgen.common.database.database_service import replace_params
3+
from testgen.common import date_service, read_template_sql_file
4+
from testgen.common.clean_sql import CleanSQL, ConcatColumnList, quote_identifiers
5+
from testgen.common.database.database_service import get_flavor_service, replace_params
56

67

78
class TestParams(TypedDict):
@@ -54,6 +55,7 @@ class CTestExecutionSQL:
5455
def __init__(self, strProjectCode, strFlavor, strTestSuiteId, strTestSuite, minutes_offset=0):
5556
self.project_code = strProjectCode
5657
self.flavor = strFlavor
58+
self.flavor_service = get_flavor_service(strFlavor)
5759
self.test_suite_id = strTestSuiteId
5860
self.test_suite = strTestSuite
5961
self.today = date_service.get_now_as_string_with_offset(minutes_offset)
@@ -100,20 +102,21 @@ def _get_query(
100102
"TEST_SUITE_ID": self.test_suite_id,
101103
"TEST_SUITE": self.test_suite,
102104
"SQL_FLAVOR": self.flavor,
105+
"QUOTE": self.flavor_service.quote_character,
103106
"TEST_RUN_ID": self.test_run_id,
104107
"INPUT_PARAMETERS": self._get_input_parameters(),
105108
"RUN_DATE": self.run_date,
106109
"EXCEPTION_MESSAGE": self.exception_message,
107110
"START_TIME": self.today,
108111
"PROCESS_ID": self.process_id,
109-
"VARCHAR_TYPE": "STRING" if self.flavor == "databricks" else "VARCHAR",
112+
"VARCHAR_TYPE": self.flavor_service.varchar_type,
110113
"NOW_TIMESTAMP": date_service.get_now_as_string_with_offset(self.minutes_offset),
111114
**{key.upper(): value or "" for key, value in self.test_params.items()},
112115
}
113116

114117
if self.test_params:
115118
column_name = self.test_params["column_name"]
116-
params["COLUMN_NAME"] = AddQuotesToIdentifierCSV(column_name) if column_name else ""
119+
params["COLUMN_NAME"] = quote_identifiers(column_name, self.flavor) if column_name else ""
117120
# Shows contents without double-quotes for display and aggregate expressions
118121
params["COLUMN_NAME_NO_QUOTES"] = column_name or ""
119122
# Concatenates column list into single expression for relative entropy
@@ -126,11 +129,13 @@ def _get_query(
126129
)
127130

128131
subset_condition = self.test_params["subset_condition"]
129-
params["SUBSET_DISPLAY"] = subset_condition.replace("'", "''") if subset_condition else ""
132+
params["SUBSET_DISPLAY"] = subset_condition.replace(
133+
"'", self.flavor_service.escaped_single_quote
134+
) if subset_condition else ""
130135

131136
query = replace_params(query, params)
132137

133-
if no_bind and self.flavor != "databricks":
138+
if no_bind:
134139
# Adding escape character where ':' is referenced
135140
query = query.replace(":", "\\:")
136141

testgen/commands/queries/generate_tests_query.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from typing import ClassVar, TypedDict
33

44
from testgen.common import CleanSQL, date_service, read_template_sql_file
5-
from testgen.common.database.database_service import replace_params
5+
from testgen.common.database.database_service import get_flavor_service, replace_params
66
from testgen.common.read_file import get_template_files
77

88
LOG = logging.getLogger("testgen")
@@ -29,7 +29,10 @@ class CDeriveTestsSQL:
2929

3030
_use_clean = False
3131

32-
def __init__(self):
32+
def __init__(self, flavor):
33+
self.sql_flavor = flavor
34+
self.flavor_service = get_flavor_service(flavor)
35+
3336
today = date_service.get_now_as_string()
3437
self.run_date = today
3538
self.as_of_date = today
@@ -47,7 +50,7 @@ def _get_params(self) -> dict:
4750
"GENERATION_SET": self.generation_set,
4851
"AS_OF_DATE": self.as_of_date,
4952
"DATA_SCHEMA": self.data_schema,
50-
"ID_SEPARATOR": "`" if self.sql_flavor == "databricks" else '"',
53+
"QUOTE": self.flavor_service.quote_character,
5154
}
5255

5356
def _get_query(self, template_file_name: str, sub_directory: str | None = "generation") -> tuple[str, dict]:

testgen/commands/queries/profiling_query.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1+
import re
12
import typing
23

34
from testgen.commands.queries.refresh_data_chars_query import CRefreshDataCharsSQL
45
from testgen.commands.queries.rollup_scores_query import CRollupScoresSQL
56
from testgen.common import date_service, read_template_sql_file, read_template_yaml_file
6-
from testgen.common.database.database_service import replace_params
7+
from testgen.common.database.database_service import get_flavor_service, replace_params
78
from testgen.common.read_file import replace_templated_functions
89

910

@@ -21,6 +22,7 @@ class CProfilingSQL:
2122
col_name = ""
2223
col_gen_type = ""
2324
col_type = ""
25+
db_data_type = ""
2426
col_ordinal_position = "0"
2527
col_is_decimal = ""
2628
col_top_freq_update = ""
@@ -98,6 +100,7 @@ def _get_params(self) -> dict:
98100
"COL_NAME_SANITIZED": self.col_name.replace("'", "''"),
99101
"COL_GEN_TYPE": self.col_gen_type,
100102
"COL_TYPE": self.col_type or "",
103+
"DB_DATA_TYPE": self.db_data_type or "",
101104
"COL_POS": self.col_ordinal_position,
102105
"TOP_FREQ": self.col_top_freq_update,
103106
"PROFILE_RUN_ID": self.profile_run_id,
@@ -118,6 +121,7 @@ def _get_params(self) -> dict:
118121
"CONTINGENCY_MAX_VALUES": self.contingency_max_values,
119122
"PROCESS_ID": self.process_id,
120123
"SQL_FLAVOR": self.flavor,
124+
"QUOTE": get_flavor_service(self.flavor).quote_character
121125
}
122126

123127
def _get_query(
@@ -130,6 +134,7 @@ def _get_query(
130134
params = {}
131135

132136
if query:
137+
query = self._process_conditionals(query)
133138
if extra_params:
134139
params.update(extra_params)
135140
params.update(self._get_params())
@@ -139,6 +144,33 @@ def _get_query(
139144

140145
return query, params
141146

147+
def _process_conditionals(self, query: str):
148+
re_pattern = re.compile(r"^--\s+TG-(IF|ELSE|ENDIF)(?:\s+(\w+))?\s*$")
149+
condition = None
150+
updated_query = []
151+
for line in query.splitlines(True):
152+
if re_match := re_pattern.match(line):
153+
match re_match.group(1):
154+
case "IF" if condition is None and re_match.group(2) is not None:
155+
condition = bool(getattr(self, re_match.group(2)))
156+
case "ELSE" if condition is not None:
157+
condition = not condition
158+
case "ENDIF" if condition is not None:
159+
condition = None
160+
case _:
161+
raise ValueError("Template conditional misused")
162+
elif condition is not False:
163+
updated_query.append(line)
164+
165+
if condition is not None:
166+
raise ValueError("Template conditional misused")
167+
168+
return "".join(updated_query)
169+
170+
@property
171+
def do_sample_bool(self):
172+
return self.parm_do_sample == "Y"
173+
142174
def GetSecondProfilingColumnsQuery(self) -> tuple[str, dict]:
143175
# Runs on App database
144176
return self._get_query("secondary_profiling_columns.sql")
@@ -260,7 +292,12 @@ def GetProfilingQuery(self) -> tuple[str, dict]:
260292
else:
261293
strQ += dctSnippetTemplate["strTemplate01_else"]
262294

263-
strQ += dctSnippetTemplate["strTemplate02_all"]
295+
strQ += dctSnippetTemplate["strTemplate01_5"]
296+
297+
if self.col_gen_type == "X":
298+
strQ += dctSnippetTemplate["strTemplate02_X"]
299+
else:
300+
strQ += dctSnippetTemplate["strTemplate02_else"]
264301

265302
if self.col_gen_type in ["A", "D", "N"]:
266303
strQ += dctSnippetTemplate["strTemplate03_ADN"]

testgen/commands/queries/refresh_data_chars_query.py

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from testgen.common import read_template_sql_file
2-
from testgen.common.database.database_service import replace_params
2+
from testgen.common.database.database_service import get_flavor_service, replace_params
33
from testgen.common.database.flavor.flavor_service import SQLFlavor
44
from testgen.utils import chunk_queries
55

@@ -44,43 +44,41 @@ def _get_query(self, template_file_name: str, sub_directory: str | None = "data_
4444
query = replace_params(query, params)
4545
return query, params
4646

47-
def _get_mask_query(self, mask: str, is_include: bool) -> str:
48-
escape = ""
49-
if self.sql_flavor.startswith("mssql"):
50-
escaped_underscore = "[_]"
51-
elif self.sql_flavor == "snowflake":
52-
escaped_underscore = "\\\\_"
53-
escape = "ESCAPE '\\\\'"
54-
elif self.sql_flavor == "redshift":
55-
escaped_underscore = "\\\\_"
56-
else:
57-
escaped_underscore = "\\_"
58-
59-
table_names = [ item.strip().replace("_", escaped_underscore) for item in mask.split(",") ]
60-
sub_query = f"""
61-
AND {"NOT" if not is_include else ""} (
62-
{" OR ".join([ f"(c.table_name LIKE '{item}' {escape})" for item in table_names ])}
63-
)
64-
"""
65-
66-
return sub_query
67-
68-
def GetDDFQuery(self) -> tuple[str, dict]:
69-
# Runs on Target database
70-
query, params = self._get_query(f"schema_ddf_query_{self.sql_flavor}.sql", f"flavors/{self.sql_flavor}/data_chars")
71-
47+
def _get_table_criteria(self) -> str:
7248
table_criteria = ""
49+
flavor_service = get_flavor_service(self.sql_flavor)
50+
7351
if self.profiling_table_set:
74-
table_criteria += f" AND c.table_name IN ({self.profiling_table_set})"
52+
table_criteria += f" AND c.{flavor_service.ddf_table_ref} IN ({self.profiling_table_set})"
7553

7654
if self.profiling_include_mask:
77-
table_criteria += self._get_mask_query(self.profiling_include_mask, is_include=True)
55+
include_table_names = [
56+
item.strip().replace("_", flavor_service.escaped_underscore)
57+
for item in self.profiling_include_mask.split(",")
58+
]
59+
table_criteria += f"""
60+
AND (
61+
{" OR ".join([ f"(c.{flavor_service.ddf_table_ref} LIKE '{item}' {flavor_service.escape_clause})" for item in include_table_names ])}
62+
)
63+
"""
7864

7965
if self.profiling_exclude_mask:
80-
table_criteria += self._get_mask_query(self.profiling_exclude_mask, is_include=False)
81-
82-
query = query.replace("{TABLE_CRITERIA}", table_criteria)
66+
exclude_table_names = [
67+
item.strip().replace("_", flavor_service.escaped_underscore)
68+
for item in self.profiling_exclude_mask.split(",")
69+
]
70+
table_criteria += f"""
71+
AND NOT (
72+
{" OR ".join([ f"(c.{flavor_service.ddf_table_ref} LIKE '{item}' {flavor_service.escape_clause})" for item in exclude_table_names ])}
73+
)
74+
"""
8375

76+
return table_criteria
77+
78+
def GetDDFQuery(self) -> tuple[str, dict]:
79+
# Runs on Target database
80+
query, params = self._get_query(f"schema_ddf_query_{self.sql_flavor}.sql", f"flavors/{self.sql_flavor}/data_chars")
81+
query = query.replace("{TABLE_CRITERIA}", self._get_table_criteria())
8482
return query, params
8583

8684
def GetRecordCountQueries(self, schema_tables: list[str]) -> list[tuple[str, None]]:

0 commit comments

Comments (0)