DataKitchen
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 2 additions & 1 deletion
diff --git a/testgen/__main__.py b/testgen/__main__.py
Lines changed: 20 additions & 0 deletions
diff --git a/testgen/commands/queries/execute_cat_tests_query.py b/testgen/commands/queries/execute_cat_tests_query.py
Lines changed: 6 additions & 7 deletions
diff --git a/testgen/commands/queries/execute_tests_query.py b/testgen/commands/queries/execute_tests_query.py
Lines changed: 11 additions & 6 deletions
diff --git a/testgen/commands/queries/generate_tests_query.py b/testgen/commands/queries/generate_tests_query.py
Lines changed: 6 additions & 3 deletions
diff --git a/testgen/commands/queries/profiling_query.py b/testgen/commands/queries/profiling_query.py
Lines changed: 39 additions & 2 deletions
diff --git a/testgen/commands/queries/refresh_data_chars_query.py b/testgen/commands/queries/refresh_data_chars_query.py
Lines changed: 29 additions & 31 deletions
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dataops-testgen"
-version = "4.26.1"
+version = "4.32.5"
 description = "DataKitchen's Data Quality DataOps TestGen"
 authors = [
     { "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },
@@ -33,6 +33,7 @@ dependencies = [
     "sqlalchemy==1.4.46",
     "databricks-sql-connector==2.9.3",
     "snowflake-sqlalchemy==1.6.1",
+    "sqlalchemy-bigquery==1.14.1",
     "pyodbc==5.0.0",
     "psycopg2-binary==2.9.9",
     "pycryptodome==3.21",
 
@@ -31,6 +31,7 @@
 from testgen.commands.run_observability_exporter import run_observability_exporter
 from testgen.commands.run_profiling_bridge import run_profiling_queries
 from testgen.commands.run_quick_start import run_quick_start, run_quick_start_increment
+from testgen.commands.run_test_metadata_exporter import run_test_metadata_exporter
 from testgen.commands.run_upgrade_db_config import get_schema_revision, is_db_revision_up_to_date, run_upgrade_db_config
 from testgen.common import (
     configure_logging,
@@ -503,6 +504,25 @@ def export_data(configuration: Configuration, project_key: str, test_suite_key:
     click.echo("\nexport-observability completed successfully.\n")
 
 
+@click.option(  # NOTE(review): declared above @cli.command here, while list-test-types below puts @cli.command first — confirm the option registers as intended
+    "--path",
+    help="Path to the templates folder. Defaults to path from project root.",
+    required=False,
+    default="testgen/template",  # relative default: the isdir check below only succeeds when run from a directory containing testgen/template
+)
+@cli.command("export-test-metadata", help="Exports current test metadata records to yaml files.")
+@pass_configuration
+def export_test_metadata(configuration: Configuration, path: str):  # CLI entry point: checks that --path is a directory, then delegates to run_test_metadata_exporter(path); `configuration` is injected but not read in this body
+    click.echo("export-test-metadata")
+    LOG.info("CurrentStep: Main Program - Test Metadata Export")
+    if not os.path.isdir(path):  # guard: nothing is exported unless the target folder already exists
+        LOG.error(f"Provided path {path} is not a directory. Please correct the --path option.")
+        return  # NOTE(review): returns without raising, so the only failure signal is the log entry — confirm the resulting exit status is acceptable
+    run_test_metadata_exporter(path)
+    LOG.info("CurrentStep: Main Program - Test Metadata Export - DONE")
+    click.echo("\nexport-test-metadata completed successfully.\n")  # reached only when the exporter call returned without raising
+
+
 @cli.command("list-test-types", help="Lists all available TestGen test types.")
 @click.option("-d", "--display", help="Show command output in the terminal.", is_flag=True, default=False)
 @pass_configuration
 
@@ -17,7 +17,6 @@ class CATTestParams(TypedDict):
 class CCATExecutionSQL:
     project_code = ""
     flavor = ""
-    concat_operator = ""
     test_suite = ""
     run_date = ""
     test_run_id = ""
@@ -35,8 +34,7 @@ def __init__(self, strProjectCode, strTestSuiteId, strTestSuite, strSQLFlavor, m
         self.test_suite_id = strTestSuiteId
         self.test_suite = strTestSuite
         self.project_code = strProjectCode
-        flavor_service = get_flavor_service(strSQLFlavor)
-        self.concat_operator = flavor_service.get_concat_operator()
+        self.flavor_service = get_flavor_service(strSQLFlavor)
         self.flavor = strSQLFlavor
         self.max_query_chars = max_query_chars
         self.today = date_service.get_now_as_string_with_offset(minutes_offset)
@@ -47,7 +45,7 @@ def _get_rollup_scores_sql(self) -> CRollupScoresSQL:
             self._rollup_scores_sql = CRollupScoresSQL(self.test_run_id, self.table_groups_id)
 
         return self._rollup_scores_sql
-    
+
     def _get_query(self, template_file_name: str, sub_directory: str | None = "exec_cat_tests", no_bind: bool = False) -> tuple[str, dict | None]:
         query = read_template_sql_file(template_file_name, sub_directory)
         params = {
@@ -58,8 +56,9 @@ def _get_query(self, template_file_name: str, sub_directory: str | None = "exec_
             "TEST_SUITE_ID": self.test_suite_id,
             "TABLE_GROUPS_ID": self.table_groups_id,
             "SQL_FLAVOR": self.flavor,
-            "ID_SEPARATOR": "`" if self.flavor == "databricks" else '"',
-            "CONCAT_OPERATOR": self.concat_operator,
+            "QUOTE": self.flavor_service.quote_character,
+            "VARCHAR_TYPE": self.flavor_service.varchar_type,
+            "CONCAT_OPERATOR": self.flavor_service.concat_operator,
             "SCHEMA_NAME": self.target_schema,
             "TABLE_NAME": self.target_table,
             "NOW_DATE": "GETDATE()",
@@ -73,7 +72,7 @@ def _get_query(self, template_file_name: str, sub_directory: str | None = "exec_
         query = replace_params(query, params)
         query = replace_templated_functions(query, self.flavor)
 
-        if no_bind and self.flavor != "databricks":
+        if no_bind:
             # Adding escape character where ':' is referenced
             query = query.replace(":", "\\:")
 
 
@@ -1,7 +1,8 @@
 from typing import ClassVar, TypedDict
 
-from testgen.common import AddQuotesToIdentifierCSV, CleanSQL, ConcatColumnList, date_service, read_template_sql_file
-from testgen.common.database.database_service import replace_params
+from testgen.common import date_service, read_template_sql_file
+from testgen.common.clean_sql import CleanSQL, ConcatColumnList, quote_identifiers
+from testgen.common.database.database_service import get_flavor_service, replace_params
 
 
 class TestParams(TypedDict):
@@ -54,6 +55,7 @@ class CTestExecutionSQL:
     def __init__(self, strProjectCode, strFlavor, strTestSuiteId, strTestSuite, minutes_offset=0):
         self.project_code = strProjectCode
         self.flavor = strFlavor
+        self.flavor_service = get_flavor_service(strFlavor)
         self.test_suite_id = strTestSuiteId
         self.test_suite = strTestSuite
         self.today = date_service.get_now_as_string_with_offset(minutes_offset)
@@ -100,20 +102,21 @@ def _get_query(
             "TEST_SUITE_ID": self.test_suite_id,
             "TEST_SUITE": self.test_suite,
             "SQL_FLAVOR": self.flavor,
+            "QUOTE": self.flavor_service.quote_character,
             "TEST_RUN_ID": self.test_run_id,
             "INPUT_PARAMETERS": self._get_input_parameters(),
             "RUN_DATE": self.run_date,
             "EXCEPTION_MESSAGE": self.exception_message,
             "START_TIME": self.today,
             "PROCESS_ID": self.process_id,
-            "VARCHAR_TYPE": "STRING" if self.flavor == "databricks" else "VARCHAR",
+            "VARCHAR_TYPE": self.flavor_service.varchar_type,
             "NOW_TIMESTAMP": date_service.get_now_as_string_with_offset(self.minutes_offset),
             **{key.upper(): value or "" for key, value in self.test_params.items()},
         }
 
         if self.test_params:
             column_name = self.test_params["column_name"]
-            params["COLUMN_NAME"] = AddQuotesToIdentifierCSV(column_name) if column_name else ""
+            params["COLUMN_NAME"] = quote_identifiers(column_name, self.flavor) if column_name else ""
             # Shows contents without double-quotes for display and aggregate expressions
             params["COLUMN_NAME_NO_QUOTES"] = column_name or ""
             # Concatenates column list into single expression for relative entropy
@@ -126,11 +129,13 @@ def _get_query(
             )
 
             subset_condition = self.test_params["subset_condition"]
-            params["SUBSET_DISPLAY"] = subset_condition.replace("'", "''") if subset_condition else ""
+            params["SUBSET_DISPLAY"] = subset_condition.replace(
+                "'", self.flavor_service.escaped_single_quote
+            ) if subset_condition else ""
 
         query = replace_params(query, params)
 
-        if no_bind and self.flavor != "databricks":
+        if no_bind:
             # Adding escape character where ':' is referenced
             query = query.replace(":", "\\:")
 
 
@@ -2,7 +2,7 @@
 from typing import ClassVar, TypedDict
 
 from testgen.common import CleanSQL, date_service, read_template_sql_file
-from testgen.common.database.database_service import replace_params
+from testgen.common.database.database_service import get_flavor_service, replace_params
 from testgen.common.read_file import get_template_files
 
 LOG = logging.getLogger("testgen")
@@ -29,7 +29,10 @@ class CDeriveTestsSQL:
 
     _use_clean = False
 
-    def __init__(self):
+    def __init__(self, flavor):
+        self.sql_flavor = flavor
+        self.flavor_service = get_flavor_service(flavor)
+
         today = date_service.get_now_as_string()
         self.run_date = today
         self.as_of_date = today
@@ -47,7 +50,7 @@ def _get_params(self) -> dict:
             "GENERATION_SET": self.generation_set,
             "AS_OF_DATE": self.as_of_date,
             "DATA_SCHEMA": self.data_schema,
-            "ID_SEPARATOR":  "`" if self.sql_flavor == "databricks" else '"',
+            "QUOTE": self.flavor_service.quote_character,
         }
 
     def _get_query(self, template_file_name: str, sub_directory: str | None = "generation") -> tuple[str, dict]:
 
@@ -1,9 +1,10 @@
+import re
 import typing
 
 from testgen.commands.queries.refresh_data_chars_query import CRefreshDataCharsSQL
 from testgen.commands.queries.rollup_scores_query import CRollupScoresSQL
 from testgen.common import date_service, read_template_sql_file, read_template_yaml_file
-from testgen.common.database.database_service import replace_params
+from testgen.common.database.database_service import get_flavor_service, replace_params
 from testgen.common.read_file import replace_templated_functions
 
 
@@ -21,6 +22,7 @@ class CProfilingSQL:
     col_name = ""
     col_gen_type = ""
     col_type = ""
+    db_data_type = ""
     col_ordinal_position = "0"
     col_is_decimal = ""
     col_top_freq_update = ""
@@ -98,6 +100,7 @@ def _get_params(self) -> dict:
             "COL_NAME_SANITIZED": self.col_name.replace("'", "''"),
             "COL_GEN_TYPE": self.col_gen_type,
             "COL_TYPE": self.col_type or "",
+            "DB_DATA_TYPE": self.db_data_type or "",
             "COL_POS": self.col_ordinal_position,
             "TOP_FREQ": self.col_top_freq_update,
             "PROFILE_RUN_ID": self.profile_run_id,
@@ -118,6 +121,7 @@ def _get_params(self) -> dict:
             "CONTINGENCY_MAX_VALUES": self.contingency_max_values,
             "PROCESS_ID": self.process_id,
             "SQL_FLAVOR": self.flavor,
+            "QUOTE": get_flavor_service(self.flavor).quote_character
         }
 
     def _get_query(
@@ -130,6 +134,7 @@ def _get_query(
         params = {}
 
         if query:
+            query = self._process_conditionals(query)
             if extra_params:
                 params.update(extra_params)
             params.update(self._get_params())
@@ -139,6 +144,33 @@ def _get_query(
 
         return query, params
 
+    def _process_conditionals(self, query: str):  # Resolves "-- TG-IF <attr>" / "-- TG-ELSE" / "-- TG-ENDIF" marker lines; returns the query text without the markers and without the lines of the inactive branch
+        re_pattern = re.compile(r"^--\s+TG-(IF|ELSE|ENDIF)(?:\s+(\w+))?\s*$")  # directive must start at column 0 and fill the whole line; group(2) is the optional attribute name
+        condition = None  # None = outside any IF block; True/False = lines of the current branch are kept/dropped
+        updated_query = []
+        for line in query.splitlines(True):  # True keeps line endings so the final join reproduces the surviving text unchanged
+            if re_match := re_pattern.match(line):
+                match re_match.group(1):
+                    case "IF" if condition is None and re_match.group(2) is not None:  # an IF inside an open block (no nesting) or an IF without a name falls through to `case _`
+                        condition = bool(getattr(self, re_match.group(2)))  # truthiness of the named attribute on self; an unknown name raises AttributeError, not ValueError
+                    case "ELSE" if condition is not None:
+                        condition = not condition  # NOTE(review): a second TG-ELSE in the same block flips the branch again instead of raising
+                    case "ENDIF" if condition is not None:
+                        condition = None
+                    case _:  # ELSE/ENDIF outside a block, nested IF, or IF with no attribute name
+                        raise ValueError("Template conditional misused")
+            elif condition is not False:  # keep ordinary lines outside a block (None) or in the active branch (True); marker lines are never appended
+                updated_query.append(line)
+
+        if condition is not None:  # a TG-IF was opened but never closed by TG-ENDIF
+            raise ValueError("Template conditional misused")
+
+        return "".join(updated_query)
+
+    @property
+    def do_sample_bool(self):  # boolean view of parm_do_sample; presumably intended as a TG-IF condition name read via getattr in _process_conditionals — verify against the templates
+        return self.parm_do_sample == "Y"  # True only for the exact string "Y"
+
     def GetSecondProfilingColumnsQuery(self) -> tuple[str, dict]:
         # Runs on App database
         return self._get_query("secondary_profiling_columns.sql")
@@ -260,7 +292,12 @@ def GetProfilingQuery(self) -> tuple[str, dict]:
         else:
             strQ += dctSnippetTemplate["strTemplate01_else"]
 
-        strQ += dctSnippetTemplate["strTemplate02_all"]
+        strQ += dctSnippetTemplate["strTemplate01_5"]
+
+        if self.col_gen_type == "X":
+            strQ += dctSnippetTemplate["strTemplate02_X"]
+        else:
+            strQ += dctSnippetTemplate["strTemplate02_else"]
 
         if self.col_gen_type in ["A", "D", "N"]:
             strQ += dctSnippetTemplate["strTemplate03_ADN"]
 
@@ -1,5 +1,5 @@
 from testgen.common import read_template_sql_file
-from testgen.common.database.database_service import replace_params
+from testgen.common.database.database_service import get_flavor_service, replace_params
 from testgen.common.database.flavor.flavor_service import SQLFlavor
 from testgen.utils import chunk_queries
 
@@ -44,43 +44,41 @@ def _get_query(self, template_file_name: str, sub_directory: str | None = "data_
         query = replace_params(query, params)
         return query, params
 
-    def _get_mask_query(self, mask: str, is_include: bool) -> str:
-        escape = ""
-        if self.sql_flavor.startswith("mssql"):
-            escaped_underscore = "[_]"
-        elif self.sql_flavor == "snowflake":
-            escaped_underscore = "\\\\_"
-            escape = "ESCAPE '\\\\'"
-        elif self.sql_flavor == "redshift":
-            escaped_underscore = "\\\\_"
-        else:
-            escaped_underscore = "\\_"
-
-        table_names = [ item.strip().replace("_", escaped_underscore) for item in mask.split(",") ]
-        sub_query = f"""
-            AND {"NOT" if not is_include else ""} (
-                {" OR ".join([ f"(c.table_name LIKE '{item}' {escape})" for item in table_names ])}
-            )
-        """
-
-        return sub_query
-    
-    def GetDDFQuery(self) -> tuple[str, dict]:
-        # Runs on Target database
-        query, params = self._get_query(f"schema_ddf_query_{self.sql_flavor}.sql", f"flavors/{self.sql_flavor}/data_chars")
-
+    def _get_table_criteria(self) -> str:  # Builds the " AND ..." SQL fragment from profiling_table_set / include mask / exclude mask; returns "" when none of them is set
         table_criteria = ""
+        flavor_service = get_flavor_service(self.sql_flavor)  # supplies the flavor-specific pieces used below: ddf_table_ref, escaped_underscore, escape_clause
+        
         if self.profiling_table_set:
-            table_criteria += f" AND c.table_name IN ({self.profiling_table_set})"
+            table_criteria += f" AND c.{flavor_service.ddf_table_ref} IN ({self.profiling_table_set})"  # NOTE(review): profiling_table_set is interpolated as-is — presumably an already-quoted, comma-separated SQL list; verify against caller
 
         if self.profiling_include_mask:
-            table_criteria += self._get_mask_query(self.profiling_include_mask, is_include=True)
+            include_table_names = [  # one LIKE pattern per comma-separated mask entry, with "_" replaced by the flavor's escaped form
+                item.strip().replace("_", flavor_service.escaped_underscore)
+                for item in self.profiling_include_mask.split(",")
+            ]  # NOTE(review): patterns are placed inside single-quoted SQL literals below without escaping quotes — confirm mask values are trusted
+            table_criteria += f"""
+            AND (
+                {" OR ".join([ f"(c.{flavor_service.ddf_table_ref} LIKE '{item}' {flavor_service.escape_clause})" for item in include_table_names ])}
+            )
+            """
 
         if self.profiling_exclude_mask:
-            table_criteria += self._get_mask_query(self.profiling_exclude_mask, is_include=False)
-
-        query = query.replace("{TABLE_CRITERIA}", table_criteria)
+            exclude_table_names = [  # same pattern preparation as the include mask; the resulting OR group is negated with AND NOT
+                item.strip().replace("_", flavor_service.escaped_underscore)
+                for item in self.profiling_exclude_mask.split(",")
+            ]
+            table_criteria += f"""
+            AND NOT (
+                {" OR ".join([ f"(c.{flavor_service.ddf_table_ref} LIKE '{item}' {flavor_service.escape_clause})" for item in exclude_table_names ])}
+            )
+            """
 
+        return table_criteria  # the three parts are concatenated, so every criterion that is set must hold
+    
+    def GetDDFQuery(self) -> tuple[str, dict]:  # Returns (query, params) for the flavor-specific schema_ddf_query template with the table filter filled in
+        # Runs on Target database
+        query, params = self._get_query(f"schema_ddf_query_{self.sql_flavor}.sql", f"flavors/{self.sql_flavor}/data_chars")
+        query = query.replace("{TABLE_CRITERIA}", self._get_table_criteria())  # placeholder is filled here by plain string replacement, after _get_query has returned the query
         return query, params
 
     def GetRecordCountQueries(self, schema_tables: list[str]) -> list[tuple[str, None]]: