Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2112,7 +2112,9 @@ def needs_oauth2(cls, ex: Exception) -> bool:
return g and hasattr(g, "user") and isinstance(ex, cls.oauth2_exception)

@classmethod
def make_label_compatible(cls, label: str) -> str | quoted_name:
def make_label_compatible(
cls, label: str, database: Optional[Database] = None
) -> str | quoted_name:
"""
Conditionally mutate and/or quote a sqlalchemy expression label. If
force_column_alias_quotes is set to True, return the label as a
Expand All @@ -2122,9 +2124,15 @@ def make_label_compatible(cls, label: str) -> str | quoted_name:
generate a truncated label by calling truncate_label().

:param label: expected expression label/alias
:param database: optional Database instance for db-specific label mutation logic
:return: conditionally mutated label supported by the db engine
"""
label_mutated = cls._mutate_label(label)
if "database" in signature(cls._mutate_label).parameters:
# Dynamic dispatch based on subclass signature
label_mutated = cls._mutate_label(label, database=database) # type: ignore[call-arg]
else:
label_mutated = cls._mutate_label(label)

if (
cls.max_column_name_length
and len(label_mutated) > cls.max_column_name_length
Expand Down
36 changes: 31 additions & 5 deletions superset/db_engine_specs/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,15 +519,41 @@ def validate_parameters(
return []

@staticmethod
def _mutate_label(label: str) -> str:
def _mutate_label(label: str, database: Database | None = None) -> str:
"""
Suffix with the first six characters from the md5 of the label to avoid
collisions with original column names
Conditionally suffix the label with the first six characters from the md5
of the label to avoid collisions with original column names.

By default, labels are mutated for backward compatibility. To disable this
behavior on a per-database basis, set `mutate_label_name` to `false` in
the database's "extra" JSON configuration in the Superset UI:

{
"mutate_label_name": false
}

This is useful when your ClickHouse queries refer to their own aliases
within the same SELECT statement.

:param label: Expected expression label
:return: Conditionally mutated label
:param database: Database instance for db-specific label mutation logic
:return: Mutated label if mutation is enabled, otherwise the original label
"""
return f"{label}_{hash_from_str(label)[:6]}"
mutate_label = True

if database:
try:
extra = database.get_extra()
mutate_label = extra.get("mutate_label_name", True)
except Exception: # pylint: disable=broad-except
logger.warning(
"Error retrieving mutate_label_name setting. Falling back to "
"default behavior of True."
Comment on lines +544 to +551
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Overly broad exception handling in try-except

Replace the broad Exception catch with a more specific exception type. Consider catching (KeyError, ValueError, TypeError) or other specific exceptions that database.get_extra() might raise.

Code suggestion
Check the AI-generated fix before applying
Suggested change
if database:
try:
extra = database.get_extra()
mutate_label = extra.get("mutate_label_name", True)
except Exception: # pylint: disable=broad-except
logger.warning(
"Error retrieving mutate_label_name setting. Falling back to "
"default behavior of True."
if database:
try:
extra = database.get_extra()
mutate_label = extra.get("mutate_label_name", True)
except (KeyError, ValueError, TypeError, AttributeError):
logger.warning(
"Error retrieving mutate_label_name setting. Falling back to "
"default behavior of True."

Code Review Run #7382ba


Should Bito avoid suggestions like this for future reviews? (Manage Rules)

  • Yes, avoid them

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the broad exception here is ok. get_extra() calls get_extra_params() which could raise json.JSONDecodeError and isn't even the suggested list. Other error types could be potentially raised along the way as well. This is a simple label formatting helper limited to the clickhouse spec so if anything goes wrong, no matter what it is, it'd be preferable to fall back to default rather than crash.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The broad exception handling in the suggestion is appropriate for this label formatting helper, as it prioritizes graceful fallback over potential crashes from various errors during settings retrieval.

superset/db_engine_specs/clickhouse.py

if database:
            try:
                extra = database.get_extra()
                mutate_label = extra.get("mutate_label_name", True)
            except (KeyError, ValueError, TypeError, AttributeError):
                logger.warning(
                    "Error retrieving mutate_label_name setting. Falling back to "
                    "default behavior of True."

)

if mutate_label:
return f"{label}_{hash_from_str(label)[:6]}"
return label

@classmethod
def adjust_engine_params(
Expand Down
4 changes: 3 additions & 1 deletion superset/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,7 +1217,9 @@ def make_sqla_column_compatible(
label_expected = label or sqla_col.name
# add quotes to tables
if self.db_engine_spec.get_allows_alias_in_select(self):
label = self.db_engine_spec.make_label_compatible(label_expected)
label = self.db_engine_spec.make_label_compatible(
label_expected, database=self
)
sqla_col = sqla_col.label(label)
sqla_col.key = label_expected
return sqla_col
Expand Down
13 changes: 9 additions & 4 deletions superset/models/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,7 +1013,9 @@ def make_sqla_column_compatible(
db_engine_spec = self.db_engine_spec
# add quotes to tables
if db_engine_spec.get_allows_alias_in_select(self.database):
label = db_engine_spec.make_label_compatible(label_expected)
label = db_engine_spec.make_label_compatible(
label_expected, database=self.database
)
sqla_col = sqla_col.label(label)
sqla_col.key = label_expected
return sqla_col
Expand Down Expand Up @@ -2684,8 +2686,9 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
rejected_adhoc_filters_columns: list[Union[str, ColumnTyping]] = []
applied_adhoc_filters_columns: list[Union[str, ColumnTyping]] = []
db_engine_spec = self.db_engine_spec
# Ensure db-specific label mutation logic can access the current database
series_column_labels = [
db_engine_spec.make_label_compatible(column)
db_engine_spec.make_label_compatible(column, database=self.database)
for column in utils.get_column_names(
columns=series_columns or [],
)
Expand Down Expand Up @@ -3301,7 +3304,9 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
# in this case the column name, not the alias, needs to be
# conditionally mutated, as it refers to the column alias in
# the inner query
col_name = db_engine_spec.make_label_compatible(gby_name + "__")
col_name = db_engine_spec.make_label_compatible(
gby_name + "__", database=self.database
)
on_clause.append(gby_obj == sa.column(col_name))

# Use LEFT JOIN when grouping others, INNER JOIN otherwise
Expand All @@ -3318,7 +3323,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
def _create_join_condition(col_name: str, expr: Any) -> Any:
# Get the corresponding column from the subquery
subq_col_name = db_engine_spec.make_label_compatible(
col_name + "__"
col_name + "__", database=self.database
)
# Reference the column from the already-created aliased subquery
subq_col = subq_alias.c[subq_col_name]
Expand Down
27 changes: 27 additions & 0 deletions tests/unit_tests/db_engine_specs/test_clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,33 @@ def test_connect_make_label_compatible(column_name: str, expected_result: str) -
assert label == expected_result


@pytest.mark.parametrize(
"extra,column_name,expected_result",
[
# Default behavior (mutate labels)
(None, "time", "time_07cc69"),
({}, "time", "time_07cc69"),
# Explicitly enable
({"mutate_label_name": True}, "time", "time_07cc69"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: The new parametrized test for label mutation with extra expects a different default mutated label (time_07cc69) than the existing test_connect_make_label_compatible (time_336074), even though both exercise the same default behavior for make_label_compatible, which will either cause this test to fail or incorrectly document the function's output. The expected mutated labels in the new test should be aligned with the existing test so that default behavior (with or without a database argument when mutate_label_name is enabled or absent) is consistent. [logic error]

Severity Level: Major ⚠️
- ❌ ClickHouseConnect label mutation tests cannot both pass simultaneously.
- ⚠️ Confusing contract for default label mutation with extra.
- ⚠️ CI reliability reduced by contradictory test expectations.
Suggested change
(None, "time", "time_07cc69"),
({}, "time", "time_07cc69"),
# Explicitly enable
({"mutate_label_name": True}, "time", "time_07cc69"),
(None, "time", "time_336074"),
({}, "time", "time_336074"),
# Explicitly enable
({"mutate_label_name": True}, "time", "time_336074"),
Steps of Reproduction ✅
1. Run the unit tests for ClickHouse connect engine specs: `pytest
tests/unit_tests/db_engine_specs/test_clickhouse.py`.

2. Observe `test_connect_make_label_compatible` (around line 223) which imports
`ClickHouseConnectEngineSpec` and asserts that `spec.make_label_compatible("time")`
returns `"time_336074"` as the default mutated label.

3. Observe `test_connect_make_label_compatible_with_extra` (around line 245) in the same
file, which also calls `ClickHouseConnectEngineSpec.make_label_compatible("time",
database=mock_database)` with `extra` set to `None`, `{}`, or `{"mutate_label_name":
True}`, but asserts the default mutated label is `"time_07cc69"` (lines 235–239).

4. Because both tests exercise the same function and both treat these cases as the
"default behavior (mutate labels)", there is no possible implementation where both
expectations (`"time_336074"` and `"time_07cc69"` for the same default setting) can be
satisfied simultaneously; running the test file will therefore cause at least one of these
tests to fail, or it will encode an inconsistent contract for default label mutation if
the implementation is changed to satisfy one side only.
Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** tests/unit_tests/db_engine_specs/test_clickhouse.py
**Line:** 236:239
**Comment:**
	*Logic Error: The new parametrized test for label mutation with `extra` expects a different default mutated label (`time_07cc69`) than the existing `test_connect_make_label_compatible` (`time_336074`), even though both exercise the same default behavior for `make_label_compatible`, which will either cause this test to fail or incorrectly document the function's output. The expected mutated labels in the new test should be aligned with the existing test so that default behavior (with or without a `database` argument when `mutate_label_name` is enabled or absent) is consistent.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
👍 | 👎

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Label mutation hash default switched from md5 to sha256. Will fix.

# Disabled (no mutation)
({"mutate_label_name": False}, "time", "time"),
({"mutate_label_name": False}, "count", "count"),
],
Comment on lines 235 to 243
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Incorrect test expectations

The test expects MD5-based hash suffixes ('time_07cc69'), but the code now uses SHA-256 by default, producing 'time_336074'. Update all expected results to match SHA-256 hashes to prevent test failures.

Code suggestion
Check the AI-generated fix before applying
Suggested change
# Default behavior (mutate labels)
(None, "time", "time_07cc69"),
({}, "time", "time_07cc69"),
# Explicitly enable
({"mutate_label_name": True}, "time", "time_07cc69"),
# Disabled (no mutation)
({"mutate_label_name": False}, "time", "time"),
({"mutate_label_name": False}, "count", "count"),
],
# Default behavior (mutate labels)
(None, "time", "time_336074"),
({}, "time", "time_336074"),
# Explicitly enable
({"mutate_label_name": True}, "time", "time_336074"),
# Disabled (no mutation)
({"mutate_label_name": False}, "time", "time"),
({"mutate_label_name": False}, "count", "count"),
({"mutate_label_name": True}, "count", "count_6c3549"),
],

Code Review Run #7382ba


Should Bito avoid suggestions like this for future reviews? (Manage Rules)

  • Yes, avoid them

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Captured above by codeant, will fix.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This question isn’t related to the pull request. I can only help with questions about the PR’s code or comments.

)
def test_connect_make_label_compatible_with_extra(
extra: Optional[dict[str, Any]], column_name: str, expected_result: str
) -> None:
from superset.db_engine_specs.clickhouse import (
ClickHouseConnectEngineSpec as spec, # noqa: N813
)

mock_database = Mock()
mock_database.get_extra = Mock(return_value=extra or {})

label = spec.make_label_compatible(column_name, database=mock_database)
assert label == expected_result


@pytest.mark.parametrize(
"schema, expected_result",
[
Expand Down