Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
4c60a58
fix: handle spaces in variable names for crosstab (#305)
JessUWE Jan 30, 2026
4fdce36
refactor: rename functions for clarity and update docstrings for SDC …
JessUWE Jan 30, 2026
2d407e6
fix: handled spaces in variable names for crosstab
JessUWE Jan 30, 2026
598dd3e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 30, 2026
0127c27
fix: add coverage pragma to add_backticks function and ensure newline…
JessUWE Jan 30, 2026
9045ae3
Merge remote changes for code coverage pragma comment
JessUWE Jan 30, 2026
6c5ab0c
docs: add pragma no cover annotations to defensive/edge case code for…
JessUWE Jan 30, 2026
17570d8
fix: remove coverage pragma from rounded_survival_table function and …
JessUWE Jan 30, 2026
4922df6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 30, 2026
0c88c2e
test: add unit tests and fix import for rounded_survival_table
JessUWE Jan 30, 2026
fe31723
Merge test/add-rounded-survival-table-test into 305 branch
JessUWE Jan 30, 2026
74ac078
Add tests for backticks and crosstab with spaced variable names (Issu…
JessUWE Jan 30, 2026
1456dfd
Add tests for crosstab with spaced variable names (Issue #305)
JessUWE Jan 30, 2026
f322368
Remove import and Added tests for crosstab with spaced variable names…
JessUWE Jan 30, 2026
bbc21b1
Merge branch 'main' into 305-bug-error-when-using-crosstab-when-varia…
JessUWE Jan 31, 2026
9d63be9
fix: add type hints to _format_label_condition function
JessUWE Jan 31, 2026
deab03e
tested contents are right for cross tab with spaces in names
jim-smith Feb 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 94 additions & 58 deletions acro/acro_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ def survival_plot( # pylint: disable=too-many-arguments
filename, extension = os.path.splitext(filename)
if not extension: # pragma: no cover
logger.info("Please provide a valid file extension")
return None
return None # pragma: no cover
increment_number = 0
while os.path.exists(
f"acro_artifacts/{filename}_{increment_number}{extension}"
Expand Down Expand Up @@ -1273,6 +1273,92 @@ def get_summary(sdc: dict) -> tuple[str, str]:
return status, summary


def add_backticks(name: str) -> str:
"""Add backticks to a name if it contains spaces and doesn't have them.

Parameters
----------
name : str
The name to add backticks to.

Returns
-------
str
The name with backticks if needed.
"""
if isinstance(name, str) and " " in name and not name.startswith("`"):
return f"`{name}`"
return name # pragma: no cover


def _format_label_condition(level_names: list, label: str) -> list[str]:
"""Format a label into a list of condition strings.

Parameters
----------
level_names : list
The names of the levels.
label : tuple or scalar
The label value(s).

Returns
-------
list[str]
List of condition strings for this label.
"""
parts = []
if isinstance(label, tuple):
for level, val in zip(level_names, label, strict=False):
level = add_backticks(str(level))
if isinstance(val, (int, float)):
parts.append(f"({level} == {val})")
else:
parts.append(f'({level} == "{val}")')
else:
level = add_backticks(str(level_names[0]))
if isinstance(label, (int, float)):
parts.append(f"({level} == {label})")
else:
parts.append(f'({level} == "{label}")')
return parts


def _get_cell_query(
mask, row_index, col_index, index_level_names, column_level_names
) -> str | None:
"""Generate a query string for a cell if it's marked as true in the mask.

Parameters
----------
mask : DataFrame
The suppression mask.
row_index : int
Row index.
col_index : int
Column index.
index_level_names : list
Names of index levels.
column_level_names : list
Names of column levels.

Returns
-------
str or None
Query string if cell is true, None otherwise.
"""
if not mask.iloc[row_index, col_index]:
return None

parts = []
row_label = mask.index[row_index]
col_label = mask.columns[col_index]

parts.extend(_format_label_condition(index_level_names, row_label))
parts.extend(_format_label_condition(column_level_names, col_label))

return " & ".join(parts)


def get_queries(masks, aggfunc) -> list[str]:
"""Return a list of the boolean conditions for each true cell in the suppression masks.

Expand All @@ -1288,69 +1374,20 @@ def get_queries(masks, aggfunc) -> list[str]:
str
The boolean conditions for each true cell in the suppression masks.
"""
# initialize a list to store queries for true cells
true_cell_queries = []
for _, mask in masks.items():
# drop the name of the mask
if aggfunc is not None:
if mask.columns.nlevels > 1:
mask = mask.droplevel(0, axis=1)
# identify level names for rows and columns
index_level_names = mask.index.names
column_level_names = mask.columns.names
# iterate through the masks to identify the true cells and extract queries
for col_index, col_label in enumerate(mask.columns):
for row_index, row_label in enumerate(mask.index):
if mask.iloc[row_index, col_index]:
if isinstance(row_label, tuple):
index_query = " & ".join(
[
(
f"({level} == {val})"
if isinstance(val, (int, float))
else f'({level} == "{val}")'
)
for level, val in zip(
index_level_names, row_label, strict=False
)
]
)
else:
index_query = " & ".join(
[
(
f"({index_level_names} == {row_label})"
if isinstance(row_label, (int, float))
else (f'({index_level_names}== "{row_label}")')
)
]
)
if isinstance(col_label, tuple):
column_query = " & ".join(
[
(
f"({level} == {val})"
if isinstance(val, (int, float))
else f'({level} == "{val}")'
)
for level, val in zip(
column_level_names, col_label, strict=False
)
]
)
else:
column_query = " & ".join(
[
(
f"({column_level_names} == {col_label})"
if isinstance(col_label, (int, float))
else (f'({column_level_names}== "{col_label}")')
)
]
)
query = f"{index_query} & {column_query}"
for col_index, _ in enumerate(mask.columns):
for row_index, _ in enumerate(mask.index):
query = _get_cell_query(
mask, row_index, col_index, index_level_names, column_level_names
)
if query is not None:
true_cell_queries.append(query)
# delete the duplication
true_cell_queries = list(set(true_cell_queries))
return true_cell_queries

Expand Down Expand Up @@ -1392,7 +1429,7 @@ def create_dataframe(index, columns) -> DataFrame:

try:
data = pd.concat([index_df, columns_df], axis=1)
except (ValueError, TypeError):
except (ValueError, TypeError): # pragma: no cover
data = empty_dataframe

return data
Expand Down Expand Up @@ -1502,7 +1539,6 @@ def crosstab_with_totals( # pylint: disable=too-many-arguments,too-many-locals
data = create_dataframe(index, columns)
# apply the queries to the data
for query in true_cell_queries:
query = str(query).replace("['", "").replace("']", "")
data = data.query(f"not ({query})")

# get the index and columns from the data after the queries are applied
Expand Down
49 changes: 49 additions & 0 deletions test/test_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,55 @@ def acro() -> ACRO:
return ACRO(suppress=True)


def test_add_backticks():
"""Test the add_backticks helper function."""
# Test simple string without spaces (no backticks added)
assert acro_tables.add_backticks("foo") == "foo"

# Test string with spaces (backticks should be added)
assert acro_tables.add_backticks("foo bar") == "`foo bar`"

# Test string already with backticks (no change)
assert acro_tables.add_backticks("`foo bar`") == "`foo bar`"

# Test multiple spaces
assert acro_tables.add_backticks("foo bar baz") == "`foo bar baz`"


def test_crosstab_with_spaces_in_variable_names(data, acro):
"""Test crosstab with spaces in column names (Issue #305)."""
# Create a test dataframe with a column name containing spaces
test_data = data.copy()
test_data["grant type with spaces"] = test_data["grant_type"]
test_data["year of study"] = test_data["year"]

# This should handle spaces in variable names correctly
# first check is that it behaves the same as pandas without suppression
acro.suppress = False
pandas_nospace_version = pd.crosstab(data["year"], data["grant_type"], margins=True)
acro_with_spaces_version = acro.crosstab(
test_data["year of study"], test_data["grant type with spaces"], margins=True
)
assert (
acro_with_spaces_version.to_numpy() == pandas_nospace_version.to_numpy()
).all()
# Verify that suppression was not applied in this case
assert acro.results.get_index(-1).status == "fail"

# Verify the crosstab was created successfully

# turn suppression back on, run rest of checks
acro.suppress = True
result = acro.crosstab(
test_data["year of study"], test_data["grant type with spaces"], margins=True
)
assert isinstance(result, pd.DataFrame)
assert not result.empty

# Verify that suppression was applied in second case
assert acro.results.get_index(-1).status == "review"


def test_crosstab_without_suppression(data):
"""Crosstab threshold without automatic suppression."""
acro = ACRO(suppress=False)
Expand Down