Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 38 additions & 6 deletions treeherder/model/data_cycling/removal_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,45 @@ def max_timestamp(self):
return self._max_timestamp

def remove(self, using: CursorWrapper):
    """
    Delete one chunk of expired performance data with a single raw SQL statement.

    Raw SQL is used to avoid Django ORM cascade deletes on performance_datum_replicate.
    Although the WHERE clause in del_replicate looks redundant, it is intentionally kept to guide
    the PostgreSQL planner toward a more efficient execution plan.

    The statement selects up to one chunk of performance_datum rows whose push_timestamp is
    at or below the cutoff (oldest first), then deletes the matching rows from
    performance_datum_replicate, perf_multicommitdatum and performance_datum itself.

    :param using: database cursor on which the statement is executed
    """
    chunk_size = self._find_ideal_chunk_size()

    # Placeholder order must match the SQL below:
    #   1. cutoff for target_datum, 2. LIMIT for target_datum, 3. cutoff for del_replicate.
    params = [self._max_timestamp, chunk_size, self._max_timestamp]

    # NOTE(review): the previous ORM-based implementation assigned `using.rowcount`
    # explicitly; here callers presumably rely on the cursor's own rowcount after
    # execute() - confirm it reports the performance_datum delete count.
    using.execute(
        """
        WITH target_datum AS (
            SELECT pd.id, pd.push_timestamp
            FROM performance_datum pd
            WHERE pd.push_timestamp <= %s
            ORDER BY pd.push_timestamp
            LIMIT %s
        ),
        del_replicate AS (
            DELETE FROM performance_datum_replicate r1
            WHERE r1.performance_datum_id IN (
                SELECT td.id
                FROM target_datum td
                WHERE td.push_timestamp <= %s
                AND EXISTS (
                    SELECT 1
                    FROM performance_datum_replicate r2
                    WHERE r2.performance_datum_id = td.id
                )
            )
        ),
        del_multi AS (
            DELETE FROM perf_multicommitdatum pm
            USING target_datum td
            WHERE pm.perf_datum_id = td.id
        )
        DELETE FROM performance_datum pd
        USING target_datum td
        WHERE pd.id = td.id
        """,
        params,
    )

@property
def name(self) -> str:
Expand Down