Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions backend/kesaseteli/applications/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,26 +576,33 @@ def get_queryset(self, request):
class EmployerSummerVoucherAdmin(admin.ModelAdmin):
list_display = [
"id",
"summer_voucher_serial_number",
"youth_summer_voucher_id",
"_obsolete_unclean_serial_number",
"has_migrated_obsolete_serial_number",
"target_group",
"application__company__name",
"created_at",
"modified_at",
]
list_filter = [
"target_group",
("youth_summer_voucher_id", admin.EmptyFieldListFilter),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

("_obsolete_unclean_serial_number", admin.EmptyFieldListFilter),
"created_at",
"modified_at",
]
date_hierarchy = "created_at"
search_fields = [
"summer_voucher_serial_number",
"youth_summer_voucher__summer_voucher_serial_number",
"_obsolete_unclean_serial_number",
"id",
"application__company__name",
]
autocomplete_fields = ["application"]
autocomplete_fields = [
"application",
"youth_summer_voucher",
]
readonly_fields = [
"summer_voucher_serial_number",
"target_group",
"application",
"masked_employee_ssn",
Expand Down Expand Up @@ -656,8 +663,24 @@ def queryset(self, request):
.queryset(request)
.select_related("application")
.select_related("application__company")
.select_related("youth_summer_voucher")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good! 👍

)

def has_migrated_obsolete_serial_number(self, obj):
    """
    Tell whether the obsolete serial number of ``obj`` needs no more migration.

    Returns True when the stripped ``_obsolete_unclean_serial_number`` is empty,
    or when it is a plain ASCII digit string equal to the
    ``summer_voucher_serial_number`` of the linked ``youth_summer_voucher``.
    Returns False in every other case.

    The result is always a real ``bool``. The previous expression evaluated to
    ``None`` when the serial was numeric but no youth voucher was linked, which
    a boolean admin column does not show as a plain "no".
    """
    old_serial = obj._obsolete_unclean_serial_number.strip()
    if not old_serial:
        # Nothing to migrate
        return True
    # str.isdigit() alone also accepts characters such as "²" that int()
    # rejects with ValueError, so require ASCII digits only.
    if not (old_serial.isascii() and old_serial.isdigit()):
        return False
    youth_voucher = obj.youth_summer_voucher
    return (
        bool(youth_voucher)
        and int(old_serial) == youth_voucher.summer_voucher_serial_number
    )

has_migrated_obsolete_serial_number.boolean = True # i.e. True/False icon
has_migrated_obsolete_serial_number.short_description = _(
"Has obsolete serial number been migrated?"
)

def masked_employee_ssn(self, obj):
    """Return the employee's social security number in masked form for display."""
    employee_ssn = obj.employee_ssn
    return mask_social_security_number(employee_ssn)
Expand Down
7 changes: 7 additions & 0 deletions backend/kesaseteli/applications/api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,13 @@ class EmployerSummerVoucherSerializer(serializers.ModelSerializer):
many=True,
help_text="Attachments of the application (read-only)",
)
# Backward compatibility field for frontend using
# EmployerSummerVoucher summer_voucher_serial_number property and its setter:
summer_voucher_serial_number = serializers.CharField(
max_length=256,
allow_blank=True,
required=False,
)

class Meta:
model = EmployerSummerVoucher
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class ExcelField(NamedTuple):
INVOICER_EMAIL_FIELD_TITLE = _("Laskuttajan sähköposti")
INVOICER_NAME_FIELD_TITLE = _("Laskuttajan nimi")
INVOICER_PHONE_NUMBER_FIELD_TITLE = _("Laskuttajan Puhelin")
VOUCHER_NUMBER_FIELD_TITLE = _("Setelin numero")

REMOVABLE_REPORTING_FIELD_TITLES = [
_("Y-tunnus"),
Expand Down Expand Up @@ -105,7 +106,7 @@ class ExcelField(NamedTuple):
APPLICATION_LANGUAGE_FIELD_TITLE, "%s", ["application__language"], 15, "white"
),
ExcelField(
_("Setelin numero"), "%s", ["summer_voucher_serial_number"], 30, "white"
VOUCHER_NUMBER_FIELD_TITLE, "%s", ["summer_voucher_serial_number"], 30, "white"
),
ExcelField(
SPECIAL_CASE_FIELD_TITLE,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
# Schema-only migration: it contains rename/alter/add field operations and no
# RunPython step, so no serial number data is converted here.
dependencies = [
("applications", "0040_alter_employerapplication_options_and_more"),
]

operations = [
# Step 1: keep the old free-text serial number under a new, clearly obsolete
# name, on both the model and its history model.
migrations.RenameField(
model_name="employersummervoucher",
old_name="summer_voucher_serial_number",
new_name="_obsolete_unclean_serial_number",
),
migrations.RenameField(
model_name="historicalemployersummervoucher",
old_name="summer_voucher_serial_number",
new_name="_obsolete_unclean_serial_number",
),
# Step 2: redefine the renamed field as an optional (blank=True) CharField of
# at most 256 characters with a help text and verbose name describing it as
# obsolete.
migrations.AlterField(
model_name="employersummervoucher",
name="_obsolete_unclean_serial_number",
field=models.CharField(
blank=True,
help_text="Old obsolete unclean summer_voucher_serial_number values before data migration in early 2026. Can be used for manual data cleaning and as fallback summer_voucher_serial_number values in historical data.",
max_length=256,
verbose_name="obsolete unclean summer voucher serial number",
),
),
migrations.AlterField(
model_name="historicalemployersummervoucher",
name="_obsolete_unclean_serial_number",
field=models.CharField(
blank=True,
help_text="Old obsolete unclean summer_voucher_serial_number values before data migration in early 2026. Can be used for manual data cleaning and as fallback summer_voucher_serial_number values in historical data.",
max_length=256,
verbose_name="obsolete unclean summer voucher serial number",
),
),
# Step 3: add a nullable ForeignKey to YouthSummerVoucher. It is stored in a
# column named "summer_voucher_serial_number" (db_column) and references
# YouthSummerVoucher.summer_voucher_serial_number (to_field) rather than the
# primary key. PROTECT blocks deleting a youth voucher that is still referenced.
# NOTE(review): reusing the old column name presumably relies on the
# RenameField in step 1 having renamed the database column as well - confirm.
migrations.AddField(
model_name="employersummervoucher",
name="youth_summer_voucher",
field=models.ForeignKey(
db_column="summer_voucher_serial_number",
null=True,
blank=True,
on_delete=django.db.models.deletion.PROTECT,
related_name="employer_summer_vouchers",
to="applications.youthsummervoucher",
to_field="summer_voucher_serial_number",
verbose_name="youth summer voucher",
),
),
# History model counterpart: same column and target, but without a database
# constraint (db_constraint=False), with DO_NOTHING on delete and without a
# reverse accessor (related_name="+").
migrations.AddField(
model_name="historicalemployersummervoucher",
name="youth_summer_voucher",
field=models.ForeignKey(
blank=True,
db_column="summer_voucher_serial_number",
db_constraint=False,
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="+",
to="applications.youthsummervoucher",
to_field="summer_voucher_serial_number",
verbose_name="youth summer voucher",
),
),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from django.db import migrations

from applications.migrations.helpers.serial_number_foreign_keys import (
set_current_valid_serial_number_based_foreign_keys,
set_historical_serial_number_based_foreign_keys,
)


def set_current_and_historical_serial_number_based_foreign_keys(apps, schema_editor):
    """
    Fill in youth_summer_voucher_id for current and historical employer vouchers.

    Looks up the EmployerSummerVoucher, YouthSummerVoucher and
    HistoricalEmployerSummerVoucher models from the migration app registry and
    hands them to the helper functions, which derive the ForeignKey values from
    the _obsolete_unclean_serial_number values.
    """
    app_label = "applications"
    employer_vouchers = apps.get_model(app_label, "EmployerSummerVoucher")
    youth_vouchers = apps.get_model(app_label, "YouthSummerVoucher")
    historical_employer_vouchers = apps.get_model(
        app_label, "HistoricalEmployerSummerVoucher"
    )

    # Current rows first, then the history rows
    set_current_valid_serial_number_based_foreign_keys(
        employer_vouchers, youth_vouchers
    )
    set_historical_serial_number_based_foreign_keys(historical_employer_vouchers)


class Migration(migrations.Migration):
    """Data migration that runs the serial number to ForeignKey conversion."""

    dependencies = [
        (
            "applications",
            "0041_rename_serial_number_field_and_add_youth_voucher_foreign_key",
        ),
    ]

    operations = [
        migrations.RunPython(
            code=set_current_and_historical_serial_number_based_foreign_keys,
            # Reversing this migration does nothing to the data
            reverse_code=migrations.RunPython.noop,
        ),
    ]
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import logging
from collections import defaultdict

from django.db.models import PositiveBigIntegerField
from django.db.models.functions import Cast, Trim
from stdnum.fi.hetu import is_valid as is_valid_finnish_social_security_number

LOGGER = logging.getLogger(__name__)


def set_current_valid_serial_number_based_foreign_keys(
    employer_summer_voucher_model, youth_summer_voucher_model
):
    """
    Convert valid serial number strings in _obsolete_unclean_serial_number to actual
    valid ForeignKey references in youth_summer_voucher_id in EmployerSummerVoucher
    model.

    Matching is done first by purely numeric (ASCII digit) serial numbers, and if
    that fails, by matching social security number and creation year. Employer
    vouchers that match neither way are left untouched.

    :param employer_summer_voucher_model: EmployerSummerVoucher model class
    :param youth_summer_voucher_model: YouthSummerVoucher model class
    """
    # Real data sizes in production on 2026-01-27 for memory and performance context:
    # ~8k EmployerSummerVoucher objects
    # ~17k YouthSummerVoucher objects
    # ~19k YouthApplication objects
    total_count = matched_by_ssn_count = matched_by_serial_count = 0

    # Load all YouthSummerVoucher objects into memory for efficient lookup:
    all_youth_vouchers = list(
        youth_summer_voucher_model.objects.select_related("youth_application").only(
            "id",
            "summer_voucher_serial_number",
            "youth_application__social_security_number",
            "youth_application__encrypted_social_security_number",
            "youth_application__created_at",
        )
    )

    # Create a mapping from unique serial numbers to YouthSummerVoucher for quick lookup:
    serial_to_youth_voucher = {
        v.summer_voucher_serial_number: v
        for v in all_youth_vouchers
        if v.summer_voucher_serial_number
    }

    # Create a mapping from valid social security numbers to YouthSummerVouchers
    # for quick lookup:
    ssn_to_youth_vouchers = defaultdict(list)
    for v in all_youth_vouchers:
        ssn = v.youth_application.social_security_number
        if is_valid_finnish_social_security_number(ssn):
            ssn_to_youth_vouchers[ssn].append(v)

    employer_vouchers_to_update = []

    # Try to find matching YouthSummerVoucher for each EmployerSummerVoucher
    for employer_voucher in employer_summer_voucher_model.objects.all().iterator(
        chunk_size=1000
    ):
        total_count += 1
        serial_number = employer_voucher._obsolete_unclean_serial_number.strip()

        # Purely numeric serial numbers? These should be the majority.
        # Require ASCII digits: str.isdigit() alone also accepts characters such
        # as "²" that int() rejects with ValueError, which would abort the whole
        # migration on a single unclean value.
        youth_voucher = None
        if serial_number.isascii() and serial_number.isdigit():
            youth_voucher = serial_to_youth_voucher.get(int(serial_number))

        if youth_voucher:
            matched_by_serial_count += 1
        else:  # Try matching by social security number & application year
            # .get() instead of indexing so that the defaultdict does not grow
            candidates = [
                v
                for v in ssn_to_youth_vouchers.get(employer_voucher.employee_ssn, [])
                if v.youth_application.created_at.year
                == employer_voucher.created_at.year
                and v.youth_application.created_at <= employer_voucher.created_at
            ]
            # There should be only at most one youth voucher per SSN per year,
            # anything else is ambiguous and left unmatched
            if len(candidates) == 1:
                youth_voucher = candidates[0]
                matched_by_ssn_count += 1

        if youth_voucher:
            employer_voucher.youth_summer_voucher = youth_voucher
            employer_vouchers_to_update.append(employer_voucher)

    # Bulk update all matched EmployerSummerVoucher objects
    updated_count = employer_summer_voucher_model.objects.bulk_update(
        employer_vouchers_to_update,
        ["youth_summer_voucher"],
        batch_size=500,  # To limit a single batch's SQL UPDATE clause size
    )

    # Log summary of results
    LOGGER.info(
        f"Handled {total_count} employer summer vouchers, updated {updated_count}:"
    )
    LOGGER.info(f"- Matched by voucher serial number: {matched_by_serial_count}")
    LOGGER.info(f"- Matched by social security number & year: {matched_by_ssn_count}")
    LOGGER.info(f"- Failed to match: {total_count - updated_count} and left as is")


def set_historical_serial_number_based_foreign_keys(
    historical_employer_summer_voucher_model,
):
    """
    Copy plain numeric obsolete serial numbers into youth_summer_voucher_id.

    For HistoricalEmployerSummerVoucher rows whose _obsolete_unclean_serial_number
    is a non-negative integer string (optionally surrounded by whitespace), the
    trimmed value is cast to an integer and stored in youth_summer_voucher_id.
    All other rows are not updated.

    Not trying to make the historical records perfect, just doing the minimum
    obvious conversion.
    """
    history_rows = historical_employer_summer_voucher_model.objects
    total_count = history_rows.count()

    # Production had around 55k HistoricalEmployerSummerVoucher rows in Jan 2026
    numeric_rows = history_rows.filter(
        # Non-negative integer strings with possible leading/trailing whitespace:
        _obsolete_unclean_serial_number__regex=r"^\s*[0-9]+\s*$"
    )
    serial_as_integer = Cast(
        Trim("_obsolete_unclean_serial_number"), PositiveBigIntegerField()
    )
    updated_count = numeric_rows.update(youth_summer_voucher_id=serial_as_integer)

    LOGGER.info(f"Handled {total_count} historical employer summer vouchers:")
    LOGGER.info(f"- Converted numeric serial numbers to integers: {updated_count}")
    LOGGER.info(f"- Left as NULL: {total_count - updated_count}")
Comment on lines 1 to 122
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From AI:

Performance (N+1 Problem): The current implementation iterates over every EmployerSummerVoucher (approx. 8000 rows) and performs 1-2 database queries inside the loop (.filter(...).count() and .first()). This results in 8,000–16,000 queries, which will be quite slow (likely minutes).

Unnecessary Logging: Logging every successful match (8000 times) will flood the terminal/logs. It is better to log only failures or a final summary.

Proposed Improvements:

In-Memory Lookups: Since the dataset is relatively small (~8,000–20,000 records), we can pre-fetch the YouthSummerVoucher data into dictionaries. This reduces the number of queries to just 2 (one for each table) plus one save operation.

bulk_update: Instead of calling employer_voucher.save() in every iteration, we can collect the modified objects and save them all at once at the end.

Cleaner Logging: Log only the summary stats and warnings for failure cases.

Suggested change
import logging
from django.db.models import PositiveBigIntegerField
from django.db.models.functions import Cast, Trim
from stdnum.fi.hetu import is_valid as is_valid_finnish_social_security_number
LOGGER = logging.getLogger(__name__)
def set_current_valid_serial_number_based_foreign_keys(
employer_summer_voucher_model, youth_summer_voucher_model
):
"""
Convert valid serial number strings in _obsolete_unclean_serial_number to actual valid
ForeignKey references in youth_summer_voucher_id in EmployerSummerVoucher model.
Matching is done first by purely numeric serial numbers, and if that fails, by matching
social security number and creation year.
"""
# Production had around 8000 EmployerSummerVoucher objects at the start of 2026,
# so the author judged per-row queries and saves to be acceptable here.
total_count = employer_summer_voucher_model.objects.count()
matched_by_ssn_count = matched_by_serial_count = failure_count = 0
for employer_voucher in employer_summer_voucher_model.objects.all().iterator(
chunk_size=1000
):
serial_number = employer_voucher._obsolete_unclean_serial_number.strip()
# Purely numeric serial numbers? These should be the majority
# This branch issues a count() query per employer voucher and, on a match,
# a first() query plus a save().
if (
serial_number.isdigit()
and (
youth_vouchers := youth_summer_voucher_model.objects.filter(
summer_voucher_serial_number=int(serial_number)
)
).count()
== 1
):
youth_voucher = youth_vouchers.first()
employer_voucher.youth_summer_voucher = youth_voucher
employer_voucher.save()
# Successfully matched by numeric serial number, skip to next
matched_by_serial_count += 1
LOGGER.info(
f"Matched employer voucher {employer_voucher.id} to "
f"youth voucher {youth_voucher.id} by summer voucher serial number."
)
# Fallback: exactly one youth voucher whose application has the same SSN, was
# created in the same year and not later than the employer voucher. Only tried
# when the employee SSN is a valid Finnish social security number.
elif is_valid_finnish_social_security_number(
employer_voucher.employee_ssn
) and (
(
youth_vouchers := youth_summer_voucher_model.objects.filter(
youth_application__social_security_number=employer_voucher.employee_ssn,
youth_application__created_at__year=employer_voucher.created_at.year,
youth_application__created_at__lte=employer_voucher.created_at,
)
).count()
== 1
):
youth_voucher = youth_vouchers.first()
employer_voucher.youth_summer_voucher = youth_voucher
employer_voucher.save()
matched_by_ssn_count += 1
LOGGER.info(
f"Matched employer voucher {employer_voucher.id} to "
f"youth voucher {youth_voucher.id} by social security number & year."
)
else:
# Neither strategy gave exactly one candidate: the row is left unchanged
LOGGER.warning(
f"Unable to find youth voucher for employer voucher {employer_voucher.id}"
)
failure_count += 1
# Log summary of results
LOGGER.info(f"Handled {total_count} employer summer vouchers:")
LOGGER.info(f"- Matched by voucher serial number: {matched_by_serial_count}")
LOGGER.info(f"- Matched by social security number & year: {matched_by_ssn_count}")
LOGGER.info(f"- Failed to match: {failure_count}")
def set_historical_serial_number_based_foreign_keys(
historical_employer_summer_voucher_model,
):
"""
Convert non-negative integer string values in _obsolete_unclean_serial_number to actual integer
values in youth_summer_voucher_id in HistoricalEmployerSummerVoucher model.
Not trying to make the historical records perfect, just doing the minimum obvious conversion.
"""
# Counted before the update so the summary can report how many rows were skipped
total_count = historical_employer_summer_voucher_model.objects.count()
# Production had around 55k HistoricalEmployerSummerVoucher rows in Jan 2026
# The filter and update run as one queryset update() call, not row by row.
updated_count = historical_employer_summer_voucher_model.objects.filter(
# Non-negative integer strings with possible leading/trailing whitespace:
_obsolete_unclean_serial_number__regex=r"^\s*[0-9]+\s*$"
).update(
# Trim the string and cast it to an integer for the ForeignKey column
youth_summer_voucher_id=Cast(
Trim("_obsolete_unclean_serial_number"), PositiveBigIntegerField()
)
)
LOGGER.info(f"Handled {total_count} historical employer summer vouchers:")
LOGGER.info(f"- Converted numeric serial numbers to integers: {updated_count}")
LOGGER.info(f"- Left as NULL: {total_count - updated_count}")
import logging
from collections import defaultdict
from django.db.models import PositiveBigIntegerField
from django.db.models.functions import Cast, Trim
from stdnum.fi.hetu import is_valid as is_valid_finnish_social_security_number
LOGGER = logging.getLogger(__name__)
def set_current_valid_serial_number_based_foreign_keys(
employer_summer_voucher_model, youth_summer_voucher_model
):
"""
Convert valid serial number strings in _obsolete_unclean_serial_number to actual valid
ForeignKey references in youth_summer_voucher_id in EmployerSummerVoucher model.
Matching is done first by purely numeric serial numbers, and if that fails, by matching
social security number and creation year.
"""
# Pre-fetch all youth vouchers to avoid N+1 queries.
# We select related youth_application to access SSN and created_at efficiently.
youth_vouchers = list(youth_summer_voucher_model.objects.select_related("youth_application").only(
"id",
"summer_voucher_serial_number",
"youth_application__social_security_number",
"youth_application__created_at"
))
# Build lookup maps
# Map: serial_number (int) -> list of YouthSummerVoucher objects
# (a list, so that duplicate serial numbers can be detected and rejected below)
serial_map = defaultdict(list)
# Map: (ssn, year) -> list of YouthSummerVoucher objects
ssn_year_map = defaultdict(list)
for yv in youth_vouchers:
if yv.summer_voucher_serial_number:
serial_map[yv.summer_voucher_serial_number].append(yv)
# Ensure we have the necessary application data for SSN matching
if yv.youth_application:
ssn = yv.youth_application.social_security_number
created_at = yv.youth_application.created_at
if ssn and created_at:
ssn_year_map[(ssn, created_at.year)].append(yv)
total_count = employer_summer_voucher_model.objects.count()
matched_by_ssn_count = matched_by_serial_count = failure_count = 0
# Matched employer vouchers, written to the database in one bulk_update at the end
updates = []
# Iterate over employer vouchers
for employer_voucher in employer_summer_voucher_model.objects.all():
serial_number_str = employer_voucher._obsolete_unclean_serial_number.strip()
matched_voucher = None
# 1. Try matching by Numeric Serial Number
if serial_number_str.isdigit():
serial_number = int(serial_number_str)
# .get() instead of indexing so that the defaultdict does not grow
candidates = serial_map.get(serial_number, [])
if len(candidates) == 1:
matched_voucher = candidates[0]
matched_by_serial_count += 1
# 2. Try matching by SSN + Year (if no serial match yet)
if not matched_voucher and is_valid_finnish_social_security_number(employer_voucher.employee_ssn):
candidates = ssn_year_map.get((employer_voucher.employee_ssn, employer_voucher.created_at.year), [])
# Filter candidates: must not be created later than the employer voucher
valid_candidates = [
yv for yv in candidates
if yv.youth_application.created_at <= employer_voucher.created_at
]
# Zero or several candidates are treated as no match
if len(valid_candidates) == 1:
matched_voucher = valid_candidates[0]
matched_by_ssn_count += 1
if matched_voucher:
employer_voucher.youth_summer_voucher = matched_voucher
updates.append(employer_voucher)
else:
LOGGER.warning(
f"Unable to find youth voucher for employer voucher {employer_voucher.id}"
)
failure_count += 1
# Perform Bulk Update (no batch_size is given, and it is skipped when nothing matched)
if updates:
employer_summer_voucher_model.objects.bulk_update(updates, ["youth_summer_voucher"])
LOGGER.info(f"Handled {total_count} employer summer vouchers:")
LOGGER.info(f"- Matched by voucher serial number: {matched_by_serial_count}")
LOGGER.info(f"- Matched by social security number & year: {matched_by_ssn_count}")
LOGGER.info(f"- Failed to match: {failure_count}")
def set_historical_serial_number_based_foreign_keys(
historical_employer_summer_voucher_model,
):
"""
Convert non-negative integer string values in _obsolete_unclean_serial_number to actual integer
values in youth_summer_voucher_id in HistoricalEmployerSummerVoucher model.
"""
# Counted before the update so the summary can report how many rows were skipped
total_count = historical_employer_summer_voucher_model.objects.count()
# Only rows holding digits with optional surrounding whitespace are updated,
# in a single queryset update() call.
updated_count = historical_employer_summer_voucher_model.objects.filter(
_obsolete_unclean_serial_number__regex=r"^\s*[0-9]+\s*$"
).update(
# Trim the string and cast it to an integer for the ForeignKey column
youth_summer_voucher_id=Cast(
Trim("_obsolete_unclean_serial_number"), PositiveBigIntegerField()
)
)
LOGGER.info(f"Handled {total_count} historical employer summer vouchers:")
LOGGER.info(f"- Converted numeric serial numbers to integers: {updated_count}")
LOGGER.info(f"- Left as NULL: {total_count - updated_count}")

Copy link
Collaborator Author

@karisal-anders karisal-anders Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taken into account.

Loading
Loading