Skip to content

Commit 8bc6d7e

Browse files
authored
SYS-2166: Fix batch update date issue (#95)
* Import `load_input_data` from `batch_update.py` into `views.py` and use it to load input data from uploaded spreadsheet * Coerce DB values to `str` when checking for changes * Print logs only after changes have been applied for a given record * Report invalid dates after all changes have been applied * Bump app version in Helm chart values and add release note * Improve type hinting and comments * Declare type alias within module scope
1 parent e1ecdce commit 8bc6d7e

File tree

4 files changed

+120
-41
lines changed

4 files changed

+120
-41
lines changed

charts/prod-ftvalabdata-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ replicaCount: 1
66

77
image:
88
repository: uclalibrary/ftva-lab-data
9-
tag: v1.0.27
9+
tag: v1.0.28
1010
pullPolicy: Always
1111

1212
nameOverride: ""

ftva_lab_data/management/commands/batch_update.py

Lines changed: 104 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,31 @@
33
from django.core.management.base import BaseCommand
44
from pathlib import Path
55
from ftva_lab_data.models import SheetImport
6-
from django.db.models import ForeignKey, ManyToManyField
6+
from django.db.models import ForeignKey, ManyToManyField, DateField
77
from django.core.exceptions import ObjectDoesNotExist
8+
from django.core.files.uploadedfile import InMemoryUploadedFile
9+
from typing import TypeAlias, Any
810

11+
ChangeDetails: TypeAlias = list[dict[str, Any]]
912

10-
def load_input_data(input_file: str) -> list[list[dict]]:
13+
14+
def load_input_data(input_file: str | InMemoryUploadedFile) -> list[list[dict]]:
1115
"""Load input data from the input file into a list of sheets,
1216
as an input file may contain multiple sheets .
1317
14-
:param input_file: Path to the spreadsheet containing records to update, as an XLSX file.
18+
:param input_file: Path to the spreadsheet containing records to update, as an XLSX file,
19+
or an InMemoryUploadedFile object passed from a Django form.
1520
:return: A list of lists of dicts, each representing a sheet with rows of input data.
1621
:raises ValueError: If the input file is not an XLSX file.
1722
"""
18-
input_suffix = Path(input_file).suffix
19-
if input_suffix != ".xlsx":
20-
raise ValueError(f"Unsupported file type: {input_suffix}")
23+
# Check extension if input_file is a string path
24+
if isinstance(input_file, str):
25+
input_suffix = Path(input_file).suffix
26+
if input_suffix != ".xlsx":
27+
raise ValueError(f"Unsupported file type: {input_suffix}")
2128
# `sheet_name=None` reads all sheets
2229
sheets = pd.read_excel(input_file, sheet_name=None)
30+
2331
# Convert each sheet DataFrame to a list of dicts, each representing a sheet of input data,
2432
# filling NA with empty string to avoid type issues with Django
2533
return [
@@ -77,9 +85,11 @@ def batch_update(input_data: list[dict], dry_run: bool) -> int:
7785
or if no updates were made to any records.
7886
"""
7987
records_updated = 0
88+
invalid_values: ChangeDetails = [] # track invalid values for whole batch
8089
for row in input_data:
8190
record = SheetImport.objects.get(id=row["id"])
82-
has_changes = False
91+
record_changes: ChangeDetails = [] # changes to non-m2m fields for each record
92+
many_to_many_changes: ChangeDetails = [] # changes to m2m need special handling
8393
for field, value in row.items():
8494
# Guard against changes to IDs or UUIDs
8595
if field.lower() in ["id", "pk", "uuid"]:
@@ -107,11 +117,12 @@ def batch_update(input_data: list[dict], dry_run: bool) -> int:
107117
**{f"{field}__istartswith": value}
108118
)
109119
if current_value != update:
110-
has_changes = True
111-
setattr(record, field, update)
112-
print(
113-
f"Record {row['id']} updated: "
114-
f"{field} changed from {current_value} to {update}"
120+
record_changes.append(
121+
{
122+
"field": field,
123+
"from": current_value,
124+
"to": update,
125+
}
115126
)
116127

117128
# Else if the field is a ManyToManyField,
@@ -127,45 +138,107 @@ def batch_update(input_data: list[dict], dry_run: bool) -> int:
127138
)
128139
# Only apply update if there is one and it's not already in the m2m relationship
129140
if update and update not in current_related_objects:
130-
has_changes = True
131-
# Need to use `getattr().add()` here rather than `setattr()`,
132-
# since we're adding an object to a many-to-many relationship,
133-
# rather than setting a single foreign key as we do above.
134-
# `add()` immediately saves the change to the database though,
135-
# so we need an additional `dry_run` check.
136-
if not dry_run:
137-
getattr(record, field).add(update)
138-
print(
139-
f"Record {row['id']} updated: " f"added {update} to {field}"
141+
# Since we're adding an object to a many-to-many relationship,
142+
# rather than setting a single foreign key as we do above,
143+
# track these changes separately,
144+
# and apply them later after all changes to record are collected.
145+
many_to_many_changes.append(
146+
{
147+
"field": field,
148+
"update": update,
149+
}
150+
)
151+
152+
# Else if the field is a DateField,
153+
# try parsing the value as a date,
154+
# and collect invalid dates for later reporting.
155+
elif isinstance(field_object, DateField):
156+
current_value = getattr(record, field)
157+
if value == "":
158+
update = None
159+
else:
160+
try:
161+
update = pd.to_datetime(value).date()
162+
except ValueError:
163+
invalid_values.append(
164+
{
165+
"record_id": row["id"],
166+
"field": field,
167+
"value": value,
168+
}
169+
)
170+
continue # don't apply change if date is invalid
171+
if current_value != update:
172+
record_changes.append(
173+
{
174+
"field": field,
175+
"from": current_value,
176+
"to": update,
177+
}
140178
)
141179

142180
# Otherwise, just set the value directly
143181
else:
144182
# Replace any empty strings in `file_name` with "NO FILE NAME"
145183
if field == "file_name" and value == "":
146184
value = "NO FILE NAME"
147-
current_value = getattr(record, field)
185+
# Coerce current database value to string for comparison
186+
current_value = str(getattr(record, field))
148187
if current_value != value:
149-
has_changes = True
150-
setattr(record, field, value)
151-
print(
152-
f"Record {row['id']} updated: "
153-
f"{field} changed from {current_value} to {value}"
188+
record_changes.append(
189+
{
190+
"field": field,
191+
"from": current_value,
192+
"to": value,
193+
}
154194
)
155195
except ObjectDoesNotExist as e:
156196
# Handler expects a ValueError
157197
raise ValueError(
158198
f"Error applying value {value} to field {field} on record {row['id']}: {e}"
159199
)
160200

161-
# Compare the original record to the updated record
162-
if not has_changes:
163-
print(f"No changes were made to record {row['id']}")
201+
# Continue if no changes made to current record
202+
if not record_changes and not many_to_many_changes:
203+
print(f"No changes made to record {row['id']}")
164204
continue
205+
# Save changes to record if not a dry run
165206
if not dry_run:
207+
# Apply record changes that can be set via `setattr()`...
208+
for change in record_changes:
209+
setattr(record, change["field"], change["to"])
210+
# then apply many-to-many changes,
211+
# which require use of `.add()` rather than `.setattr()`.
212+
# Note that `.add()` saves changes immediately.
213+
for change in many_to_many_changes:
214+
getattr(record, change["field"]).add(change["update"])
215+
# Now save record changes to the database.
166216
record.save()
217+
# Report on changes made to each record
218+
for change in record_changes:
219+
print(
220+
f"Record {row['id']} updated: "
221+
f"{change['field']} changed "
222+
f"from {change['from'] if change['from'] else '""'} "
223+
f"to {change['to'] if change['to'] else '""'}"
224+
)
225+
for change in many_to_many_changes:
226+
print(
227+
f"Record {row['id']} updated: "
228+
f"{change['update']} added to {change['field']}"
229+
)
167230
records_updated += 1
168231

232+
# Report on invalid values here so as not to prevent other valid changes from being applied
233+
if invalid_values:
234+
report_lines = [
235+
f"Record {invalid_value['record_id']} {invalid_value['field']} {invalid_value['value']}"
236+
for invalid_value in invalid_values
237+
]
238+
raise ValueError(
239+
"Invalid values found in input data:\n" + "\n".join(report_lines)
240+
)
241+
169242
if records_updated == 0:
170243
raise ValueError("All inputs match existing records; no updates to apply.")
171244
return records_updated

ftva_lab_data/templates/release_notes.html

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,17 @@
44
<h3>Release Notes</h3>
55
<hr />
66

7+
<h4>1.0.28</h4>
8+
<p><i>February 10, 2026</i></p>
9+
<ul>
10+
<li>Fix bug preventing date values from being applied in batch updates.</li>
11+
</ul>
12+
713
<h4>1.0.27</h4>
814
<p><i>January 9, 2026</i></p>
915
<ul>
10-
<li>Downgrade numpy to 2.3.5 per <a href="https://uclalibrary.atlassian.net/browse/DIGINF-262">deployment issue</a>.</li>
16+
<li>Downgrade numpy to 2.3.5 per <a href="https://uclalibrary.atlassian.net/browse/DIGINF-262">deployment issue</a>.
17+
</li>
1118
</ul>
1219

1320
<h4>1.0.26</h4>

ftva_lab_data/views.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
transform_record_to_dict,
3838
)
3939
from .management.commands.batch_update import (
40+
load_input_data,
4041
validate_input_data,
4142
batch_update as batch_update_command,
4243
)
@@ -780,12 +781,7 @@ def batch_update(request: HttpRequest) -> HttpResponse:
780781
if form.is_valid():
781782
try:
782783
file = form.cleaned_data["file"]
783-
sheets = pd.read_excel(file, sheet_name=None)
784-
# Convert each sheet to a list of dicts, each representing a row of input data
785-
sheets_data = [
786-
sheet_data.fillna("").to_dict(orient="records")
787-
for sheet_data in sheets.values()
788-
]
784+
sheets_data = load_input_data(file)
789785

790786
# Validate and run dry_run to get counts
791787
records_updated_counts = {}
@@ -818,8 +814,11 @@ def batch_update(request: HttpRequest) -> HttpResponse:
818814
{"form": form},
819815
)
820816

821-
# Store sheets_data in session for confirmation step
822-
request.session["batch_update_file_data"] = json.dumps(sheets_data)
817+
# Store sheets_data in session for confirmation step,
818+
# using `default=str` for date serialization.
819+
request.session["batch_update_file_data"] = json.dumps(
820+
sheets_data, default=str
821+
)
823822

824823
return render(
825824
request,

0 commit comments

Comments
 (0)