Skip to content

Commit 29c0ce4

Browse files
committed
feat(backend, website): download of original data for revisions
1 parent 1c7f943 commit 29c0ce4

File tree

21 files changed

+1589
-3
lines changed

21 files changed

+1589
-3
lines changed

backend/src/main/kotlin/org/loculus/backend/api/SubmissionTypes.kt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,24 @@ data class AccessionVersionOriginalMetadata(
328328
val originalMetadata: Map<String, String?>?,
329329
) : AccessionVersionInterface
330330

331+
data class GetOriginalDataRequest(
332+
@Schema(
333+
description = "The group ID to download data for.",
334+
required = true,
335+
)
336+
val groupId: Int,
337+
@Schema(
338+
description = "Filter by specific accessions. If not provided, all accessions for the group are returned.",
339+
)
340+
val accessionsFilter: List<Accession>? = null,
341+
)
342+
343+
data class OriginalDataResponse(
344+
override val accession: Accession,
345+
override val version: Version,
346+
val originalData: OriginalData<GeneticSequence>,
347+
) : AccessionVersionInterface
348+
331349
enum class Status {
332350
@JsonProperty("RECEIVED")
333351
RECEIVED,

backend/src/main/kotlin/org/loculus/backend/controller/SubmissionController.kt

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@ import org.loculus.backend.api.DataUseTerms
2424
import org.loculus.backend.api.DataUseTermsType
2525
import org.loculus.backend.api.EditedSequenceEntryData
2626
import org.loculus.backend.api.ExternalSubmittedData
27+
import org.loculus.backend.api.GetOriginalDataRequest
2728
import org.loculus.backend.api.GetSequenceResponse
2829
import org.loculus.backend.api.Organism
30+
import org.loculus.backend.api.OriginalDataResponse
2931
import org.loculus.backend.api.ProcessedData
3032
import org.loculus.backend.api.ProcessingResult
3133
import org.loculus.backend.api.SequenceEntryVersionToEdit
@@ -49,7 +51,10 @@ import org.loculus.backend.service.datauseterms.DataUseTermsPreconditionValidato
4951
import org.loculus.backend.service.groupmanagement.GroupManagementPreconditionValidator
5052
import org.loculus.backend.service.submission.SubmissionDatabaseService
5153
import org.loculus.backend.utils.Accession
54+
import org.loculus.backend.utils.FastaEntry
55+
import org.loculus.backend.utils.FastaWriter
5256
import org.loculus.backend.utils.IteratorStreamer
57+
import org.loculus.backend.utils.TsvWriter
5358
import org.slf4j.MDC
5459
import org.springframework.http.HttpHeaders
5560
import org.springframework.http.HttpStatus
@@ -74,6 +79,8 @@ import io.swagger.v3.oas.annotations.parameters.RequestBody as SwaggerRequestBod
7479

7580
private val log = KotlinLogging.logger { }
7681

82+
const val MAX_ORIGINAL_DATA_DOWNLOAD_ENTRIES = 500
83+
7784
@RestController
7885
@RequestMapping("/{organism}")
7986
@Validated
@@ -436,6 +443,7 @@ open class SubmissionController(
436443
organism,
437444
groupIdsFilter?.takeIf { it.isNotEmpty() },
438445
statusesFilter?.takeIf { it.isNotEmpty() },
446+
null,
439447
)
440448
headers.add(X_TOTAL_RECORDS, totalRecords.toString())
441449
// TODO(https://github.com/loculus-project/loculus/issues/2778)
@@ -451,12 +459,145 @@ open class SubmissionController(
451459
groupIdsFilter?.takeIf { it.isNotEmpty() },
452460
statusesFilter?.takeIf { it.isNotEmpty() },
453461
fields?.takeIf { it.isNotEmpty() },
462+
null,
454463
)
455464
}
456465

457466
return ResponseEntity(streamBody, headers, HttpStatus.OK)
458467
}
459468

469+
@Operation(
470+
description = "Download original data (metadata and sequences) as a zip file, suitable for revisions. " +
471+
"It is limited to $MAX_ORIGINAL_DATA_DOWNLOAD_ENTRIES entries.",
472+
)
473+
@ResponseStatus(HttpStatus.OK)
474+
@PostMapping(
475+
"/get-original-data",
476+
consumes = [MediaType.APPLICATION_JSON_VALUE],
477+
produces = ["application/zip"],
478+
)
479+
fun getOriginalData(
480+
@PathVariable @Valid organism: Organism,
481+
@HiddenParam authenticatedUser: AuthenticatedUser,
482+
@RequestBody body: GetOriginalDataRequest,
483+
): ResponseEntity<StreamingResponseBody> {
484+
val entryCount = transaction {
485+
submissionDatabaseService.countOriginalData(
486+
authenticatedUser,
487+
organism,
488+
body.groupId,
489+
body.accessionsFilter,
490+
)
491+
}
492+
493+
if (entryCount > MAX_ORIGINAL_DATA_DOWNLOAD_ENTRIES) {
494+
throw UnprocessableEntityException(
495+
"Download is limited to $MAX_ORIGINAL_DATA_DOWNLOAD_ENTRIES entries. " +
496+
"Requested download would include $entryCount entries. " +
497+
"Please filter fewer sequences.",
498+
)
499+
}
500+
501+
val headers = HttpHeaders()
502+
headers.contentType = MediaType.parseMediaType("application/zip")
503+
headers.set(
504+
HttpHeaders.CONTENT_DISPOSITION,
505+
"attachment; filename=\"${organism.name}_original_data.zip\"",
506+
)
507+
508+
val instanceConfig = backendConfig.getInstanceConfig(organism)
509+
val hasConsensusSequences = instanceConfig.schema.submissionDataTypes.consensusSequences
510+
val isMultiSegmented = instanceConfig.referenceGenome.nucleotideSequences.size > 1
511+
512+
val streamBody = StreamingResponseBody { responseBodyStream ->
513+
val startTime = System.currentTimeMillis()
514+
MDC.put(REQUEST_ID_MDC_KEY, requestIdContext.requestId)
515+
MDC.put(ORGANISM_MDC_KEY, organism.name)
516+
517+
try {
518+
java.util.zip.ZipOutputStream(responseBodyStream).use { zipOut ->
519+
transaction {
520+
val data = submissionDatabaseService.streamOriginalData(
521+
authenticatedUser,
522+
organism,
523+
body.groupId,
524+
body.accessionsFilter,
525+
).toList()
526+
527+
zipOut.putNextEntry(java.util.zip.ZipEntry("metadata.tsv"))
528+
writeMetadataTsv(data, zipOut, isMultiSegmented)
529+
zipOut.closeEntry()
530+
531+
if (hasConsensusSequences) {
532+
zipOut.putNextEntry(java.util.zip.ZipEntry("sequences.fasta"))
533+
writeSequencesFasta(data, zipOut, isMultiSegmented)
534+
zipOut.closeEntry()
535+
}
536+
}
537+
}
538+
} catch (e: Exception) {
539+
val duration = System.currentTimeMillis() - startTime
540+
log.error(e) { "[get-original-data] Error after ${duration}ms: $e" }
541+
throw e
542+
}
543+
544+
val duration = System.currentTimeMillis() - startTime
545+
log.info { "[get-original-data] Completed in ${duration}ms" }
546+
547+
MDC.remove(REQUEST_ID_MDC_KEY)
548+
MDC.remove(ORGANISM_MDC_KEY)
549+
}
550+
551+
return ResponseEntity(streamBody, headers, HttpStatus.OK)
552+
}
553+
554+
private fun writeMetadataTsv(
555+
data: List<OriginalDataResponse>,
556+
outputStream: java.io.OutputStream,
557+
isMultiSegmented: Boolean,
558+
) {
559+
val metadataKeys = data.flatMap { it.originalData.metadata.keys }.toSet().sorted()
560+
val headers = if (isMultiSegmented) {
561+
listOf("id", "accession", "fastaIds") + metadataKeys
562+
} else {
563+
listOf("id", "accession") + metadataKeys
564+
}
565+
566+
TsvWriter(outputStream, headers).use { writer ->
567+
for (entry in data) {
568+
val id = "${entry.accession}.${entry.version}"
569+
val metadataValues = metadataKeys.map { entry.originalData.metadata[it] ?: "" }
570+
val row = if (isMultiSegmented) {
571+
val fastaIds = entry.originalData.unalignedNucleotideSequences.keys
572+
.map { originalFastaId -> "$id|$originalFastaId" }
573+
.joinToString(" ")
574+
listOf(id, entry.accession, fastaIds) + metadataValues
575+
} else {
576+
listOf(id, entry.accession) + metadataValues
577+
}
578+
writer.writeRow(row)
579+
}
580+
}
581+
}
582+
583+
private fun writeSequencesFasta(
584+
data: List<OriginalDataResponse>,
585+
outputStream: java.io.OutputStream,
586+
isMultiSegmented: Boolean,
587+
) {
588+
FastaWriter(outputStream).use { writer ->
589+
for (entry in data) {
590+
val id = "${entry.accession}.${entry.version}"
591+
for ((originalFastaId, sequence) in entry.originalData.unalignedNucleotideSequences) {
592+
if (sequence != null) {
593+
val header = if (isMultiSegmented) "$id|$originalFastaId" else id
594+
writer.write(FastaEntry(header, sequence))
595+
}
596+
}
597+
}
598+
}
599+
}
600+
460601
@Operation(description = APPROVE_PROCESSED_DATA_DESCRIPTION)
461602
@ResponseStatus(HttpStatus.OK)
462603
@PostMapping("/approve-processed-data", consumes = [MediaType.APPLICATION_JSON_VALUE])

backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import org.jetbrains.exposed.sql.JoinType
1313
import org.jetbrains.exposed.sql.LongColumnType
1414
import org.jetbrains.exposed.sql.Op
1515
import org.jetbrains.exposed.sql.SortOrder
16+
import org.jetbrains.exposed.sql.SqlExpressionBuilder.eq
17+
import org.jetbrains.exposed.sql.SqlExpressionBuilder.inList
1618
import org.jetbrains.exposed.sql.SqlExpressionBuilder.less
1719
import org.jetbrains.exposed.sql.SqlExpressionBuilder.plus
1820
import org.jetbrains.exposed.sql.Transaction
@@ -52,6 +54,7 @@ import org.loculus.backend.api.FileIdAndNameAndReadUrl
5254
import org.loculus.backend.api.GeneticSequence
5355
import org.loculus.backend.api.GetSequenceResponse
5456
import org.loculus.backend.api.Organism
57+
import org.loculus.backend.api.OriginalDataResponse
5558
import org.loculus.backend.api.OriginalDataWithFileUrls
5659
import org.loculus.backend.api.PreprocessingStatus.IN_PROCESSING
5760
import org.loculus.backend.api.PreprocessingStatus.PROCESSED
@@ -72,6 +75,7 @@ import org.loculus.backend.api.getFileId
7275
import org.loculus.backend.auth.AuthenticatedUser
7376
import org.loculus.backend.config.BackendSpringProperty
7477
import org.loculus.backend.controller.BadRequestException
78+
import org.loculus.backend.controller.ForbiddenException
7579
import org.loculus.backend.controller.ProcessingValidationException
7680
import org.loculus.backend.controller.UnprocessableEntityException
7781
import org.loculus.backend.log.AuditLogger
@@ -1182,6 +1186,7 @@ class SubmissionDatabaseService(
11821186
organism: Organism,
11831187
groupIdsFilter: List<Int>?,
11841188
statusesFilter: List<Status>?,
1189+
accessionVersionsFilter: List<AccessionVersion>?,
11851190
): Op<Boolean> {
11861191
val organismCondition = SequenceEntriesView.organismIs(organism)
11871192
val groupCondition = getGroupCondition(groupIdsFilter, authenticatedUser)
@@ -1190,7 +1195,12 @@ class SubmissionDatabaseService(
11901195
} else {
11911196
Op.TRUE
11921197
}
1193-
val conditions = organismCondition and groupCondition and statusCondition
1198+
val accessionVersionCondition = if (accessionVersionsFilter != null) {
1199+
SequenceEntriesView.accessionVersionIsIn(accessionVersionsFilter)
1200+
} else {
1201+
Op.TRUE
1202+
}
1203+
val conditions = organismCondition and groupCondition and statusCondition and accessionVersionCondition
11941204

11951205
return conditions
11961206
}
@@ -1200,6 +1210,7 @@ class SubmissionDatabaseService(
12001210
organism: Organism,
12011211
groupIdsFilter: List<Int>?,
12021212
statusesFilter: List<Status>?,
1213+
accessionVersionsFilter: List<AccessionVersion>?,
12031214
): Long = SequenceEntriesView
12041215
.selectAll()
12051216
.where(
@@ -1208,6 +1219,7 @@ class SubmissionDatabaseService(
12081219
organism,
12091220
groupIdsFilter,
12101221
statusesFilter,
1222+
accessionVersionsFilter,
12111223
),
12121224
)
12131225
.count()
@@ -1218,6 +1230,7 @@ class SubmissionDatabaseService(
12181230
groupIdsFilter: List<Int>?,
12191231
statusesFilter: List<Status>?,
12201232
fields: List<String>?,
1233+
accessionVersionsFilter: List<AccessionVersion>?,
12211234
): Sequence<AccessionVersionOriginalMetadata> {
12221235
val originalMetadata = SequenceEntriesView.originalDataColumn
12231236
// It's actually <Map<String, String>?> but exposed does not support nullable types here
@@ -1238,6 +1251,7 @@ class SubmissionDatabaseService(
12381251
organism,
12391252
groupIdsFilter,
12401253
statusesFilter,
1254+
accessionVersionsFilter,
12411255
),
12421256
)
12431257
.fetchSize(streamBatchSize)
@@ -1258,6 +1272,67 @@ class SubmissionDatabaseService(
12581272
}
12591273
}
12601274

1275+
private fun originalDataConditions(
1276+
organism: Organism,
1277+
groupId: Int,
1278+
accessionsFilter: List<String>?,
1279+
): Op<Boolean> {
1280+
val accessionsCondition = if (!accessionsFilter.isNullOrEmpty()) {
1281+
SequenceEntriesView.accessionColumn inList accessionsFilter
1282+
} else {
1283+
Op.TRUE
1284+
}
1285+
1286+
return SequenceEntriesView.organismIs(organism) and
1287+
(SequenceEntriesView.groupIdColumn eq groupId) and
1288+
SequenceEntriesView.statusIs(APPROVED_FOR_RELEASE) and
1289+
SequenceEntriesView.isMaxVersion and
1290+
(SequenceEntriesView.isRevocationColumn eq false) and
1291+
accessionsCondition
1292+
}
1293+
1294+
fun countOriginalData(
1295+
authenticatedUser: AuthenticatedUser,
1296+
organism: Organism,
1297+
groupId: Int,
1298+
accessionsFilter: List<String>?,
1299+
): Long {
1300+
groupManagementPreconditionValidator.validateUserIsAllowedToModifyGroup(groupId, authenticatedUser)
1301+
return SequenceEntriesView
1302+
.select(SequenceEntriesView.accessionColumn)
1303+
.where(originalDataConditions(organism, groupId, accessionsFilter))
1304+
.count()
1305+
}
1306+
1307+
fun streamOriginalData(
1308+
authenticatedUser: AuthenticatedUser,
1309+
organism: Organism,
1310+
groupId: Int,
1311+
accessionsFilter: List<String>?,
1312+
): Sequence<OriginalDataResponse> {
1313+
groupManagementPreconditionValidator.validateUserIsAllowedToModifyGroup(groupId, authenticatedUser)
1314+
return SequenceEntriesView
1315+
.select(
1316+
SequenceEntriesView.accessionColumn,
1317+
SequenceEntriesView.versionColumn,
1318+
SequenceEntriesView.originalDataColumn,
1319+
)
1320+
.where(originalDataConditions(organism, groupId, accessionsFilter))
1321+
.fetchSize(streamBatchSize)
1322+
.asSequence()
1323+
.map {
1324+
val compressedOriginalData = it[SequenceEntriesView.originalDataColumn]!!
1325+
val decompressedOriginalData = compressionService.decompressSequencesInOriginalData(
1326+
compressedOriginalData,
1327+
)
1328+
OriginalDataResponse(
1329+
it[SequenceEntriesView.accessionColumn],
1330+
it[SequenceEntriesView.versionColumn],
1331+
decompressedOriginalData,
1332+
)
1333+
}
1334+
}
1335+
12611336
fun cleanUpStaleSequencesInProcessing(timeToStaleInSeconds: Long) {
12621337
val staleDateTime = dateProvider.getCurrentInstant()
12631338
.minus(timeToStaleInSeconds, DateTimeUnit.SECOND, DateProvider.timeZone)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
package org.loculus.backend.utils
2+
3+
import java.io.OutputStream
4+
5+
class FastaWriter(outputStream: OutputStream) : AutoCloseable {
6+
private val writer = outputStream.bufferedWriter()
7+
8+
fun write(entry: FastaEntry) {
9+
writer.write(">")
10+
writer.write(entry.fastaId)
11+
writer.newLine()
12+
writer.write(entry.sequence)
13+
writer.newLine()
14+
}
15+
16+
fun writeAll(entries: Iterable<FastaEntry>) {
17+
for (entry in entries) {
18+
write(entry)
19+
}
20+
}
21+
22+
override fun close() {
23+
writer.flush()
24+
}
25+
}

0 commit comments

Comments
 (0)