-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathwf_theiacov_illumina_se.wdl
More file actions
213 lines (210 loc) · 8.59 KB
/
wf_theiacov_illumina_se.wdl
File metadata and controls
213 lines (210 loc) · 8.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
version 1.0
import "wf_read_QC_trim_se.wdl" as read_qc
import "../tasks/task_alignment.wdl" as align
import "../tasks/task_consensus_call.wdl" as consensus_call
import "../tasks/quality_control/task_assembly_metrics.wdl" as assembly_metrics
import "../tasks/task_taxonID.wdl" as taxon_ID
import "../tasks/task_ncbi.wdl" as ncbi
import "../tasks/task_versioning.wdl" as versioning
import "../tasks/quality_control/task_consensus_qc.wdl" as consensus_qc_task
import "../tasks/task_sc2_gene_coverage.wdl" as sc2_calculation
# Workflow: theiacov_illumina_se
#
# Single-end Illumina entry point. Runs, in order: read QC/trimming, alignment
# with the `bwa` task, optional primer trimming, variant calling, consensus
# generation, consensus/alignment QC, and then organism-dependent lineage,
# clade and VADR steps selected by the `organism` input.
workflow theiacov_illumina_se {
  meta {
    description: "Reference-based consensus calling for viral amplicon sequencing data"
  }
  input {
    # Sample identifier passed to most downstream tasks
    String samplename
    # Reported back unchanged as the `seq_platform` output
    String seq_method = "ILLUMINA"
    # Raw single-end reads
    File read1_raw
    # Required when trim_primers is true (see the primer_trim call below)
    File? primer_bed
    # Nextclade dataset selection; dataset name falls back to `organism` when unset
    String nextclade_dataset_reference = "MN908947"
    String nextclade_dataset_tag = "2023-02-25T12:00:00Z"
    String? nextclade_dataset_name
    # Optional reference forwarded to bwa, variant_call, consensus and consensus_qc
    File? reference_genome
    # Used as both the variant-calling and consensus minimum depth
    Int min_depth = 100
    # Selects which organism-specific sections run: "sars-cov-2", "MPXV" or "WNV"
    String organism = "sars-cov-2"
    # When false, primer trimming (and its stats) are skipped entirely
    Boolean trim_primers = true
    # Optional files forwarded to the read QC sub-workflow
    File? adapters
    File? phix
  }

  # Read QC / trimming sub-workflow
  call read_qc.read_QC_trim {
    input:
      samplename = samplename,
      read1_raw = read1_raw,
      adapters = adapters,
      phix = phix
  }

  # Align the cleaned reads
  call align.bwa {
    input:
      samplename = samplename,
      read1 = read_QC_trim.read1_clean,
      reference_genome = reference_genome
  }

  if (trim_primers) {
    call consensus_call.primer_trim {
      input:
        samplename = samplename,
        # select_first on a single optional fails the workflow if primer_bed was
        # not supplied; this is intentional, trimming cannot run without it.
        primer_bed = select_first([primer_bed]),
        bamfile = bwa.sorted_bam
    }
    # Coverage/quality stats on the primer-trimmed alignment
    call assembly_metrics.stats_n_coverage as stats_n_coverage_primtrim {
      input:
        samplename = samplename,
        bamfile = primer_trim.trim_sorted_bam
    }
  }

  # Variant calling and consensus use the primer-trimmed BAM when available,
  # otherwise the untrimmed bwa alignment.
  call consensus_call.variant_call {
    input:
      samplename = samplename,
      bamfile = select_first([primer_trim.trim_sorted_bam, bwa.sorted_bam]),
      reference_genome = reference_genome,
      variant_min_depth = min_depth
  }
  call consensus_call.consensus {
    input:
      samplename = samplename,
      bamfile = select_first([primer_trim.trim_sorted_bam, bwa.sorted_bam]),
      reference_genome = reference_genome,
      consensus_min_depth = min_depth
  }
  call consensus_qc_task.consensus_qc {
    input:
      assembly_fasta = consensus.consensus_seq,
      reference_genome = reference_genome
  }

  # Coverage/quality stats on the untrimmed alignment (always runs)
  call assembly_metrics.stats_n_coverage {
    input:
      samplename = samplename,
      bamfile = bwa.sorted_bam
  }

  if (organism == "sars-cov-2") {
    # sars-cov-2 specific tasks
    call taxon_ID.pangolin4 {
      input:
        samplename = samplename,
        fasta = consensus.consensus_seq
    }
    call sc2_calculation.sc2_gene_coverage {
      input:
        samplename = samplename,
        bamfile = bwa.sorted_bam,
        min_depth = min_depth
    }
  }
  if (organism == "MPXV") {
    # MPXV specific tasks
  }
  if (organism == "WNV") {
    # WNV specific tasks (none yet, just adding as placeholder for future)
  }
  if (organism == "MPXV" || organism == "sars-cov-2") {
    # tasks specific to either MPXV or sars-cov-2
    call taxon_ID.nextclade_one_sample {
      input:
        genome_fasta = consensus.consensus_seq,
        # Explicit dataset name wins; otherwise the organism string is used
        dataset_name = select_first([nextclade_dataset_name, organism]),
        dataset_reference = nextclade_dataset_reference,
        dataset_tag = nextclade_dataset_tag
    }
    call taxon_ID.nextclade_output_parser_one_sample {
      input:
        nextclade_tsv = nextclade_one_sample.nextclade_tsv,
        organism = organism
    }
  }
  if (organism == "MPXV" || organism == "sars-cov-2" || organism == "WNV") {
    # tasks specific to MPXV, sars-cov-2, and WNV
    call ncbi.vadr {
      input:
        genome_fasta = consensus.consensus_seq,
        assembly_length_unambiguous = consensus_qc.number_ATCG
    }
  }

  call versioning.version_capture {
    input:
  }

  output {
    # Version Capture
    String theiacov_illumina_se_version = version_capture.phvg_version
    String theiacov_illumina_se_analysis_date = version_capture.date
    # Read Metadata
    String seq_platform = seq_method
    # Read QC
    File read1_clean = read_QC_trim.read1_clean
    Int num_reads_raw = read_QC_trim.fastq_scan_number_reads
    String fastq_scan_version = read_QC_trim.fastq_scan_version
    Int num_reads_clean = read_QC_trim.fastq_scan_clean_number_reads
    String trimmomatic_version = read_QC_trim.trimmomatic_version
    String bbduk_docker = read_QC_trim.bbduk_docker
    Float kraken_human = read_QC_trim.kraken_human
    Float kraken_sc2 = read_QC_trim.kraken_sc2
    String? kraken_target_org = read_QC_trim.kraken_target_org
    String? kraken_target_org_name = read_QC_trim.kraken_target_org_name
    String kraken_version = read_QC_trim.kraken_version
    File kraken_report = read_QC_trim.kraken_report
    # Float kraken_human_dehosted = read_QC_trim.kraken_human_dehosted
    # Float kraken_sc2_dehosted = read_QC_trim.kraken_sc2_dehosted
    # String kraken_report_dehosted = read_QC_trim.kraken_report_dehosted
    # Read Alignment
    String bwa_version = bwa.bwa_version
    String samtools_version = bwa.sam_version
    File read1_aligned = bwa.read1_aligned
    # primer_trim only runs when trim_primers is true, so its ivar_version is
    # optional here; fall back to the consensus task's ivar version so the
    # string is never built from an undefined value.
    String assembly_method = "TheiaCoV (~{version_capture.phvg_version}): ~{bwa.bwa_version}; ~{select_first([primer_trim.ivar_version, consensus.ivar_version])}"
    File aligned_bam = select_first([primer_trim.trim_sorted_bam, bwa.sorted_bam])
    File aligned_bai = select_first([primer_trim.trim_sorted_bai, bwa.sorted_bai])
    # The following are undefined when trim_primers is false
    Float? primer_trimmed_read_percent = primer_trim.primer_trimmed_read_percent
    String? ivar_version_primtrim = primer_trim.ivar_version
    String? samtools_version_primtrim = primer_trim.samtools_version
    String? primer_bed_name = primer_trim.primer_bed_name
    File ivar_tsv = variant_call.sample_variants_tsv
    File ivar_vcf = variant_call.sample_variants_vcf
    String? ivar_variant_proportion_intermediate = variant_call.variant_proportion_intermediate
    String ivar_variant_version = variant_call.ivar_version
    # Assembly QC
    File assembly_fasta = consensus.consensus_seq
    String ivar_version_consensus = consensus.ivar_version
    String samtools_version_consensus = consensus.samtools_version
    Int number_N = consensus_qc.number_N
    Int assembly_length_unambiguous = consensus_qc.number_ATCG
    Int number_Degenerate = consensus_qc.number_Degenerate
    Int number_Total = consensus_qc.number_Total
    Float percent_reference_coverage = consensus_qc.percent_reference_coverage
    Int consensus_n_variant_min_depth = min_depth
    # Alignment QC
    File consensus_stats = stats_n_coverage.stats
    File consensus_flagstat = stats_n_coverage.flagstat
    # Prefer stats from the primer-trimmed alignment; fall back to the untrimmed one
    Float meanbaseq_trim = select_first([stats_n_coverage_primtrim.meanbaseq, stats_n_coverage.meanbaseq])
    Float meanmapq_trim = select_first([stats_n_coverage_primtrim.meanmapq, stats_n_coverage.meanmapq])
    Float assembly_mean_coverage = select_first([stats_n_coverage_primtrim.depth, stats_n_coverage.depth])
    String samtools_version_stats = stats_n_coverage.samtools_version
    # SC2 specific (only defined when organism == "sars-cov-2")
    Float? sc2_s_gene_mean_coverage = sc2_gene_coverage.sc2_s_gene_depth
    Float? sc2_s_gene_percent_coverage = sc2_gene_coverage.sc2_s_gene_percent_coverage
    File? sc2_all_genes_percent_coverage = sc2_gene_coverage.sc2_all_genes_percent_coverage
    # Lineage Assignment (only defined when organism == "sars-cov-2")
    String? pango_lineage = pangolin4.pangolin_lineage
    String? pango_lineage_expanded = pangolin4.pangolin_lineage_expanded
    String? pangolin_conflicts = pangolin4.pangolin_conflicts
    String? pangolin_notes = pangolin4.pangolin_notes
    String? pangolin_assignment_version = pangolin4.pangolin_assignment_version
    File? pango_lineage_report = pangolin4.pango_lineage_report
    String? pangolin_docker = pangolin4.pangolin_docker
    String? pangolin_versions = pangolin4.pangolin_versions
    # Clade Assignment (only defined when organism is "MPXV" or "sars-cov-2")
    File? nextclade_json = nextclade_one_sample.nextclade_json
    File? auspice_json = nextclade_one_sample.auspice_json
    File? nextclade_tsv = nextclade_one_sample.nextclade_tsv
    String? nextclade_version = nextclade_one_sample.nextclade_version
    String? nextclade_docker = nextclade_one_sample.nextclade_docker
    String nextclade_ds_tag = nextclade_dataset_tag
    String? nextclade_aa_subs = nextclade_output_parser_one_sample.nextclade_aa_subs
    String? nextclade_aa_dels = nextclade_output_parser_one_sample.nextclade_aa_dels
    String? nextclade_clade = nextclade_output_parser_one_sample.nextclade_clade
    String? nextclade_lineage = nextclade_output_parser_one_sample.nextclade_lineage
    # VADR Annotation QC (only defined when organism is "MPXV", "sars-cov-2" or "WNV")
    File? vadr_alerts_list = vadr.alerts_list
    String? vadr_num_alerts = vadr.num_alerts
    String? vadr_docker = vadr.vadr_docker
    File? vadr_fastas_zip_archive = vadr.vadr_fastas_zip_archive
  }
}