Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`

- [#249](https://github.com/nf-core/phaseimpute/pull/249) - Add new `publish_all` hidden parameter to help during debugging. Add csv content checking in nf-test.
- [#248](https://github.com/nf-core/phaseimpute/pull/248) - Add chromosomes concatenation to `BAM_GL_BCFTOOLS`

### `Changed`

Expand All @@ -20,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#240](https://github.com/nf-core/phaseimpute/pull/240) - Move from local to nf-core sbwf for `GLIMPSE2` imputation. Update `usage.md`.
- [#247](https://github.com/nf-core/phaseimpute/pull/243) - Move from local to nf-core sbwf for `BEAGLE5` imputation. Update `usage.md`.
- [#249](https://github.com/nf-core/phaseimpute/pull/249) - Move `test_all`, `test_sim`, `test_panelprep`, `test_validate` to independent nf-test with md5sum assertion.
- [#248](https://github.com/nf-core/phaseimpute/pull/248) - Move from local to nf-core sbwf for `GLIMPSE1` imputation. Set output channels to `vcf_index`. Update `usage.md`.

### `Fixed`

Expand Down
45 changes: 27 additions & 18 deletions conf/steps/imputation_glimpse1.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@ process {

// Call the variants before imputation
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_GLIMPSE1:.*' {
publishDir = [ enabled: false ]
publishDir = [
path: { "${params.outdir}/imputation/glimpse1/variant_calling" },
mode: params.publish_dir_mode,
enabled: params.publish_all,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
tag = {"${meta.id} ${meta.chr}"}
}

Expand All @@ -28,10 +33,13 @@ process {

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_GLIMPSE1:BCFTOOLS_MERGE' {
cache = "lenient"
ext.args = [
"--write-index=tbi",
].join(' ')
ext.prefix = { "${meta.id}.merge" }
ext.args = "--write-index=tbi -Oz"
ext.prefix = { "${meta.id}_${meta.chr}.merge" }
}

// Settings for the BCFTOOLS_CONCAT process of the VCF_CONCATENATE_BCFTOOLS subworkflow nested in GL_GLIMPSE1
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_GLIMPSE1:VCF_CONCATENATE_BCFTOOLS:BCFTOOLS_CONCAT' {
// Same options as the GL_GLIMPSE1 BCFTOOLS_MERGE selector: request a tbi index with the output, output type z
ext.args = "--write-index=tbi -Oz"
// Prefix is built from meta.id only (no meta.chr, unlike the merge prefix); presumably the output file prefix — confirm in the module
ext.prefix = { "${meta.id}.concat" }
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_GLIMPSE1:BCFTOOLS_ANNOTATE' {
Expand All @@ -40,32 +48,33 @@ process {
}

// Impute the variants
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:.*' {
publishDir = [ enabled: false ]
tag = {"${meta.id} ${meta.chr}"}
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE:.*' {
publishDir = [
path: { "${params.outdir}/imputation/glimpse1/impute" },
mode: params.publish_dir_mode,
enabled: params.publish_all,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
tag = {"${meta.id} ${meta.regionout ?: meta.chr}"}
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_PHASE' {
ext.args = ["--impute-reference-only-variants"].join(' ')
ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chunk.replace(':','_')}.glimpse1" }
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE:GLIMPSE_PHASE' {
ext.args = { "--impute-reference-only-variants --seed ${params.seed}" }
ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? meta.regionout.replace(':','_') : meta.chr}.glimpse1" }
ext.suffix = "bcf"
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_1' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE:BCFTOOLS_INDEX_PHASE' {
ext.args = "--csi"
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:GLIMPSE_LIGATE' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE:GLIMPSE_LIGATE' {
ext.args = { "--seed ${params.seed}" }
ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chr}.ligate.glimpse1" }
publishDir = [ enabled: false ]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE1:BCFTOOLS_INDEX_2' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:VCF_IMPUTE_GLIMPSE:BCFTOOLS_INDEX_LIGATE' {
ext.args = "--tbi"
publishDir = [ enabled: false ]
}

// Concatenate the imputed chunks
Expand Down
20 changes: 7 additions & 13 deletions conf/steps/validation.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,17 @@ process {
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_MERGE' {
ext.args = [
"--write-index=tbi",
].join(' ')
ext.prefix = { "${meta.id}" }
ext.args = "--write-index=tbi -Oz"
ext.prefix = { "${meta.id}_${meta.chr}" }
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_ANNOTATE' {
ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz", "--write-index=tbi"].join(' ')
ext.prefix = { "${meta.id}_${meta.chr}.annotate" }
// Settings for the BCFTOOLS_CONCAT process of the VCF_CONCATENATE_BCFTOOLS subworkflow nested in GL_TRUTH
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:VCF_CONCATENATE_BCFTOOLS:BCFTOOLS_CONCAT' {
// Passes a tbi index request, output type z and --ligate to the tool
// NOTE(review): --ligate is not set on the GL_GLIMPSE1 concat selector — confirm it is intended here
ext.args = "--write-index=tbi --output-type z --ligate"
// Prefix combines meta.id and meta.batch; presumably the output file prefix — confirm in the module
ext.prefix = { "${meta.id}.batch${meta.batch}.concat" }
}

// Concatenate the truth set
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:.*' {
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:GL_TRUTH:BCFTOOLS_ANNOTATE' {
ext.args = ["--set-id '%CHROM:%POS:%REF:%ALT'", "-Oz", "--write-index=tbi"].join(' ')
ext.prefix = { "${meta.id}.batch${meta.batch}.truth" }
publishDir = [
path: { "${params.outdir}/validation/concat" },
Expand All @@ -49,10 +47,6 @@ process {
]
}

withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_TRUTH:BCFTOOLS_CONCAT' {
ext.args = ["--ligate", "--output-type z", "--write-index=tbi"].join(' ')
}

// Compute sample files for renaming
withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BCFTOOLS_QUERY_TRUTH' {
tag = { "${meta.id} Batch ${meta.batch} ${meta.tools}" }
Expand Down
9 changes: 8 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ Optionnaly you can provide the following flags:

| | `--steps impute`(m) | `--input`(m) | `--genome` or `--fasta`(m) | `--panel`(m) | `--posfile`(m) | `--map`(o) | `--chunks`(o) |
| ---------- | ------------------- | ------------ | -------------------------- | ------------ | -------------- | ---------- | ------------- |
| `GLIMPSE1` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ³ | ❌⁶ | ✅ |
| `GLIMPSE1` | ✅                  | ✅ ¹         | ✅                         | ✅           | ✅ ³           | ✅         | ✅            |
| `GLIMPSE2` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ✅ | ✅ |
| `QUILT` | ✅ | ✅ ² | ✅ | ❌ | ✅ ⁴ | ❌⁶ | ✅ |
| `STITCH` | ✅ | ✅ ² | ✅ | ❌ | ✅ ³ | ✅ | ✅ |
Expand Down Expand Up @@ -617,6 +617,13 @@ panel,chr,posfile

The CSV file provided in `--panel` must be prepared with `--steps panelprep` and must contain four columns [panel, chr, vcf, index].

You can optionally provide chunks to parallelize the imputation process using `--chunks`.
If no chunks are provided, the full region of each chromosome is used.
See the [Chunks section](#samplesheet-chunks) for more information.

A genetic map can also be provided with `--map` for better accuracy.
See the [Map section](#samplesheet-map) for more information.

### GLIMPSE2

[GLIMPSE2](https://github.com/odelaneau/GLIMPSE) is a set of tools for phasing and imputation for low-coverage sequencing datasets. This is an example command to run this tool from the `--steps impute`:
Expand Down
14 changes: 10 additions & 4 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
"bam_impute_stitch",
"bam_vcf_impute_glimpse2",
"modules",
"vcf_impute_beagle5"
"vcf_impute_beagle5",
"vcf_impute_glimpse"
]
},
"bcftools/merge": {
Expand Down Expand Up @@ -82,17 +83,17 @@
"glimpse/chunk": {
"branch": "master",
"git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
"installed_by": ["modules"]
"installed_by": ["modules", "vcf_impute_glimpse"]
},
"glimpse/ligate": {
"branch": "master",
"git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
"installed_by": ["modules"]
"installed_by": ["modules", "vcf_impute_glimpse"]
},
"glimpse/phase": {
"branch": "master",
"git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
"installed_by": ["modules"]
"installed_by": ["modules", "vcf_impute_glimpse"]
},
"glimpse2/chunk": {
"branch": "master",
Expand Down Expand Up @@ -250,6 +251,11 @@
"git_sha": "4b406a74dc0449c0401ed87d5bfff4252fd277fd",
"installed_by": ["subworkflows"]
},
"vcf_impute_glimpse": {
"branch": "master",
"git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6",
"installed_by": ["subworkflows"]
},
"vcf_impute_beagle5": {
"branch": "master",
"git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6",
Expand Down
31 changes: 20 additions & 11 deletions subworkflows/local/bam_gl_bcftools/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup'
include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge'
include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate'
include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup'
include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge'
include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate'
include { VCF_CONCATENATE_BCFTOOLS } from '../../../subworkflows/local/vcf_concatenate_bcftools'

workflow BAM_GL_BCFTOOLS {

Expand Down Expand Up @@ -34,7 +35,7 @@ workflow BAM_GL_BCFTOOLS {
.map{ metaIPC, vcf, tbi -> [metaIPC.subMap("panel_id", "chr", "batch"), [metaIPC, vcf, tbi]] }
.groupTuple(sort: { it1, it2 -> it1[0]["id"] <=> it2[0]["id"] }) // Sort by id
.map{ metaPC, filestups -> [
metaPC + [id: "all", metas: filestups.collect{it -> it[0]}],
metaPC + [id: "all_samples", metas: filestups.collect{it -> it[0]}],
filestups.collect{it -> it[1]},
filestups.collect{it -> it[2]},
filestups.collect{it -> it[1]}.size()
Expand All @@ -44,34 +45,42 @@ workflow BAM_GL_BCFTOOLS {
more: it[3] > 1
}

// Merge VCFs
// Merge VCFs all individuals
BCFTOOLS_MERGE(
ch_all_vcf.more.map{it -> [it[0], it[1], it[2], []] },
ch_fasta
)
ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions.first())

// Mix all vcfs
ch_to_annotate = ch_all_vcf.one
ch_to_concat = ch_all_vcf.one
.map{it -> [it[0]["metas"][0], it[1][0], it[2][0]] }
.mix(
BCFTOOLS_MERGE.out.vcf
.join(BCFTOOLS_MERGE.out.tbi)
.join(BCFTOOLS_MERGE.out.tbi.mix(
BCFTOOLS_MERGE.out.csi
))
)

// Merge all chromosomes
VCF_CONCATENATE_BCFTOOLS(ch_to_concat)
ch_versions = ch_versions.mix(VCF_CONCATENATE_BCFTOOLS.out.versions.first())

// Annotate the variants
BCFTOOLS_ANNOTATE(ch_to_annotate
BCFTOOLS_ANNOTATE(VCF_CONCATENATE_BCFTOOLS.out.vcf_index
.combine(channel.of([[], [], [], []]))
)
ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions.first())

// Output
ch_output = BCFTOOLS_ANNOTATE.out.vcf
.join(BCFTOOLS_ANNOTATE.out.tbi)
.map{ metaIPC, vcf, tbi -> [metaIPC + [ variantcaller:'bcftools' ], vcf, tbi] }
.join(BCFTOOLS_ANNOTATE.out.tbi.mix(
BCFTOOLS_ANNOTATE.out.csi
))
.map{ metaIPC, vcf, index -> [metaIPC + [ variantcaller:'bcftools' ], vcf, index] }

emit:
vcf_tbi = ch_output // channel: [ [id, panel, chr], vcf, tbi ]
vcf_index = ch_output // channel: [ [id, panel], vcf, index ]
versions = ch_versions // channel: [ versions.yml ]
multiqc_files = ch_multiqc_files
}
23 changes: 14 additions & 9 deletions subworkflows/local/bam_gl_bcftools/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ nextflow_workflow {
tag "bcftools/mpileup"
tag "bcftools/annotate"

test("Compute genotype likelihood with merging") {
test("Compute genotype likelihood with merging and two chromosomes") {
when {
workflow {
"""
Expand All @@ -34,10 +34,15 @@ nextflow_workflow {
file(params.pipelines_testdata_base_path + "hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExist:true),
],
])
input[1] = channel.of([
[panel_id: "1000GP", chr: "22"],
file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.posfile", checkIfExist:true)
]).collect()
input[1] = channel.of(
[
[panel_id: "1000GP", chr: "22"],
file(params.pipelines_testdata_base_path + "hum_data/panel/chr22/1000GP.chr22.posfile", checkIfExist:true)
],
[
[panel_id: "1000GP", chr: "21"],
file(params.pipelines_testdata_base_path + "hum_data/panel/chr21/1000GP.chr21.posfile", checkIfExist:true)
])
input[2] = channel.of([
[id: "GRCh38"],
file(params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExist:true),
Expand All @@ -52,12 +57,12 @@ nextflow_workflow {
{ assert workflow.success },
{ assert snapshot(
workflow.out.versions,
workflow.out.vcf_tbi.collect{[
workflow.out.vcf_index.collect{[
it[0],
path(it[1]).getFileName().toString(),
path(it[2]).getFileName().toString()
] },
workflow.out.vcf_tbi.collect{
workflow.out.vcf_index.collect{
path(it[1]).vcf.summary
},
workflow.out.versions.collect{ path(it).yaml }
Expand Down Expand Up @@ -95,12 +100,12 @@ nextflow_workflow {
{ assert workflow.success },
{ assert snapshot(
workflow.out.versions,
workflow.out.vcf_tbi.collect{[
workflow.out.vcf_index.collect{[
it[0],
path(it[1]).getFileName().toString(),
path(it[2]).getFileName().toString()
] },
workflow.out.vcf_tbi.collect{
workflow.out.vcf_index.collect{
path(it[1]).vcf.summary
},
workflow.out.versions.collect{ path(it).yaml }
Expand Down
Loading