Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions conf/test_shortdna.config
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ params {
fasta = params.pipelines_testdata_base_path + 'eager/reference/Mammoth/Mammoth_MT_Krause.fasta'

// Mapping
// TO DO: Change when mapAD is there.
mapping_tool = 'mapad'
mapping_tool = 'mapad'

// Metagenomics
run_metagenomics = true
Expand Down
141 changes: 141 additions & 0 deletions tests/shortdna.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// nf-test pipeline test (tagged "test_shortdna"): runs main.nf with outdir set to nf-test's $outputDir,
// then compares the files found under $outputDir against named snapshots, one snapshot per results directory.
nextflow_pipeline {

name "Test pipeline: NFCORE_EAGER"
script "main.nf"
tag "pipeline"
tag "nfcore_eager"
tag "test_shortdna"

test("test_shortdna_profile") {

when {
params {
outdir = "$outputDir"
}
}

then {

///////////////////
// DOCUMENTATION //
///////////////////

// The contents of each top level results directory should be tested with individually named snapshots.
// Within each snapshot, there should be two to three distinct variables that contain the files to be tested.
// - stable_name_<dir> is for files with variable md5sums (i.e. content) so only names will be compared
// - stable_content_<dir> is for files with stable md5sums (i.e. content) so md5sums will be compared
// - bams_<dir> is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable)
// If a directory is fully stable, you can drop `stable_name_*`
// If a directory contains no BAMs, you can drop `bams_*`
// NOTE: This test defines no `bams_*` variables; BAM/BAI files are only compared by name (see below).

// NOTE(review): the commands below use this file's "test_shortdna" tag; the profile name is assumed to match conf/test_shortdna.config -- confirm.
// Generate with: nf-test test --tag test_shortdna --profile docker,test_shortdna --update-snapshot
// Test with: nf-test test --tag test_shortdna --profile docker,test_shortdna
// NOTE: BAMs are always only stable in name, because:
// a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112)
// b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order)
// point b) also causes BAIs to be unstable.
// c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes)

//////////////////////
// DEFINE VARIABLES //
//////////////////////

// Define exclusion patterns for files with unstable contents
// NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here
// This is particularly important if the patterns excluded in the stable content section should be included in the stable name section
// The list below is used twice: as `ignore` for the authentication content check and as `include` for the authentication name check.
def unstable_patterns_auth = [
'**/mapped_reads_gc-content_distribution.txt',
'**/mapped_reads_nucleotide_content.txt',
'**/genome_gc_content_per_window.png',
'**/*.{svg,pdf,html,png,json}',
'**/DamageProfiler.log',
'**/{3,5}p_freq_misincorporations.txt',
'**/DNA_comp_genome.txt',
'**/DNA_composition_sample.txt',
'**/misincorporation.txt',
'**/genome_results.txt',
'**/3pGtoA_freq.txt',
'**/5pCtoT_freq.txt',
'**/lgdistribution.txt',
'**/*c_curve.txt',
"**/coverage_across_reference.txt",
"**/coverage_histogram.txt",
"**/duplication_rate_histogram.txt",
"**/genome_fraction_coverage.txt",
"**/mapping_quality_across_reference.txt",
"**/mapping_quality_histogram.txt",
]

// Check that no files are missing/added
// Command legend: Result directory to index , includeDir: include dirs?, ignore: exclude patterns , ignoreFile: exclude pattern list , include: include patterns
// pipeline_info/* is excluded from this listing; the versions file in that directory is checked separately at the end of the assertions.
def stable_name_all = getAllFilesFromDir("$outputDir/" , includeDir: false , ignore: ['pipeline_info/*'] , ignoreFile: null , include: ['*', '**/*'] )

// Authentication
def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] )
def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth)

// Deduplication
// NOTE: even the flagstats are unstable, so we only check the names
def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*'] )

// Final_bams
// NOTE: BAMs are unstable, since upstream BAMs are unstable.
def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*'] )

// Mapping (incl. bam_input flagstat)
// Flagstat files are compared by content; BAM/BAI files are compared by name only.
def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )

// Preprocessing
// NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp.
// To keep the expression simpler, zip, log and html files are all excluded from checksum testing and only compared by name.
def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'], ignoreFile: null , include: ['**/*'] )
def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] )

// Genotyping
// NOTE: single file is created, with unstable content due to unstable BAM input.
// NOTE(review): this section sits after preprocessing, i.e. out of the alphabetical directory order requested in the assertions section.
def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.glf.gz'] )

// Metagenomics
def unstable_patterns_meta = [ // contain the creation date
'**/*.{biom,log,megan,rma6}',
'**/*table.tsv',
'**/log.txt'
]

def stable_content_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: unstable_patterns_meta , ignoreFile: null , include: ['**/*'] )
def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_meta )

// MultiQC
// Only file names are compared for this directory.
def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] )

///////////////////////
// DEFINE ASSERTIONS //
///////////////////////

assertAll(
{ assert workflow.success },
// This checks that there are no missing or additional output files.
// Also a good starting point to look at all the files in the output folder that need to be checked in subsequent sections.
{ assert snapshot( stable_name_all*.name ).match("all_files") },

// Checking changes to contents of each section
// NOTE: Keep the order of the sections in the alphanumeric order of the output directories.
// Each section should first check stable_content, stable_name second (if applicable).
{ assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") },
{ assert snapshot( stable_name_deduplication*.name ).match("deduplication") },
{ assert snapshot( stable_name_final_bams*.name ).match("final_bams") },
// NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
{ assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
{ assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") },
// NOTE(review): the read_filtering assertion below is disabled; stable_content_readfiltering / stable_name_readfiltering are not defined in this test.
// { assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") },
{ assert snapshot( stable_name_genotyping*.name ).match("genotyping") },
{ assert snapshot( stable_content_metagenomics , stable_name_metagenomics*.name ).match("metagenomics") },
{ assert snapshot( stable_name_multiqc*.name ).match("multiqc") },

// Versions
// Only the existence of the versions file is asserted, not its content.
{ assert new File("$outputDir/pipeline_info/nf_core_eager_software_mqc_versions.yml").exists() },

)
}
}
}
Loading