Skip to content
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/linting_comment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download lint results
uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11
uses: dawidd6/action-download-artifact@4c1e823582f43b179e2cbb49c3eade4e41f992e2 # v10
with:
workflow: linting.yml
workflow_conclusion: completed
Expand Down
106 changes: 87 additions & 19 deletions bin/collect_genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,22 @@
import argparse
import filecmp


def file_len(fname):
    """Return the number of lines in the file at *fname*.

    The file is streamed line by line, so it is never loaded into memory
    as a whole.

    Args:
        fname: Path of the text file to count.

    Returns:
        The number of lines as an int; 0 for an empty file.
    """
    with open(fname) as f:
        # Summing over the iterator avoids reading a loop variable after the
        # loop, which was unbound (UnboundLocalError) when the file was empty.
        return sum(1 for _ in f)


## A function to return the number of genotypes per line in a .geno file.
def file_width(fname):
    """Return the length of the first line of *fname*, whitespace-stripped.

    For a .geno file this is the number of genotypes per line.

    Args:
        fname: Path of the text file to inspect.

    Returns:
        The number of characters on the first line after stripping leading
        and trailing whitespace; 0 for an empty file.
    """
    with open(fname) as f:
        # Only the first line is needed; readline() yields "" on an empty
        # file, so the result is 0 instead of an implicit None.
        first_line = f.readline()
    return len(first_line.strip())


## A function to check that there are no duplicate individual IDs across ind files.
def check_for_duplicate_ids(indf1, indf2):
with open(indf1) as f:
Expand All @@ -26,13 +29,19 @@ def check_for_duplicate_ids(indf1, indf2):
inds2 = [x.strip().split()[0] for x in f.readlines()]
intersection = set(inds1).intersection(inds2)
if len(intersection) > 0:
raise IOError("Input .ind files contain duplicate individual IDs. Duplicates: {}".format(intersection))
raise IOError(
"Input .ind files contain duplicate individual IDs. Duplicates: {}".format(
intersection
)
)


## Function to check that the snp files are identical
def check_snp_files(snpf1, snpf2):
    """Verify that two .snp files have identical contents.

    Args:
        snpf1: Path to the first .snp file.
        snpf2: Path to the second .snp file.

    Raises:
        IOError: If the contents of the two files differ.
    """
    # shallow=False forces a content comparison. The default (shallow=True)
    # treats files as equal whenever their os.stat() signatures (type, size,
    # modification time) match, without ever reading the data.
    if not filecmp.cmp(snpf1, snpf2, shallow=False):
        raise IOError("Input .snp files are not identical.")


## Function to check the consistency of an eigenstrat database
def validate_eigenstrat(genof, snpf, indf):
dimsGeno = [file_len(genof), file_width(genof)]
Expand All @@ -41,40 +50,99 @@ def validate_eigenstrat(genof, snpf, indf):

# print(dimsGeno,linesSnp,linesInd)
##Check geno and snp compatibility
if dimsGeno[0] != linesSnp:
if dimsGeno[0] != linesSnp:
raise IOError("Input .snp and .geno files do not match.")

##Check geno and ind compatibility
if dimsGeno[1] != linesInd:
if dimsGeno[1] != linesInd:
raise IOError("Input .ind and .geno files do not match.")


VERSION = "1.0.0"

parser = argparse.ArgumentParser(usage="%(prog)s [-v] (-g1 <GENO FILE 1 NAME>) (-s1 <SNP FILE 1 NAME>) (-i1 <IND FILE 1 NAME>) (-g2 <GENO FILE 2 NAME>) (-s2 <SNP FILE 2 NAME>) (-i2 <IND FILE 2 NAME>) (-o <OUTPUT FILE PREFIX>)" , description="A tool to put together two EIGENSTRAT datasets of genotyped on the same SNP set into a single dataset.")
parser = argparse.ArgumentParser(
usage="%(prog)s [-v] (-g1 <GENO FILE 1 NAME>) (-s1 <SNP FILE 1 NAME>) (-i1 <IND FILE 1 NAME>) (-g2 <GENO FILE 2 NAME>) (-s2 <SNP FILE 2 NAME>) (-i2 <IND FILE 2 NAME>) (-o <OUTPUT FILE PREFIX>)",
description="A tool to put together two EIGENSTRAT datasets of genotyped on the same SNP set into a single dataset.",
)
parser._optionals.title = "Available options"
parser.add_argument("-g1", "--genoFn1", type = str, metavar = "<GENO FILE 1 NAME>", required = True, help = "The path to the input geno file of the first dataset.")
parser.add_argument("-s1", "--snpFn1", type = str, metavar = "<SNP FILE 1 NAME>", required = True, help = "The path to the input snp file of the first dataset.")
parser.add_argument("-i1", "--indFn1", type = str, metavar = "<IND FILE 1 NAME>", required = True, help = "The path to the input ind file of the first dataset.")
parser.add_argument("-g2", "--genoFn2", type = str, metavar = "<GENO FILE 2 NAME>", required = True, help = "The path to the input geno file of the second dataset.")
parser.add_argument("-s2", "--snpFn2", type = str, metavar = "<SNP FILE 2 NAME>", required = True, help = "The path to the input snp file of the second dataset.")
parser.add_argument("-i2", "--indFn2", type = str, metavar = "<IND FILE 2 NAME>", required = True, help = "The path to the input ind file of the second dataset.")
parser.add_argument("-o", "--output", type = str, metavar = "<OUTPUT FILES PREFIX>", required = True, help = "The desired output file prefix. Three output files are created, <OUTPUT FILES PREFIX>.geno , <OUTPUT FILES PREFIX>.snp and <OUTPUT FILES PREFIX>.ind .")
parser.add_argument("-v", "--version", action='version', version="{}".format(VERSION), help="Print the version and exit.")
parser.add_argument(
"-g1",
"--genoFn1",
type=str,
metavar="<GENO FILE 1 NAME>",
required=True,
help="The path to the input geno file of the first dataset.",
)
parser.add_argument(
"-s1",
"--snpFn1",
type=str,
metavar="<SNP FILE 1 NAME>",
required=True,
help="The path to the input snp file of the first dataset.",
)
parser.add_argument(
"-i1",
"--indFn1",
type=str,
metavar="<IND FILE 1 NAME>",
required=True,
help="The path to the input ind file of the first dataset.",
)
parser.add_argument(
"-g2",
"--genoFn2",
type=str,
metavar="<GENO FILE 2 NAME>",
required=True,
help="The path to the input geno file of the second dataset.",
)
parser.add_argument(
"-s2",
"--snpFn2",
type=str,
metavar="<SNP FILE 2 NAME>",
required=True,
help="The path to the input snp file of the second dataset.",
)
parser.add_argument(
"-i2",
"--indFn2",
type=str,
metavar="<IND FILE 2 NAME>",
required=True,
help="The path to the input ind file of the second dataset.",
)
parser.add_argument(
"-o",
"--output",
type=str,
metavar="<OUTPUT FILES PREFIX>",
required=True,
help="The desired output file prefix. Three output files are created, <OUTPUT FILES PREFIX>.geno , <OUTPUT FILES PREFIX>.snp and <OUTPUT FILES PREFIX>.ind .",
)
parser.add_argument(
"-v",
"--version",
action="version",
version="{}".format(VERSION),
help="Print the version and exit.",
)
args = parser.parse_args()

## Open input files
GenoFile1 = open(args.genoFn1, "r")
SnpFile1 = open(args.snpFn1, "r")
IndFile1 = open(args.indFn1, "r")
SnpFile1 = open(args.snpFn1, "r")
IndFile1 = open(args.indFn1, "r")

GenoFile2 = open(args.genoFn2, "r")
# SnpFile2 = open(args.snpFn2, "r") ## Never actually read in line by line
IndFile2 = open(args.indFn2, "r")
IndFile2 = open(args.indFn2, "r")

## open output files
GenoFileOut = open(args.output + ".geno", "w")
SnpFileOut = open(args.output + ".snp", "w")
IndFileOut = open(args.output + ".ind", "w")
SnpFileOut = open(args.output + ".snp", "w")
IndFileOut = open(args.output + ".ind", "w")

## Perform basic validation on inputs
validate_eigenstrat(args.genoFn1, args.snpFn1, args.indFn1)
Expand All @@ -85,7 +153,7 @@ def validate_eigenstrat(genof, snpf, indf):
## Now actually merge the data
## Geno
for line1, line2 in zip(GenoFile1, GenoFile2):
geno_line="{}{}".format(line1.strip(),line2.strip())
geno_line = "{}{}".format(line1.strip(), line2.strip())
print(geno_line, file=GenoFileOut)

## Snp
Expand Down
16 changes: 12 additions & 4 deletions bin/extract_map_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,24 @@ def _get_args():
action="store_true",
help="specify if bam file was created from merged fastq files",
)
parser.add_argument("-rev", dest="rev", default=None, help="path to reverse fastq file")
parser.add_argument("-of", dest="out_fwd", default=None, help="path to forward output fastq file")
parser.add_argument("-or", dest="out_rev", default=None, help="path to forward output fastq file")
parser.add_argument(
"-rev", dest="rev", default=None, help="path to reverse fastq file"
)
parser.add_argument(
"-of", dest="out_fwd", default=None, help="path to forward output fastq file"
)
parser.add_argument(
"-or", dest="out_rev", default=None, help="path to forward output fastq file"
)
parser.add_argument(
"-m",
dest="mode",
default="remove",
help="Read removal mode: remove reads (remove) or replace sequence by N (replace). Default = remove",
)
parser.add_argument("-t", dest="threads", default=4, help="Number of parallel threads")
parser.add_argument(
"-t", dest="threads", default=4, help="Number of parallel threads"
)

args = parser.parse_args()

Expand Down
4 changes: 3 additions & 1 deletion bin/filter_bam_fragment_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

def get_args():
"""This function parses and return arguments passed in"""
parser = argparse.ArgumentParser(prog="bam_filter", description="Filter bam on fragment length")
parser = argparse.ArgumentParser(
prog="bam_filter", description="Filter bam on fragment length"
)
parser.add_argument("bam", help="Bam aligment file")
parser.add_argument(
"-l",
Expand Down
15 changes: 14 additions & 1 deletion bin/print_x_contamination.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,20 @@ def make_float(x):
"Method2_ML_estimate": ml2,
"Method2_ML_SE": err_ml2,
}
print(Ind, nSNPs, mom1, err_mom1, ml1, err_ml1, mom2, err_mom2, ml2, err_ml2, sep="\t", file=output)
print(
Ind,
nSNPs,
mom1,
err_mom1,
ml1,
err_ml1,
mom2,
err_mom2,
ml2,
err_ml2,
sep="\t",
file=output,
)


jsonOut = {
Expand Down
6 changes: 6 additions & 0 deletions conf/test_long.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,14 @@ process {
memory: '15.GB',
time: '3.h',
]

withName: MAPDAMAGE2 {
time = { 3.h * task.attempt }
memory = { 15.GB * task.attempt }
}
}


includeConfig 'test_minimal.config'

params {
Expand Down
2 changes: 1 addition & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@
},
"mapdamage2": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
"installed_by": ["modules"]
},
"megan/rma2info": {
Expand Down
4 changes: 2 additions & 2 deletions modules/nf-core/mapdamage2/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions modules/nf-core/mapdamage2/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading