diff --git a/.dockstore.yml b/.dockstore.yml index 117ee38..31d15d3 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -4,7 +4,7 @@ workflows: primaryDescriptorPath: /main.nf readMePath: /README.md authors: - - name: Mandy Sullivan + - name: Amanda Sullivan email: xpa3@cdc.gov affiliation: CDC/NCIRD/ID orcid: '0000-0003-2007-4210' diff --git a/Dockerfile b/Dockerfile deleted file mode 100755 index bf89893..0000000 --- a/Dockerfile +++ /dev/null @@ -1,110 +0,0 @@ -# Create an argument to pull a particular version of an image -ARG python_image -ARG python_image=${python_image:-mira-nf:pyarrow-alpine} - -#################################################################################################### -# BASE IMAGE -#################################################################################################### -FROM ${python_image} as base - -# Create environment variable to get base python version -ARG python_version -ENV python_version=${python_version:-python3.10} - -# Required certs for apk update -COPY ca.crt /root/ca.crt - -# Put certs in /etc/ssl/certs location -RUN cat /root/ca.crt >> /etc/ssl/certs/ca-certificates.crt - -# Install system libraries of general use -RUN apk update && apk add --no-cache \ - bash \ - vim \ - tar \ - dos2unix \ - && pip install --upgrade pip - -# Create working directory variable -ENV PROJECT_DIR=/mira-nf - -# Copy all scripts to docker images -COPY . 
${PROJECT_DIR} - -############# Install python packages ################## - -# Copy all files to docker images -COPY docker_files/requirements.txt ${PROJECT_DIR}/requirements.txt - -# Install python requirements -RUN pip install --no-cache-dir -r ${PROJECT_DIR}/requirements.txt - -############# Run nextflow bash script ################## - -# Copy all files to docker images -COPY MIRA_nextflow.sh ${PROJECT_DIR}/MIRA_nextflow.sh - -# Convert spyne from Windows style line endings to Unix-like control characters -RUN dos2unix ${PROJECT_DIR}/MIRA_nextflow.sh - -# Allow permission to excute the bash scripts -RUN chmod a+x ${PROJECT_DIR}/MIRA_nextflow.sh - -############# Fix vulnerablities pkgs ################## - -# Copy all files to docker images -COPY docker_files/fixed_vulnerability_pkgs.txt ${PROJECT_DIR}/fixed_vulnerability_pkgs.txt - -# Copy all files to docker images -COPY docker_files/fixed_vulnerability_pkgs.sh ${PROJECT_DIR}/fixed_vulnerability_pkgs.sh - -# Convert bash script from Windows style line endings to Unix-like control characters -RUN dos2unix ${PROJECT_DIR}/fixed_vulnerability_pkgs.sh - -# Allow permission to excute the bash script -RUN chmod a+x ${PROJECT_DIR}/fixed_vulnerability_pkgs.sh - -# Execute bash script to wget the file and tar the package -RUN bash ${PROJECT_DIR}/fixed_vulnerability_pkgs.sh - -############# Remove vulnerability pkgs ################## - -# Copy all files to docker images -COPY docker_files/remove_vulnerability_pkgs.txt ${PROJECT_DIR}/remove_vulnerability_pkgs.txt - -# Copy all files to docker images -COPY docker_files/remove_vulnerability_pkgs.sh ${PROJECT_DIR}/remove_vulnerability_pkgs.sh - -# Convert bash script from Windows style line endings to Unix-like control characters -RUN dos2unix ${PROJECT_DIR}/remove_vulnerability_pkgs.sh - -# Allow permission to excute the bash script -RUN chmod a+x ${PROJECT_DIR}/remove_vulnerability_pkgs.sh - -# Execute bash script to wget the file and tar the package -RUN bash 
${PROJECT_DIR}/remove_vulnerability_pkgs.sh - -############# Remove the vendor packages ################## - -# Clean up and remove unwanted files -RUN rm -rf /usr/local/lib/${python_version}/site-packages/pip/_vendor \ - && rm -rf /usr/local/lib/${python_version}/site-packages/pipenv/patched/pip/_vendor \ - && rm -rf /usr/local/lib/${python_version}/site-packages/examples \ - && rm -rf ${PROJECT_DIR}/blast \ - && rm -rf ${PROJECT_DIR}/fastqc \ - && rm -rf ${PROJECT_DIR}/multiqc \ - && rm -rf ${PROJECT_DIR}/pyarrow - -############# Set up working directory ################## - -# Create working directory variable -ENV WORKDIR=/data - -# Set up volume directory in docker -VOLUME ${WORKDIR} - -# Set up working directory in docker -WORKDIR ${WORKDIR} - -# Export project directory to PATH -ENV PATH "$PATH:${PROJECT_DIR}" \ No newline at end of file diff --git a/README.md b/README.md index 8abae83..681f44d 100644 --- a/README.md +++ b/README.md @@ -40,10 +40,10 @@ MIRA-NF performs these steps for genome assembly and curation: | SARS-CoV-2 :: Spike-only | | 🥳 | | RSV | 🥳 | 🥳 | -![MIRA-NF workflows](docs/images/mira_nf_workflow_img_v6.png) +![MIRA-NF workflows](docs/images/mira_nf_workflow_img_v7.png) *MIRA-NF workflows* -## Usage +# Usage > To run this pipeline you will need to have these programs installed: @@ -65,30 +65,42 @@ First, prepare a samplesheet with your input data that looks as follows: Illumina data should be set up as follows: ```csv -Sample ID,Sample Type +sample_id,sample_type sample_1,Test sample_2,Test sample_3,Test sample_4,Test ``` +Each row represents a sample. + +| Column | Description | +|------------|-----------------------------------------------------------------------------------------------------------| +| `sample_id` | Custom sample name. This entry must match the name associated with the paired reads. Convert all spaces in sample names to underscores (`_`). | +| `sample_type` | The sample type for the given sample. 
Ex: test, - control, + control, etc. | Oxford Nanopore data should be set up as follows: ```csv -Barcode #,Sample ID,Sample Type +barcode,sample_id,sample_type barcode07,s1,Test barcode37,s2,Test barcode41,s3,Test ``` - Each row represents a sample. +| Column | Description | +|------------|-----------------------------------------------------------------------------------------------------------| +| `barcode` | The barcode used to create the ONT data for this sample. Must match the folder containing the fastq files associated with the sample. Single digit numbers must have 0 in front of them. Ex: barcode07 | +| `sample_id` | Custom sample name. Convert all spaces in sample names to underscores (`_`). | +| `sample_type` | The sample type for the given sample. Ex: test, positive, negative, etc. | + **Important things to note about samplesheet:** - Sample names within the "Sample ID" column need to be unique. -- Be sure that sample names are not nested within another sample name (i.e. having sample_1 and sample_1_1) +- The headers must be named as seen above. - Be sure that there are no empty lines at the end of the samplesheet. - For Illumina samples be sure that you have read 1 and read 2 for all samples in samplesheet. +- Illumina fastq files must be named in this format: {sample_id}_R1\*fastq\* or {sample_id}_R1\*fq\* AND {sample_id}_R2\*fastq\* or {sample_id}_R2\*fq\* **To use the nextflow samplesheet setup please refer to the usage document (../assets/usage.md). 
USING THE NEXTFLOW SAMPLESHEET SET UP WITH ONT DATA WILL REQUIRE YOU TO COMBINE ONT FASTQS YOURSELF.** diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 63f157d..0000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env python - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. 
- - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. 
Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. 
_text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. 
- checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - print(args) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/create_nextflow_samplesheet_i.py b/bin/create_nextflow_samplesheet_i.py deleted file mode 100755 index 402e118..0000000 --- 
a/bin/create_nextflow_samplesheet_i.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python - -# import yaml -from os.path import abspath -from sys import argv, exit -import pandas as pd -from glob import glob -import subprocess -import os -import argparse -import time - -time.sleep(60) - -parser = argparse.ArgumentParser() -parser.add_argument("-s", "--samplesheet", help="Samplesheet with sample names") -parser.add_argument( - "-r", - "--runid", - help="Full path to data directory containing either a fastq_pass subdirectory for ONT data or fastq subdirectory for Illumina", -) -parser.add_argument( - "-e", - "--experiment_type", - help="Experiment type options: Flu-ONT, SC2-Spike-Only-ONT, Flu-Illumina, SC2-Whole-Genome-ONT, SC2-Whole-Genome-Illumina", -) - -inputarguments = parser.parse_args() - -root = "/".join(abspath(__file__).split("/")[:-2]) -if len(argv) < 2: - exit( - "\n\tUSAGE: {} -s -r -e \n".format( - __file__ - ) - ) - -print(f"argv[1:]= {argv[1:]}") -try: - samplesheet = inputarguments.samplesheet - runpath = inputarguments.runid - -except: - parser.print_help() - exit(0) - -print(runpath) -print(samplesheet) - -df = pd.read_csv(samplesheet) -dfd = df.to_dict("index") - -data = "sample,fastq_1,fastq_2,sample_type\n" -for d in dfd.values(): - id = d["Sample ID"] - R1_fastq = glob(f"{runpath}/{id}*R1*fastq*", recursive=True)[0] - R2_fastq = glob(f"{runpath}/{id}*R2*fastq*", recursive=True)[0] - sample_type = d["Sample Type"] - if len(R1_fastq) < 1 or len(R2_fastq) < 1: - print(f"Fastq pair not found for sample {id}") - exit() - else: - data += f"{id},{R1_fastq},{R2_fastq},{sample_type}\n" - -with open("nextflow_samplesheet.csv", "w") as out: - out.write(data) diff --git a/bin/create_nextflow_samplesheet_o.py b/bin/create_nextflow_samplesheet_o.py deleted file mode 100755 index 266681c..0000000 --- a/bin/create_nextflow_samplesheet_o.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python - -# import yaml -from os.path import abspath -from sys import 
argv, exit -import pandas as pd -from glob import glob -import subprocess -import os -import argparse -import time - -time.sleep(60) - -parser = argparse.ArgumentParser() -parser.add_argument("-s", "--samplesheet", help="Samplesheet with sample names") -parser.add_argument( - "-r", - "--runid", - help="Full path to data directory containing either a fastq_pass subdirectory for ONT data or fastq subdirectory for Illumina", -) -parser.add_argument( - "-e", - "--experiment_type", - help="Experiment type options: Flu-ONT, SC2-Spike-Only-ONT, Flu-Illumina, SC2-Whole-Genome-ONT, SC2-Whole-Genome-Illumina", -) - -inputarguments = parser.parse_args() - -root = "/".join(abspath(__file__).split("/")[:-2]) -if len(argv) < 2: - exit( - "\n\tUSAGE: {} -s -r -e \n".format( - __file__ - ) - ) - -print(f"argv[1:]= {argv[1:]}") -try: - samplesheet = inputarguments.samplesheet - runpath = inputarguments.runid - if runpath[-1] == "/": - runpath = runpath[:-1] - experiment_type = inputarguments.experiment_type - -except: - parser.print_help() - exit(0) - -print(runpath) -print(samplesheet) - -df = pd.read_csv(samplesheet) -dfd = df.to_dict("index") - -data = "sample,barcodes,fastq_1,fastq_2,sample_type\n" -for d in dfd.values(): - id = d["Sample ID"] - barcode = d["Barcode #"] - print(f"Sample ID: {id}") - print(f"Barcode: {barcode}") - print (f"{runpath}/fastq_pass/cat_fastqs/{id}.fastq*") - print (glob(f"{runpath}/fastq_pass/cat_fastqs/{id}_nf_combined.fastq*", recursive=True)) - fastq_1 = glob(f"{runpath}/fastq_pass/cat_fastqs/{id}_nf_combined.fastq*", recursive=True)[0] - fastq_2 = "" - sample_type = d["Sample Type"] - if len(barcode) < 1: - print(f"Fastqs not found for sample {id}") - exit() - else: - data += f"{id},{barcode},{fastq_1},{fastq_2},{sample_type}\n" - -with open("nextflow_samplesheet.csv", "w") as out: - out.write(data) diff --git a/conf/modules.config b/conf/modules.config index 13eab62..6f32d0d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -49,12 
+49,12 @@ process { ] } - withName: 'NEXTFLOWSAMPLESHEETI' { - ext.args = '--quiet' - } - - withName: 'NEXTFLOWSAMPLESHEETO' { - ext.args = '--quiet' + withName: 'NEXTFLOWSAMPLESHEET' { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + pattern: 'bad_samples.tsv', + mode: params.publish_dir_mode, + ] } withName: 'FINDCHEMISTRY' { diff --git a/conf/modules_arm64.config b/conf/modules_arm64.config index 57f4d4f..a8d7820 100644 --- a/conf/modules_arm64.config +++ b/conf/modules_arm64.config @@ -50,12 +50,12 @@ process { ] } - withName: 'NEXTFLOWSAMPLESHEETI' { - ext.args = '--quiet' - } - - withName: 'NEXTFLOWSAMPLESHEETO' { - ext.args = '--quiet' + withName: 'NEXTFLOWSAMPLESHEET' { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + pattern: 'bad_samples.tsv', + mode: params.publish_dir_mode, + ] } withName: 'FINDCHEMISTRY' { diff --git a/conf/modules_docker.config b/conf/modules_docker.config index c32f75a..e0e4185 100644 --- a/conf/modules_docker.config +++ b/conf/modules_docker.config @@ -49,12 +49,12 @@ process { ] } - withName: 'NEXTFLOWSAMPLESHEETI' { - ext.args = '--quiet' - } - - withName: 'NEXTFLOWSAMPLESHEETO' { - ext.args = '--quiet' + withName: 'NEXTFLOWSAMPLESHEET' { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + pattern: 'bad_samples.tsv', + mode: params.publish_dir_mode, + ] } withName: 'FINDCHEMISTRYI' { diff --git a/conf/modules_docker_arm64.config b/conf/modules_docker_arm64.config index 064e46d..5e22177 100644 --- a/conf/modules_docker_arm64.config +++ b/conf/modules_docker_arm64.config @@ -50,12 +50,12 @@ process { ] } - withName: 'NEXTFLOWSAMPLESHEETI' { - ext.args = '--quiet' - } - - withName: 'NEXTFLOWSAMPLESHEETO' { - ext.args = '--quiet' + withName: 'NEXTFLOWSAMPLESHEET' { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + pattern: 'bad_samples.tsv', + mode: params.publish_dir_mode, + ] } withName: 'FINDCHEMISTRYI' { diff --git a/conf/modules_podman.config b/conf/modules_podman.config 
index 3d73d41..449ef56 100644 --- a/conf/modules_podman.config +++ b/conf/modules_podman.config @@ -49,12 +49,12 @@ process { ] } - withName: 'NEXTFLOWSAMPLESHEETI' { - ext.args = '--quiet' - } - - withName: 'NEXTFLOWSAMPLESHEETO' { - ext.args = '--quiet' + withName: 'NEXTFLOWSAMPLESHEET' { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + pattern: 'bad_samples.tsv', + mode: params.publish_dir_mode, + ] } withName: 'FINDCHEMISTRY' { diff --git a/conf/modules_podman_arm64.config b/conf/modules_podman_arm64.config index ff6066e..657aba9 100644 --- a/conf/modules_podman_arm64.config +++ b/conf/modules_podman_arm64.config @@ -50,12 +50,12 @@ process { ] } - withName: 'NEXTFLOWSAMPLESHEETI' { - ext.args = '--quiet' - } - - withName: 'NEXTFLOWSAMPLESHEETO' { - ext.args = '--quiet' + withName: 'NEXTFLOWSAMPLESHEET' { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + pattern: 'bad_samples.tsv', + mode: params.publish_dir_mode, + ] } withName: 'FINDCHEMISTRY' { diff --git a/conf/omics.config b/conf/omics.config index 8df0bed..df77318 100644 --- a/conf/omics.config +++ b/conf/omics.config @@ -71,20 +71,11 @@ process { container = 'cdcgov/multiqc:v1.19-alpine' } - withName: 'NEXTFLOWSAMPLESHEETI' { + withName: 'NEXTFLOWSAMPLESHEET' { publishDir = [ path: { "${params.outdir}/samplesheet_i" }, mode: params.publish_dir_mode ] - container = 'cdcgov/mira-nf:python3.10-alpine' - } - - withName: 'NEXTFLOWSAMPLESHEETO' { - publishDir = [ - path: { "${params.outdir}/samplesheet_o" }, - mode: params.publish_dir_mode - ] - container = 'cdcgov/mira-nf:python3.10-alpine' } withName: 'FINDCHEMISTRYI' { diff --git a/data/voi_references/flu_curated_references.txt b/data/voi_references/flu_curated_references.txt index 0b6e6b6..5ba1588 100644 --- a/data/voi_references/flu_curated_references.txt +++ b/data/voi_references/flu_curated_references.txt @@ -1,8 +1,8 @@ isolate_id isolate_name subtype passage_history nt_id ctype reference_id protein aa_aln cds_aln 
-EPI_ISL_129017 B/Brisbane/60/2008 B CXC4/C2 5136864685f49890cea4ffc68d687b7578dd5648 B_HA BRISBANE60 HA DRICTGITSSNSPHVVKTATQGEVNVTGVIPLTTTPTKSHFANLKGTETRGKLCPKCLNCTDLDVALGRPKCTGKIPSARVSILHEVRPVTSGCFPIMHDRTKIRQLPNLLRGYEHIRLSTHNVINAENAPGGPYKIGTSGSCPNITNGNGFFATMAWAVPKNDKNKTATNPLTIEVPYICTEGEDQITVWGFHSDNETQMAKLYGDSKPQKFTSSANGVTTHYVSQIGGFPNQTEDGGLPQSGRIVVDYMVQKSGKTGTITYQRGILLPQKVWCASGRSKVIKGSLPLIGEADCLHEKYGGLNKSKPYYTGEHAKAIGNCPIWVKTPLKLANGTKYRPPAKLLKERGFFGAIAGFLEGGWEGMIAGWHGYTSHGAHGVAVAADLKSTQEAINKITKNLNSLSELEVKNLQRLSGAMDELHNEILELDEKVDDLRADTISSQIELAVLLSNEGIINSEDEHLLALERKLKKMLGPSAVEIGNGCFETKHKCNQTCLDRIAAGTFDAGEFSLPTFDSLNITAASLNDDGLDNHTILLYYSTAASSLAVTLMIAIFVVYMVSRDNVSCSICL* GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGACCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGATGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCA
CAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAACATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGTTTGGCTGTAACACTGATGATAGCTATCTTTGTTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAA -EPI_ISL_129017 B/Brisbane/60/2008 B CXC4/C2 5136864685f49890cea4ffc68d687b7578dd5648 B_HA BRISBANE60 HA-signal MKAIIVLLMVVTSNA ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCA -EPI_ISL_129017 B/Brisbane/60/2008 B CXC4/C2 5136864685f49890cea4ffc68d687b7578dd5648 B_HA BRISBANE60 HA1 DRICTGITSSNSPHVVKTATQGEVNVTGVIPLTTTPTKSHFANLKGTETRGKLCPKCLNCTDLDVALGRPKCTGKIPSARVSILHEVRPVTSGCFPIMHDRTKIRQLPNLLRGYEHIRLSTHNVINAENAPGGPYKIGTSGSCPNITNGNGFFATMAWAVPKNDKNKTATNPLTIEVPYICTEGEDQITVWGFHSDNETQMAKLYGDSKPQKFTSSANGVTTHYVSQIGGFPNQTEDGGLPQSGRIVVDYMVQKSGKTGTITYQRGILLPQKVWCASGRSKVIKGSLPLIGEADCLHEKYGGLNKSKPYYTGEHAKAIGNCPIWVKTPLKLANGTKYRPPAKLLKER 
GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGACCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGG +EPI_ISL_183972 B/Brisbane/60/2008 B 15ee991a6e468cf6fee647ccc2855d4f0c641734 B_HA BRISBANE60 HA1 DRICTGITSSNSPHVVKTATQGEVNVTGVIPLTTTPTKSHFANLKGTETRGKLCPKCLNCTDLDVALGRPKCTGKIPSARVSILHEVRPVTSGCFPIMHDRTKIRQLPNLLRGYEHIRLSTHNVINAENAPGGPYKIGTSGSCPNITNGNGFFATMAWAVPKNDKNKTATNPLTIEVPYICTEGEDQITVWGFHSDNEAQMAKLYGDSKPQKFTSSANGVTTHYVSQIGGFPNQTEDGGLPQSGRIVVDYMVQKSGKTGTITYQRGILLPQKVWCASGRSKVIKGSLPLIGEADCLHEKYGGLNKSKPYYTGEHAKAIGNCPIWVKTPLKLANGTKYRPPAKLLKER 
GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGGCCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGG +EPI_ISL_183972 B/Brisbane/60/2008 B 15ee991a6e468cf6fee647ccc2855d4f0c641734 B_HA BRISBANE60 HA-signal MKAIIVLLMVVTSNA ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCA +EPI_ISL_183972 B/Brisbane/60/2008 B 15ee991a6e468cf6fee647ccc2855d4f0c641734 B_HA BRISBANE60 HA DRICTGITSSNSPHVVKTATQGEVNVTGVIPLTTTPTKSHFANLKGTETRGKLCPKCLNCTDLDVALGRPKCTGKIPSARVSILHEVRPVTSGCFPIMHDRTKIRQLPNLLRGYEHIRLSTHNVINAENAPGGPYKIGTSGSCPNITNGNGFFATMAWAVPKNDKNKTATNPLTIEVPYICTEGEDQITVWGFHSDNEAQMAKLYGDSKPQKFTSSANGVTTHYVSQIGGFPNQTEDGGLPQSGRIVVDYMVQKSGKTGTITYQRGILLPQKVWCASGRSKVIKGSLPLIGEADCLHEKYGGLNKSKPYYTGEHAKAIGNCPIWVKTPLKLANGTKYRPPAKLLKERGFFGAIAGFLEGGWEGMIAGWHGYTSHGAHGVAVAADLKSTQEAINKITKNLNSLSELEVKNLQRLSGAMDELHNEILELDEKVDDLRADTISSQIELAVLLSNEGIINSEDEHLLALERKLKKMLGPSAVEIGNGCFETKHKCNQTCLDRIAAGTFDAGEFSLPTFDSLNITAASLNDDGLDNHTILLYYSTAASSLAVTLMIAIFVVYMVSRDNVSCSICL* 
GATCGAATCTGCACTGGGATAACATCGTCAAACTCACCACATGTCGTCAAAACTGCTACTCAAGGGGAGGTCAATGTGACTGGTGTAATACCACTGACAACAACACCCACCAAATCTCATTTTGCAAATCTCAAAGGAACAGAAACCAGGGGGAAACTATGCCCAAAATGCCTCAACTGCACAGATCTGGACGTAGCCTTGGGCAGACCAAAATGCACGGGGAAAATACCCTCGGCAAGAGTTTCAATACTCCATGAAGTCAGACCTGTTACATCTGGGTGCTTTCCTATAATGCACGACAGAACAAAAATTAGACAGCTGCCTAACCTTCTCCGAGGATACGAACATATCAGGTTATCAACCCATAACGTTATCAATGCAGAAAATGCACCAGGAGGACCCTACAAAATTGGAACCTCAGGGTCTTGCCCTAACATTACCAATGGAAACGGATTTTTCGCAACAATGGCTTGGGCCGTCCCAAAAAACGACAAAAACAAAACAGCAACAAATCCATTAACAATAGAAGTACCATACATTTGTACAGAAGGAGAAGACCAAATTACCGTTTGGGGGTTCCACTCTGACAACGAGGCCCAAATGGCAAAGCTCTATGGGGACTCAAAGCCCCAGAAGTTCACCTCATCTGCCAACGGAGTGACCACACATTACGTTTCACAGATTGGTGGCTTCCCAAATCAAACAGAAGACGGAGGACTACCACAAAGTGGTAGAATTGTTGTTGATTACATGGTGCAAAAATCTGGGAAAACAGGAACAATTACCTATCAAAGGGGTATTTTATTGCCTCAAAAGGTGTGGTGCGCAAGTGGCAGGAGCAAGGTAATAAAAGGATCCTTGCCTTTAATTGGAGAAGCAGATTGCCTCCACGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGGGAACATGCAAAGGCCATAGGAAATTGCCCAATATGGGTGAAAACACCCTTGAAGCTGGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTAAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCTTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCCCATGGGGCACATGGAGTAGCGGTGGCAGCAGACCTTAAGAGCACTCAAGAGGCCATAAACAAGATAACAAAAAATCTCAACTCTTTGAGTGAGCTGGAAGTAAAGAATCTTCAAAGACTAAGCGGTGCCATGGATGAACTCCACAACGAAATACTAGAACTAGATGAGAAAGTGGATGATCTCAGAGCTGATACAATAAGCTCACAAATAGAACTCGCAGTCCTGCTTTCCAATGAAGGAATAATAAACAGTGAAGATGAACATCTCTTGGCGCTTGAAAGAAAGCTGAAGAAAATGCTGGGCCCCTCTGCTGTAGAGATAGGGAATGGATGCTTTGAAACCAAACACAAGTGCAACCAGACCTGTCTCGACAGAATAGCTGCTGGTACCTTTGATGCAGGAGAATTTTCTCTCCCCACCTTTGATTCACTGAATATTACTGCTGCATCTTTAAATGACGATGGATTGGATAATCATACTATACTGCTTTACTACTCAACTGCTGCCTCCAGTTTGGCTGTAACACTGATGATAGCTATCTTTGTTGTTTATATGGTCTCCAGAGACAATGTTTCTTGCTCCATCTGTCTATAA EPI_ISL_129017 B/Brisbane/60/2008 B CXC4/C2 8814fc14b6f6d17d5e3a0375ce62e833de613017 B_NS BRISBANE60 NEP MANNNMTTTQIEWRMKKMAIGSSTHSSSVLMKDIQSQFEQLKLRWESYPNLVKSTDYHQKRETIRLVTEELYLLSKRIDDNILFHKTVIANSSIIADMVVSLSLLETLYEMKDVVEVYSRQCL* 
ATGGCGAACAACAACATGACCACAACACAAATTGAGTGGAGGATGAAGAAGATGGCCATCGGATCCTCAACTCACTCTTCGAGCGTCTTAATGAAGGACATTCAAAGCCAATTCGAGCAGCTGAAACTGCGGTGGGAGTCTTATCCCAATTTGGTCAAGAGCACCGATTATCACCAGAAGAGGGAGACAATTAGACTGGTCACGGAAGAACTTTATCTTTTAAGTAAAAGAATTGATGATAACATACTATTCCACAAAACAGTAATAGCTAACAGCTCCATAATAGCTGACATGGTTGTATCATTATCATTATTAGAAACATTGTATGAAATGAAGGATGTGGTTGAAGTGTACAGCAGGCAGTGCTTGTGA EPI_ISL_129017 B/Brisbane/60/2008 B CXC4/C2 8814fc14b6f6d17d5e3a0375ce62e833de613017 B_NS BRISBANE60 NS1 MANNNMTTTQIEVGPGATNATINFEAGILECYERLSWQRALDYPGQDRLNRLKRKLESRIKTHNKSEPESKRMSLEERKAIGVKMMKVLLFMNPSAGIEGFEPYCMKSSSNSNCTKYNWTDYPSTPERCLDDIEEEPEDVDGPTEIVLRDMNNKDARQKIKEEVNTQKEGKFRLTIKRDMRNVLSLRVLVNGTFLKHPNGHKSLSTLHRLNAYDQSGRLVAKLVATDDLTVEDEEDGHRILNSLFERLNEGHSKPIRAAETAVGVLSQFGQEHRLSPEEGDN* ATGGCGAACAACAACATGACCACAACACAAATTGAGGTGGGTCCGGGAGCAACCAATGCCACCATAAACTTTGAAGCAGGAATTCTAGAGTGCTATGAAAGGCTTTCATGGCAAAGAGCCCTTGACTACCCTGGTCAAGACCGCCTAAACAGACTAAAGAGAAAATTAGAGTCAAGAATAAAGACTCACAACAAAAGTGAGCCTGAAAGTAAAAGGATGTCCCTTGAAGAGAGAAAAGCAATTGGAGTAAAAATGATGAAAGTACTCCTATTTATGAATCCGTCTGCTGGAATTGAAGGGTTTGAGCCATACTGTATGAAAAGTTCCTCAAATAGCAACTGTACGAAATACAATTGGACTGATTACCCTTCAACACCAGAGAGGTGCCTTGATGACATAGAGGAAGAACCAGAGGATGTTGATGGCCCAACTGAAATAGTATTAAGGGACATGAACAACAAAGATGCAAGGCAAAAGATAAAGGAGGAAGTAAACACTCAGAAAGAAGGGAAGTTCCGTTTGACAATAAAAAGGGATATGCGTAATGTATTGTCCTTGAGAGTGTTGGTAAACGGAACATTCCTCAAACACCCCAATGGACACAAGTCCTTATCAACTCTGCATAGATTGAATGCATATGACCAGAGTGGAAGGCTTGTTGCTAAACTTGTTGCCACTGATGATCTTACAGTGGAGGATGAAGAAGATGGCCATCGGATCCTCAACTCACTCTTCGAGCGTCTTAATGAAGGACATTCAAAGCCAATTCGAGCAGCTGAAACTGCGGTGGGAGTCTTATCCCAATTTGGTCAAGAGCACCGATTATCACCAGAAGAGGGAGACAATTAG EPI_ISL_166958 B/Phuket/3073/2013 B C2/C2 313945a8cf09953ac4db5f8c0d69e0498cac83ee B_MP PHUKET3073 BM2 MFEPFQILSICSFILSALHFMAWTIGHLNQIKRGVNMKIRIKGPNKETINREVSILRHSYQKEIQAKEAMKEVLSDNMEVLSDHIVIEGLSAEEIIKMGETVLEVEESH* 
ATGTTTGAACCATTCCAGATTCTTTCAATTTGTTCTTTTATTTTATCAGCTCTCCATTTCATGGCTTGGACAATAGGACATTTAAATCAAATAAAAAGAGGAGTAAACATGAAAATAAGAATAAAGGGGCCAAATAAAGAGACAATAAACAGAGAGGTATCAATTTTGAGACACAGTTACCAAAAAGAAATTCAGGCTAAGGAAGCAATGAAGGAAGTACTCTCTGACAACATGGAAGTATTGAGTGACCACATAGTAATTGAGGGGCTTTCTGCTGAAGAGATAATAAAAATGGGTGAAACAGTTTTGGAGGTAGAAGAATCGCATTAA @@ -17,20 +17,10 @@ EPI_ISL_166958 B/Phuket/3073/2013 B C2/C2 fc49fac550492563f09608b88454189625ea51 EPI_ISL_166958 B/Phuket/3073/2013 B C2/C2 b64545dcc0ba61789b25a156031e5d87a5912f20 B_NS PHUKET3073 NS1 MADNMTTTQIEVGPGATNATINFEAGILECYERLSWQRALDYPGQDRLNRLKRKLESRIKTHNKSEPESKRMSLEERKAIGVKMMKVLLFMNPSAGIEGFEPYCMKSSSKSNCPKYSWIDYPSTPGRCLDDIEEEPDDVDGPTEIVLRDMNNKDARQKIKEEVNTQKEGKFRLTIKRDMRNVLSLRVLVNGTFLKHPNGYKSLSTLHRLNAYDQSGRLVAKLVATDDLTVEDEEDGHRILNSLFERLNEGHSKPIRAAETAVGVLSQFGQEHRLSPEEGDN* ATGGCGGACAATATGACCACAACACAAATTGAGGTGGGTCCGGGAGCAACCAATGCCACCATAAACTTCGAAGCAGGAATTCTGGAGTGCTATGAAAGACTTTCATGGCAAAGGGCCCTTGACTACCCTGGTCAAGACCGTCTAAACAGACTAAAGAGGAAATTAGAGTCAAGAATAAAGACTCACAACAAAAGTGAGCCTGAAAGTAAAAGGATGTCTCTTGAAGAGAGAAAAGCAATTGGAGTAAAAATGATGAAAGTACTTCTATTTATGAATCCGTCTGCTGGAATTGAAGGGTTTGAGCCATACTGTATGAAAAGTTCCTCAAAGAGCAACTGTCCGAAATACAGTTGGATTGATTACCCTTCAACCCCAGGGAGGTGCCTTGATGACATAGAAGAAGAACCAGATGATGTTGATGGCCCAACTGAAATAGTATTAAGGGACATGAACAACAAAGATGCAAGGCAAAAGATAAAGGAGGAAGTAAACACTCAGAAAGAAGGGAAGTTCCGTTTGACAATAAAAAGGGATATGCGTAATGTATTGTCCCTGAGAGTGTTAGTAAACGGAACATTCCTCAAACACCCCAATGGATACAAGTCCTTATCAACTCTGCATAGATTGAATGCATATGACCAGAGTGGAAGGCTTGTTGCTAAACTTGTTGCTACTGATGATCTTACAGTGGAGGATGAAGAAGATGGCCATCGGATCCTCAATTCACTCTTCGAGCGTCTTAATGAAGGACATTCAAAGCCAATTCGAGCAGCTGAAACTGCGGTGGGAGTCTTATCCCAATTTGGTCAAGAGCACCGATTATCACCAGAAGAGGGAGACAATTAG EPI_ISL_166958 B/Phuket/3073/2013 B C2/C2 365e37c7ff597ec4dc0dab53185be80fd74aa4fe B_PA PHUKET3073 PA 
MDTFITRNFQTTIIQKAKNTMAEFSEDPELQPAMLFNICVHLEVCYVISDMNFLDEEGKAYTALEGQGKEQNLRPQYEVIEGMPRTIAWMVQRSLAQEHGIETPKYLADLFDYKTKRFIEVGITKGLADDYFWKKKEKLGNSMELMIFSYNQDYSLSNESSLDEEGKGRVLSRLTELQAELSLKNLWQVLIGEEDVEKGIDFRLGQTISRLRDISVPAGFSNFEGMRSYIDNIDPKGAIERNLARMSPLVSVTPKKLKWEDLRPIGPHIYNHELPEVPYNAFLLMSDELGLANMTEGKSKKPKTLAKECLEKYSTLRDQTDPILIMKSEKANENFLWKLWRDCVNTISNEEMSNELQKTNYAKWATGDGLTYQKIMKEVAIDDETMCQEEPKIPNKCRVAAWVQTEMNLLSTLTSKRALDLPEIGPDVAPVEHVGSERRKYFVNEINYCKASTVMMKYVLFHTSLLNESNASMGKYKVIPITNRIVNEKGESFDMLYGLAVKGQSHLRGDTDVVTVVTFEFSSTDPRVDSGKWPKYTVFRIGSLFVSGREKSVYLYCRVNGTNKIQMKWGMEARRCLLQSMQQMEAIVEQESSIQGYDMTKACFKGDRVNSPKTFSIGTQEGKLVKGSFGKALRVIFTKCLMHYVFGNAQLEGFSAESRRLLLLIQALKDRKGPWVFDLEGMYSGIEECISNNPWVIQSAYWFNEWLGFEKEGSKVLESVDEIMDE* ATGGATACTTTTATTACAAGAAACTTCCAGACTACAATAATACAAAAGGCCAAAAACACAATGGCAGAATTTAGTGAAGATCCTGAATTGCAACCAGCAATGCTATTCAACATCTGTGTCCATCTAGAGGTTTGCTATGTAATAAGTGACATGAATTTTCTTGACGAAGAAGGAAAAGCATATACAGCATTAGAAGGACAAGGGAAAGAACAAAATTTGAGACCACAATATGAAGTAATTGAGGGAATGCCAAGAACCATAGCATGGATGGTCCAAAGATCCTTAGCTCAAGAGCATGGGATAGAGACTCCCAAGTATCTGGCTGATTTGTTTGATTATAAGACCAAGAGATTTATAGAAGTTGGAATAACAAAAGGATTGGCTGATGATTACTTTTGGAAAAAGAAAGAGAAGCTGGGAAATAGCATGGAACTAATGATATTCAGCTACAATCAAGACTATTCGTTAAGTAATGAATCCTCATTGGATGAGGAAGGGAAAGGGAGAGTGCTAAGCAGGCTCACAGAACTTCAAGCTGAATTAAGTCTGAAAAACTTATGGCAAGTTCTCATAGGAGAAGAAGATGTTGAAAAGGGAATTGACTTTAGACTTGGACAAACAATATCTAGACTAAGGGATATATCTGTTCCTGCTGGTTTCTCCAATTTTGAAGGAATGAGGAGCTACATAGACAATATAGACCCGAAAGGAGCAATAGAGAGAAACCTAGCAAGGATGTCTCCCTTAGTATCAGTCACACCTAAAAAGTTGAAATGGGAGGATCTAAGACCAATAGGGCCTCACATTTACAACCATGAGCTACCAGAAGTTCCATATAATGCCTTTCTTTTAATGTCTGATGAACTGGGGCTGGCCAATATGACGGAGGGAAAGTCCAAAAAACCAAAGACATTAGCCAAAGAATGTCTAGAAAAGTACTCAACACTACGGGATCAGACTGACCCAATATTAATAATGAAAAGCGAAAAAGCTAACGAAAATTTCCTATGGAAACTTTGGAGAGACTGTGTAAATACAATAAGTAATGAGGAAATGAGTAACGAGTTACAGAAAACCAATTATGCCAAGTGGGCCACAGGGGATGGTTTAACATACCAAAAAATAATGAAAGAAGTAGCAATAGATGACGAAACAATGTGCCAAGAAGAGCCTAAAATCCCTAACAAGTGTAGAGTGGCTGCTTGGGTTCAAACAGAGATGAACCTATTGAGCACTCTGACAAGTAAAAGAGCTCTGGACCTACCAGAAATA
GGGCCAGACGTAGCACCAGTGGAGCATGTTGGAAGTGAAAGAAGAAAATACTTTGTTAATGAAATCAACTACTGTAAGGCCTCTACAGTTATGATGAAGTATGTGCTTTTTCACACTTCATTGCTGAATGAAAGCAATGCCAGCATGGGAAAATACAAAGTAATACCAATAACCAATAGAATAGTAAATGAAAAAGGAGAAAGTTTCGACATGCTTTATGGTCTGGCGGTTAAAGGACAATCTCATCTGAGGGGGGACACTGATGTTGTAACAGTTGTAACTTTCGAATTTAGTAGTACAGACCCCAGAGTGGACTCAGGAAAGTGGCCAAAATATACTGTGTTTAGGATTGGCTCCCTATTTGTGAGTGGAAGGGAAAAATCTGTGTACCTATATTGCAGAGTGAATGGCACAAATAAGATCCAAATGAAATGGGGAATGGAAGCTAGAAGATGTCTGCTTCAATCAATGCAACAAATGGAAGCAATTGTTGAACAAGAATCCTCGATACAAGGATACGACATGACCAAAGCTTGTTTCAAGGGGGACAGAGTAAATAGCCCCAAAACTTTCAGTATTGGAACTCAAGAAGGGAAACTAGTAAAAGGATCCTTTGGAAAAGCACTAAGAGTAATATTTACTAAATGTTTGATGCACTATGTATTTGGAAATGCCCAGTTGGAGGGATTCAGTGCCGAGTCTAGGAGACTTCTACTGTTGATTCAAGCATTAAAGGACAGAAAGGGCCCTTGGGTGTTCGACTTAGAGGGAATGTATTCTGGAATAGAAGAATGTATCAGTAACAACCCTTGGGTAATACAGAGTGCATACTGGTTCAATGAATGGTTGGGTTTTGAAAAGGAGGGAAGTAAAGTATTAGAATCAGTAGATGAAATAATGGATGAATAA EPI_ISL_166958 B/Phuket/3073/2013 B C2/C2 1ffe7b4cc0f417b6473b749b3fc6c84bb1be125d B_PB1 PHUKET3073 PB1 MNINPYFLFIDVPIQAAISTTFPYTGVPPYSHGTGTGYTIDTVIRTHEYSNKGKQYVSDITGCTMIDPTNGPLPEDNEPSAYAQLDCVLEALDRMDEEHPGLFQAASQNAMEALMVTTVDKLTQGRQTFDWTVCRNQPAATALNTTITSFRLNDLNGADKGGLVPFCQDIIDSLDKPEMTFFSVKNIKKKLPAKNRKGFLIKRIPMKVKDRISRVEYIKRALSLNTMTKDAERGKLKRRAIATAGIQIRGFVLVVENLAKNICENLEQSGLPVGGNEKKAKLSNAVAKMLSNCPPGGISMTVTGDNTKWNECLNPRIFLAMTERITRDSPIWFRDFCSIAPVLFSNKIARLGKGFMITSKTKRLKAQIPCPDLFSIPLERYNEETRAKLKRLKPFFNEEGTASLSPGMMMGMFNMLSTVLGVAALGIKNIGNKEYLWDGLQSSDDFALFVNAKDEETCMEGINDFYRTCKLLGINMSKKKSYCNETGMFEFTSMFYRDGFVSNFAMEIPSFGVAGVNESADMAIGMTIIKNNMINNGMGPATAQTAIQLFIADYRYTYKCHRGDSKVEGKRMKIIKELWENTKGRDGLLVADGGPNIYNLRNLHIPEIVLKYNLMDPEYKGRLLHPQNPFVGHLSIEGIKEADITPAHGPVRKMDYDAVSGTHSWRTKRNRSILNTDQRNMILEEQCYAKCCNLFEACFNSASYRKPVGQHSMLEAMAHRLRMDARLDYESGRMSKDDFEKAMAHLGEIGYT* 
ATGAATATAAATCCGTATTTTCTATTCATAGATGTACCCATACAGGCAGCAATTTCAACAACATTCCCATACACCGGTGTTCCCCCTTATTCCCATGGAACGGGAACAGGCTACACAATAGACACCGTGATCAGAACACATGAGTACTCGAACAAAGGAAAACAGTATGTTTCTGACATCACAGGGTGTACAATGATAGATCCAACAAATGGGCCATTACCTGAAGACAATGAGCCAAGTGCCTATGCACAATTAGATTGCGTTCTGGAGGCTTTGGATAGAATGGATGAGGAACATCCAGGTCTGTTTCAAGCAGCCTCACAGAATGCCATGGAGGCACTAATGGTCACAACTGTAGACAAATTAACCCAGGGGAGACAGACTTTCGATTGGACAGTATGCAGAAACCAGCCTGCTGCAACGGCACTAAACACAACAATAACCTCCTTCAGATTGAATGATTTGAATGGAGCTGACAAGGGTGGATTGGTACCCTTTTGCCAAGATATCATTGATTCATTAGACAAGCCTGAAATGACTTTCTTCTCAGTAAAGAATATAAAGAAAAAATTGCCTGCAAAAAACAGAAAGGGTTTCCTCATAAAGAGAATACCAATGAAAGTAAAAGACAGGATATCCAGAGTGGAATACATCAAAAGAGCATTGTCATTAAACACAATGACAAAAGATGCTGAAAGAGGCAAACTAAAAAGAAGAGCGATTGCAACCGCTGGAATACAAATCAGAGGGTTTGTATTAGTAGTTGAAAACTTGGCTAAAAACATCTGTGAAAATCTAGAACAAAGTGGTTTGCCCGTGGGTGGAAATGAAAAGAAGGCCAAACTGTCAAATGCAGTAGCCAAAATGCTCAGTAACTGCCCACCAGGAGGGATCAGCATGACAGTAACAGGAGACAATACTAAATGGAATGAATGCTTAAATCCACGAATCTTTTTGGCTATGACTGAAAGAATAACCAGAGACAGCCCAATTTGGTTCCGGGATTTTTGTAGTATAGCACCTGTCTTGTTCTCCAACAAAATAGCCAGATTAGGGAAAGGATTCATGATAACAAGTAAAACAAAAAGACTAAAGGCTCAAATACCTTGTCCTGATCTGTTCAGCATACCATTAGAAAGATATAATGAAGAAACAAGGGCGAAATTAAAAAGGCTGAAGCCATTCTTCAATGAAGAAGGAACGGCATCTTTGTCGCCTGGGATGATGATGGGAATGTTTAATATGCTATCTACCGTGTTGGGAGTAGCAGCACTAGGCATCAAAAACATTGGAAACAAGGAATACTTATGGGATGGACTGCAATCTTCCGATGATTTTGCTTTGTTTGTTAATGCAAAAGATGAAGAAACATGTATGGAAGGGATAAATGATTTTTACCGAACATGTAAATTATTGGGAATAAACATGAGCAAAAAGAAAAGTTACTGTAACGAAACTGGAATGTTTGAATTTACAAGCATGTTCTATAGAGATGGATTTGTATCTAACTTTGCAATGGAAATTCCTTCATTTGGAGTTGCTGGAGTAAATGAATCAGCAGATATGGCAATAGGAATGACAATAATAAAGAACAATATGATTAACAATGGGATGGGTCCGGCAACAGCACAAACAGCCATACAATTGTTCATAGCTGATTATAGGTACACATACAAATGCCACAGAGGAGATTCCAAAGTGGAAGGAAAAAGAATGAAAATTATAAAGGAGCTATGGGAAAACACTAAAGGAAGAGATGGTCTGTTAGTAGCAGATGGTGGGCCCAACATTTACAATTTGAGAAACTTACATATCCCAGAAATAGTATTGAAGTACAACCTAATGGACCCTGAATACAAAGGGCGGTTACTTCACCCTCAAAATCCCTTTGTAGGACATTTGTCTATTGAAGGCATCAAAGAAGCAGATATAACCCCAGCACATGGTCCTGTGAGGAAAATGGATTACGATGCAGTGTCTGGAACTCATAGTTGGAGAAC
CAAAAGGAACAGATCTATACTAAATACTGATCAGAGGAACATGATTCTTGAAGAACAATGCTACGCTAAATGTTGCAATCTTTTTGAGGCCTGTTTTAACAGTGCATCATACAGGAAACCAGTAGGGCAACATAGCATGCTTGAGGCTATGGCCCATAGATTAAGAATGGATGCACGACTAGATTATGAATCAGGAAGAATGTCAAAGGATGATTTTGAAAAAGCAATGGCTCACCTTGGTGAGATTGGGTACACATAA -EPI_ISL_227813 A/California/07/2009 A / H1N1 fd1e3a4867f5dc66c756966578f784237e40e092 A_PA HK4801 PA-X MEDFVRQCFNPMIVELAXKAMKEYGEDPKIETNKFAAICTHLEVCFMYSDFHFIDERGESIIVESGDPNALLKHRFEIIEGRDRIMAWTVVNSICNTTGVEKPKFLPDLYDYKENRFIEIGVTRREVHIYYLEKANKIKSEKTHIHIFSFTGEEMATKADYTLDEESRARIKTRLFTIRQEMASRSLWDSFVSPKEAKRQLKKNLRLQELCASLPTKVSHRTSPALKTLEPM* ATGGAAGACTTTGTGCGACAATGCTTCAATCCAATGATCGTCGAGCTTGCGGRAAAGGCAATGAAAGAATATGGGGAAGATCCGAAAATCGAAACTAACAAGTTTGCTGCAATATGCACACATTTGGAAGTTTGTTTCATGTATTCGGATTTCCATTTCATCGACGAACGGGGTGAATCAATAATTGTAGAATCTGGTGACCCGAATGCACTATTGAAGCACCGATTTGAGATAATTGAAGGAAGAGACCGAATCATGGCCTGGACAGTGGTGAACAGTATATGTAACACAACAGGGGTAGAGAAGCCTAAATTTCTTCCTGATTTGTATGATTACAAAGAGAACCGGTTCATTGAAATTGGAGTAACACGGAGGGAAGTCCACATATATTACCTAGAGAAAGCCAACAAAATAAAATCTGAGAAGACACACATTCACATCTTTTCATTCACTGGAGAGGAGATGGCCACCAAAGCGGACTACACCCTTGACGAAGAGAGCAGGGCAAGAATCAAAACTAGGCTTTTCACTATAAGACAAGAAATGGCCAGTAGGAGTCTATGGGATTCCTTCGTCAGTCCGAAAGAGGCGAAGAGACAATTGAAGAAAAATTTGAGATTACAGGAACTATGCGCAAGCTTGCCGACCAAAGTCTCCCACCGAACTTCCCCAGCCTTGAAAACTTTAGAGCCTATGTAG............................................................ 
EPI_ISL_227813 A/California/07/2009 A / H1N1 a0a15825739c1b21b1cac3dbf2bd879c63791358 A_HA_H1 CALI07 HA DTLCIGYHANNSTDTVDTVLEKNVTVTHSVNLLEDKHNGKLCKLRGVAPLHLGKCNIAGWILGNPECESLSTASSWSYIVETPSSDNGTCYPGDFIDYEELREQLSSVSSFERFEIFPKTSSWPNHDSNKGVTAACPHAGAKSFYKNLIWLVKKGNSYPKLSKSYINDKGKEVLVLWGIHHPSTSADQQSLYQNADAYVFVGSSRYSKKFKPEIAIRPKVRDQEGRMNYYWTLVEPGDKITFEATGNLVVPRYAFAMERNAGSGIIISDTPVHDCNTTCQTPKGAINTSLPFQNIHPITIGKCPKYVKSTKLRLATGLRNIPSIQSRGLFGAIAGFIEGGWTGMVDGWYGYHHQNEQGSGYAADLKSTQNAIDEITNKVNSVIEKMNTQFTAVGKEFNHLEKRIENLNKKVDDGFLDIWTYNAELLVLLENERTLDYHDSNVKNLYEKVRSQLKNNAKEIGNGCFEFYHKCDNTCMESVKNGTYDYPKYSEEAKLNREEIDGVKLESTRIYQILAIYSTVASSLVLVVSLGAISFWMCSNGSLQCRICI GACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGT
AAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATT -EPI_ISL_227813 A/California/07/2009 A / H1N1 fd1e3a4867f5dc66c756966578f784237e40e092 A_PA HK4801 PA MEDFVRQCFNPMIVELAXKAMKEYGEDPKIETNKFAAICTHLEVCFMYSDFHFIDERGESIIVESGDPNALLKHRFEIIEGRDRIMAWTVVNSICNTTGVEKPKFLPDLYDYKENRFIEIGVTRREVHIYYLEKANKIKSEKTHIHIFSFTGEEMATKADYTLDEESRARIKTRLFTIRQEMASRSLWDSFRQSERGEETIEEKFEITGTMRKLADQSLPPNFPSLENFRAYVDGFEPNGCIEGKLSQMSKEVNAKIEPFLRTTPRPLRLPDGPLCHQRSKFLLMDALKLSIEDPSHEGEGIPLYDAIKCMKTFFGWKEPNIVKPHEKGINPNYLMAWKQVLAELQDIENEEKIPRTKNMKRTSQLKWALGENMAPEKVDFDDCKDVGDLKQYDSDEPEPRSLASWVQNEFNKACELTDSSWIELDEIGEDVAPIEHIASMRRNYFTAEVSHCRATEYIMKGVYINTALLNASCAAMDDFQLIPMISKCRTKEGRRKTNLYGFIIKGRSHLRNDTDVVNFVSMEFSLTDPRLEPHKWEKYCVLEIGDMLLRTAIGQVSRPMFLYVRTNGTSKIKMKWGMEMRRCLLQSLQQIESMIEAESSVKEKDMTKEFFENKSETWPIGESPRGVEEGSIGKVCRTLLAKSVFNSLYASPQLEGFSAESRKLLLIVQALRDNLEPGTFDLGGLYEAIEECLINDPWVLLNASWFNSFLTHALK* 
ATGGAAGACTTTGTGCGACAATGCTTCAATCCAATGATCGTCGAGCTTGCGGRAAAGGCAATGAAAGAATATGGGGAAGATCCGAAAATCGAAACTAACAAGTTTGCTGCAATATGCACACATTTGGAAGTTTGTTTCATGTATTCGGATTTCCATTTCATCGACGAACGGGGTGAATCAATAATTGTAGAATCTGGTGACCCGAATGCACTATTGAAGCACCGATTTGAGATAATTGAAGGAAGAGACCGAATCATGGCCTGGACAGTGGTGAACAGTATATGTAACACAACAGGGGTAGAGAAGCCTAAATTTCTTCCTGATTTGTATGATTACAAAGAGAACCGGTTCATTGAAATTGGAGTAACACGGAGGGAAGTCCACATATATTACCTAGAGAAAGCCAACAAAATAAAATCTGAGAAGACACACATTCACATCTTTTCATTCACTGGAGAGGAGATGGCCACCAAAGCGGACTACACCCTTGACGAAGAGAGCAGGGCAAGAATCAAAACTAGGCTTTTCACTATAAGACAAGAAATGGCCAGTAGGAGTCTATGGGATTCCTTTCGTCAGTCCGAAAGAGGCGAAGAGACAATTGAAGAAAAATTTGAGATTACAGGAACTATGCGCAAGCTTGCCGACCAAAGTCTCCCACCGAACTTCCCCAGCCTTGAAAACTTTAGAGCCTATGTAGATGGATTCGAGCCGAACGGCTGCATTGAGGGCAAGCTTTCCCAAATGTCAAAAGAAGTGAACGCCAAAATTGAACCATTCTTGAGGACGACACCACGCCCCCTCAGATTGCCTGATGGGCCTCTTTGCCATCAGCGGTCAAAGTTCCTGCTGATGGATGCTCTGAAATTAAGTATTGAAGACCCGAGTCACGAGGGGGAGGGAATACCACTATATGATGCAATCAAATGCATGAAGACATTCTTTGGCTGGAAAGAGCCTAACATAGTCAAACCACATGAGAAAGGCATAAATCCCAATTACCTCATGGCTTGGAAGCAGGTGCTAGCAGAGCTACAGGACATTGAAAATGAAGAGAAGATCCCAAGGACAAAGAACATGAAGAGAACAAGCCAATTGAAGTGGGCACTCGGTGAAAATATGGCACCAGAAAAAGTAGACTTTGATGACTGCAAAGATGTTGGAGACCTTAAACAGTATGACAGTGATGAGCCAGAGCCCAGATCTCTAGCAAGCTGGGTCCAAAATGAATTCAATAAGGCATGTGAATTGACTGATTCAAGCTGGATAGAACTTGATGAAATAGGAGAAGATGTTGCCCCGATTGAACATATCGCAAGCATGAGGAGGAACTATTTTACAGCAGAAGTGTCCCACTGCAGGGCTACTGAATACATAATGAAGGGAGTGTACATAAATACGGCCTTGCTCAATGCATCCTGTGCAGCCATGGATGACTTTCAGCTGATCCCAATGATAAGCAAATGTAGGACCAAAGAAGGAAGACGGAAAACAAACCTGTATGGGTTCATTATAAAAGGAAGGTCTCATTTGAGAAATGATACTGATGTGGTGAACTTTGTAAGTATGGAGTTCTCACTCACTGACCCGAGACTGGAGCCACACAAATGGGAAAAATACTGTGTTCTTGAAATAGGAGACATGCTCTTGAGGACTGCGATAGGCCAAGTGTCGAGGCCCATGTTCCTATATGTGAGAACCAATGGAACCTCCAAGATCAAGATGAAATGGGGCATGGAAATGAGGCGCTGCCTTCTTCAGTCTCTTCAGCAGATTGAGAGCATGATTGAGGCCGAGTCTTCTGTCAAAGAGAAAGACATGACCAAGGAATTCTTTGAAAACAAATCGGAAACATGGCCAATCGGAGAGTCACCCAGGGGAGTGGAGGAAGGCTCTATTGGGAAAGTGTGCAGGACCTTACTGGCAAAATCTGTATTCAACAGTCTATATGCGTCTCCACAACTTGAGGGGTTTTCGGCTGAATCTAGAAAATTGCTTCT
CATTGTTCAGGCACTTAGGGACAACCTGGAACCTGGAACCTTCGATCTTGGGGGGCTATATGAAGCAATCGAGGAGTGCCTGATTAATGATCCCTGGGTTTTGCTTAATGCATCTTGGTTCAACTCCTTCCTCACACATGCACTGAAGTAG -EPI_ISL_227813 A/California/07/2009 A / H1N1 34a41e99f87608708a6d9bb72eceb394762f00e7 A_NS HK4801 NEP MDSNTMSSFQDILMRMSKMQLGSSSEDLNGMVTRFESLKIYRDSLGETVMRMGDLHYLQSRNEKWREQLGQKFEEIRWLIEEMRHRLKATENSFEQITFMQALQLLLEVEQEIRAFSFQLI* ATGGACTCCAACACCATGTCAAGCTTTCAGGACATACTTATGAGGATGTCAAAAATGCAGTTGGGGTCCTCATCGGAGGACTTGAATGGAATGGTAACACGGTTCGAGTCTCTGAAAATATACAGAGATTCGCTTGGAGAAACTGTGATGAGAATGGGAGACCTTCACTACCTCCAGAGCAGAAATGAAAAGTGGCGAGAGCAATTGGGACAGAAATTTGAGGAAATAAGGTGGTTAATTGAAGAAATGCGGCACAGATTGAAAGCGACAGAGAATAGTTTCGAACAAATAACATTTATGCAAGCCTTACAACTACTGCTTGAAGTAGAACAAGAGATAAGAGCTTTCTCGTTTCAGCTTATTTAA -EPI_ISL_227813 A/California/07/2009 A / H1N1 34a41e99f87608708a6d9bb72eceb394762f00e7 A_NS HK4801 NS1 MDSNTMSSFQVDCFLWHIRKRFADNGLGDAPFLDRLRRDQKSLKGRGNTLGLDIETATLVGKQIVEWILKEESSETLRMTIASVPTSRYLSDMTLEEMSRDWFMLMPRQKIIGPLCVRLDQAIMEKNIVLKANFSVIFNRLETLILLRAFTEEGAIVGEISPLPSLPGHTYEDVKNAVGVLIGGLEWNGNTVRVSENIQRFAWRNCDENGRPSLPPEQK* ATGGACTCCAACACCATGTCAAGCTTTCAGGTAGACTGTTTCCTTTGGCATATCCGCAAGCGATTTGCAGACAATGGATTGGGTGATGCCCCATTCCTTGATCGGCTCCGCCGAGATCAAAAGTCCTTAAAAGGAAGAGGCAACACCCTTGGCCTCGATATCGAAACAGCCACTCTTGTTGGGAAACAAATCGTGGAATGGATCTTGAAAGAGGAATCCAGCGAGACACTTAGAATGACAATTGCATCTGTACCTACTTCGCGCTACCTTTCTGACATGACCCTCGAGGAAATGTCACGAGACTGGTTCATGCTCATGCCTAGGCAAAAGATAATAGGCCCTCTTTGCGTGCGATTGGACCAGGCGATCATGGAAAAGAACATAGTACTGAAAGCGAACTTCAGTGTAATCTTTAACCGATTAGAGACCTTGATACTACTAAGGGCTTTCACTGAGGAGGGAGCAATAGTTGGAGAAATTTCACCATTACCTTCTCTTCCAGGACATACTTATGAGGATGTCAAAAATGCAGTTGGGGTCCTCATCGGAGGACTTGAATGGAATGGTAACACGGTTCGAGTCTCTGAAAATATACAGAGATTCGCTTGGAGAAACTGTGATGAGAATGGGAGACCTTCACTACCTCCAGAGCAGAAATGA................................................................................................................................................................................. 
-EPI_ISL_227813 A/California/07/2009 A / H1N1 ab5727e85de808584628386d3b6125f6a5687f35 A_PB1 HK4801 PB1-F2 MEQEQDTPWTQ* ATGGAACAGGAACAGGATACACCATGGACACAGTAA....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.......................................................................................................................................................................................................................................................................................................... EPI_ISL_227813 A/California/07/2009 A / H1N1 a0a15825739c1b21b1cac3dbf2bd879c63791358 A_HA_H1 CALI07 HA-signal MKAILVVLLYTFATANA ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCA -EPI_ISL_227813 A/California/07/2009 A / H1N1 ab5727e85de808584628386d3b6125f6a5687f35 A_PB1 HK4801 PB1 MDVNPTLLFLKIPAQNAISTTFPYTGDPPYSHGTGTGYTMDTVNRTHQYSEKGKWTTNTETGAPQLNPIDGPLPEDNEPSGYAQTDCVLEAMAFLEESHPGIFENSCLETMEVVQQTRVDKLTQGRQTYDWTLNRNQPAATALANTIEVFRSNGLTANESGRLIDFLKDVMESMNKEEIEITTHFQRKRRVRDNMTKKMVTQRTIGKKKQRLNKRGYLIRALTLNTMTKDAERGKLKRRAIATPGMQIRGFVYFVETLARSICEKLEQSGLPVGGNEKKAKLANVVRKMMTNSQDTEISFTITGDNTKWNENQNPRMFLAMITYITRNQPEWFRNILSMAPIMFSNKMARLGKGYMFESKRMKIRTQIPAEMLASIDLKYFNESTKKKIEKIRPLLIDGTASLSPGMMMGMFNMLSTVLGVSILNLGQKKYTKTIYWWDGLQSSDDFALIVNAPNHEGIQAGVDRFYRTCKLVGINMSKKKSYINKTGTFEFTSFFYRYGFVANFSMELPSFGVSGVNESADMSIGVTVIKNNMINNDLGPATAQMALQLFIKDYRYTYRCHRGDTQIQTRRSFELKKLWDQTQSKVGLLVSDGGPNLYNIRNLHIPEVCLKWELMDDDYRGRLCNPLNPFVSHKEIDSVNNAVVMPAHGPAKSMEYDAVATTHSWIPKRNRSILNTSQRGILEDEQMYQKCCNLFEKFFPSSSYRRPVGISSMVEAMVSRARIDARVDFESGRIKKEEFSEIMKICSTIEELRRQK* 
ATGGATGTCAATCCGACTCTACTTTTCCTAAAAATTCCAGCGCAAAATGCCATAAGCACCACATTCCCTTATACTGGAGATCCTCCATACAGCCATGGAACAGGAACAGGATACACCATGGACACAGTAAACAGAACACACCAATACTCAGAAAAGGGAAAGTGGACGACAAACACAGAGACTGGTGCACCCCAGCTCAACCCGATTGATGGACCACTACCTGAGGATAATGAACCAAGTGGGTATGCACAAACAGACTGTGTTCTAGAGGCTATGGCTTTCCTTGAAGAATCCCACCCAGGAATATTTGAGAATTCATGCCTTGAAACAATGGAAGTTGTTCAACAAACAAGGGTAGATAAACTAACTCAAGGTCGCCAGACTTATGATTGGACATTAAACAGAAATCAACCGGCAGCAACTGCATTGGCCAACACCATAGAAGTCTTTAGATCGAATGGCCTAACAGCTAATGAGTCAGGAAGGCTAATAGATTTCTTAAAGGATGTAATGGAATCAATGAACAAAGAGGAAATAGAGATAACAACCCACTTTCAAAGAAAAAGGAGAGTAAGAGACAACATGACCAAGAAGATGGTCACGCAAAGAACAATAGGGAAGAAAAAACAAAGACTGAATAAGAGAGGCTATCTAATAAGAGCACTGACATTAAATACGATGACCAAAGATGCAGAGAGAGGCAAGTTAAAAAGAAGGGCTATCGCAACACCTGGGATGCAGATTAGAGGTTTCGTATACTTTGTTGAAACTTTAGCTAGGAGCATTTGCGAAAAGCTTGAACAGTCTGGGCTCCCAGTAGGGGGCAATGAAAAGAAGGCCAAACTGGCAAATGTTGTGAGAAAGATGATGACTAATTCACAAGACACAGAGATTTCTTTCACAATCACTGGGGACAACACTAAGTGGAATGAAAATCAAAATCCTCGAATGTTCCTGGCGATGATTACATATATCACCAGAAATCAACCCGAGTGGTTCAGAAACATCCTGAGCATGGCACCCATAATGTTCTCAAACAAAATGGCAAGACTAGGGAAAGGGTACATGTTCGAGAGTAAAAGAATGAAGATTCGAACACAAATACCAGCAGAAATGCTAGCAAGCATTGACCTGAAGTACTTCAATGAATCAACAAAGAAGAAAATTGAGAAAATAAGGCCTCTTCTAATAGATGGCACAGCATCACTGAGTCCTGGGATGATGATGGGCATGTTCAACATGCTAAGTACGGTCTTGGGAGTCTCGATACTGAATCTTGGACAAAAGAAATACACCAAGACAATATACTGGTGGGATGGGCTCCAATCATCCGACGATTTTGCTCTCATAGTGAATGCACCAAACCATGAGGGAATACAAGCAGGAGTGGACAGATTCTACAGGACCTGCAAGTTAGTGGGAATCAACATGAGCAAAAAGAAGTCCTATATAAATAAGACAGGGACATTTGAATTCACAAGCTTTTTTTATCGCTATGGATTTGTGGCTAATTTTAGCATGGAGCTACCCAGCTTTGGAGTGTCTGGAGTAAATGAATCAGCTGACATGAGTATTGGAGTAACAGTGATAAAGAACAACATGATAAACAATGACCTTGGACCTGCAACGGCCCAGATGGCTCTTCAATTGTTCATCAAAGACTACAGATACACATATAGGTGCCATAGGGGAGACACACAAATTCAGACGAGAAGATCATTTGAGTTAAAGAAGCTGTGGGATCAAACCCAATCAAAGGTAGGGCTATTAGTATCAGATGGAGGACCAAACTTATACAATATACGGAATCTTCACATTCCTGAAGTCTGCTTAAAATGGGAGCTAATGGATGATGATTATCGGGGAAGACTTTGTAATCCCCTGAATCCCTTTGTCAGTCATAAAGAGATTGATTCTGTAAACAATGCTGTGGTAATGCCAGCCCATGGTCCAGCCAAAAGCATGGAATATGATGCCGTTGCAACTACACATTCCTGGAT
TCCCAAGAGGAATCGTTCTATTCTCAACACAAGCCAAAGGGGAATTCTTGAGGATGAACAGATGTACCAGAAGTGCTGCAATCTATTCGAGAAATTTTTCCCTAGCAGTTCATATAGGAGACCGGTTGGAATTTCTAGCATGGTGGAGGCCATGGTGTCTAGGGCCCGGATTGATGCCAGGGTCGACTTCGAGTCTGGACGGATCAAGAAAGAAGAGTTCTCTGAGATCATGAAGATCTGTTCCACCATTGAAGAACTCAGACGGCAAAAATAA -EPI_ISL_227813 A/California/07/2009 A / H1N1 a0a15825739c1b21b1cac3dbf2bd879c63791358 A_HA_H1 CALI07 HA1 DTLCIGYHANNSTDTVDTVLEKNVTVTHSVNLLEDKHNGKLCKLRGVAPLHLGKCNIAGWILGNPECESLSTASSWSYIVETPSSDNGTCYPGDFIDYEELREQLSSVSSFERFEIFPKTSSWPNHDSNKGVTAACPHAGAKSFYKNLIWLVKKGNSYPKLSKSYINDKGKEVLVLWGIHHPSTSADQQSLYQNADAYVFVGSSRYSKKFKPEIAIRPKVRDQEGRMNYYWTLVEPGDKITFEATGNLVVPRYAFAMERNAGSGIIISDTPVHDCNTTCQTPKGAINTSLPFQNIHPITIGKCPKYVKSTKLRLATGLRNIPSIQS GACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCT -EPI_ISL_227813 A/California/07/2009 A / H1N1 90cfbd0cd45fd99f3375abde7c6b66af5ce77cf0 A_MP HK4801 M1 
MSLLTEVETYVLSIIPSGPLKAEIAQRLESVFAGKNTDLEALMEWLKTRPILSPLTKGILGFVFTLTVPSERGLQRRRFVQNALNGNGDPNNMDRAVKLYKKLKREITFHGAKEVSLSYSTGALASCMGLIYNRMGTVTTEAAFGLVCATCEQIADSQHRSHRQMATTTNPLIRHENRMVLASTTAKAMEQMAGSSEQAAEAMEVANQTRQMVHAMRTIGTHPSSSAGLKDDLLENLQAYQKRMGVQMQRFK* ATGAGTCTTCTAACCGAGGTCGAAACGTACGTTCTTTCTATCATCCCGTCAGGCCCCCTCAAAGCCGAGATCGCGCAGAGACTGGAAAGTGTCTTTGCAGGAAAGAACACAGATCTTGAGGCTCTCATGGAATGGCTAAAGACAAGACCAATCTTGTCACCTCTGACTAAGGGAATTTTAGGATTTGTGTTCACGCTCACCGTGCCCAGTGAGCGAGGACTGCAGCGTAGACGCTTTGTCCAAAATGCCCTAAATGGGAATGGGGACCCGAACAACATGGATAGAGCAGTTAAACTATACAAGAAGCTCAAAAGAGAAATAACGTTCCATGGGGCCAAGGAGGTGTCACTAAGCTATTCAACTGGTGCACTTGCCAGTTGCATGGGCCTCATATACAACAGGATGGGAACAGTGACCACAGAAGCTGCTTTTGGTCTAGTGTGTGCCACTTGTGAACAGATTGCTGATTCACAGCATCGGTCTCACAGACAGATGGCTACTACCACCAATCCACTAATCAGGCATGAAAACAGAATGGTGCTGGCTAGCACTACGGCAAAGGCTATGGAACAGATGGCTGGATCGAGTGAACAGGCAGCGGAGGCCATGGAGGTTGCTAATCAGACTAGGCAGATGGTACATGCAATGAGAACTATTGGGACTCATCCTAGCTCCAGTGCTGGTCTGAAAGATGACCTTCTTGAAAATTTGCAGGCCTACCAGAAGCGAATGGGAGTGCAGATGCAGCGATTCAAGTGA EPI_ISL_227813 A/California/07/2009 A / H1N1 9b0d9c8e81fb0b89d2d12cac40ea8b55bfd6cc6a A_NA_N1 CALI07 NA MNPNQKIITIGSVCMTIGMANLILQIGNIISIWISHSIQLGNQNQIETCNQSVITYENNTWVNQTYVNISNTNFAAGQSVVSVKLAGNSSLCPVSGWAIYSKDNSVRIGSKGDVFVIREPFISCSPLECRTFFLTQGALLNDKHSNGTIKDRSPYRTLMSCPIGEVPSPYNSRFESVAWSASACHDGINWLTIGISGPDNGAVAVLKYNGIITDTIKSWRNNILRTQESECACVNGSCFTVMTDGPSNGQASYKIFRIEKGKIVKSVEMNAPNYHYEECSCYPDSSEITCVCRDNWHGSNRPWVSFNQNLEYQIGYICSGIFGDNPRPNDKTGSCGPVSSNGANGVKGFSFKYGNGVWIGRTKSISSRNGFEMIWDPNGWTGTDNNFSIKQDIVGINEWSGYSGSFVQHPELTGLDCIRPCFWVELIRGRPKENTIWTSGSSISFCGVNSDTVGWSWPDGAELPFTIDK* 
ATGAATCCAAACCAAAAGATAATAACCATTGGTTCGGTCTGTATGACAATTGGAATGGCTAACTTAATATTACAAATTGGAAACATAATCTCAATATGGATTAGCCACTCAATTCAACTTGGGAATCAAAATCAGATTGAAACATGCAATCAAAGCGTCATTACTTATGAAAACAACACTTGGGTAAATCAGACATATGTTAACATCAGCAACACCAACTTTGCTGCTGGACAGTCAGTGGTTTCCGTGAAATTAGCGGGCAATTCCTCTCTCTGCCCTGTTAGTGGATGGGCTATATACAGTAAAGACAACAGTGTAAGAATCGGTTCCAAGGGGGATGTGTTTGTCATAAGGGAACCATTCATATCATGCTCCCCCTTGGAATGCAGAACCTTCTTCTTGACTCAAGGGGCCTTGCTAAATGACAAACATTCCAATGGAACCATTAAAGACAGGAGCCCATATCGAACCCTAATGAGCTGTCCTATTGGTGAAGTTCCCTCTCCATACAACTCAAGATTTGAGTCAGTCGCTTGGTCAGCAAGTGCTTGTCATGATGGCATCAATTGGCTAACAATTGGAATTTCTGGCCCAGACAATGGGGCAGTGGCTGTGTTAAAGTACAACGGCATAATAACAGACACTATCAAGAGTTGGAGAAACAATATATTGAGAACACAAGAGTCTGAATGTGCATGTGTAAATGGTTCTTGCTTTACTGTAATGACCGATGGACCAAGTAATGGACAGGCCTCATACAAGATCTTCAGAATAGAAAAGGGAAAGATAGTCAAATCAGTCGAAATGAATGCCCCTAATTATCACTATGAGGAATGCTCCTGTTATCCTGATTCTAGTGAAATCACATGTGTGTGCAGGGATAACTGGCATGGCTCGAATCGACCGTGGGTGTCTTTCAACCAGAATCTGGAATATCAGATAGGATACATATGCAGTGGGATTTTCGGAGACAATCCACGCCCTAATGATAAGACAGGCAGTTGTGGTCCAGTATCGTCTAATGGAGCAAATGGAGTAAAAGGGTTTTCATTCAAATACGGCAATGGTGTTTGGATAGGGAGAACTAAAAGCATTAGTTCAAGAAACGGTTTTGAGATGATTTGGGATCCGAACGGATGGACTGGGACAGACAATAACTTCTCAATAAAGCAAGATATCGTAGGAATAAATGAGTGGTCAGGATATAGCGGGAGTTTTGTTCAGCATCCAGAACTAACAGGGCTGGATTGTATAAGACCTTGCTTCTGGGTTGAACTAATCAGAGGGCGACCCAAAGAGAACACAATCTGGACTAGCGGGAGCAGCATATCCTTTTGTGGTGTAAACAGTGACACTGTGGGTTGGTCTTGGCCAGACGGTGCTGAGTTGCCATTTACCATTGACAAGTAA -EPI_ISL_227813 A/California/07/2009 A / H1N1 90cfbd0cd45fd99f3375abde7c6b66af5ce77cf0 A_MP HK4801 M2 MSLLTEVETPTRSEWECRCSDSSDPLVIAANIIGILHLILWITDRLFFKCIYRRFKYGLKRGPSTEGVPESMREEYQQEQQSAVDVDDGHFVNIELE* ATGAGTCTTCTAACCGAGGTCGAAACGCCTACCAGAAGCGAATGGGAGTGCAGATGCAGCGATTCAAGTGATCCTCTCGTCATTGCAGCAAATATCATTGGGATCTTGCACCTGATATTGTGGATTACTGATCGTCTTTTTTTCAAATGTATTTATCGTCGCTTTAAATACGGTTTGAAAAGAGGGCCTTCTACGGAAGGAGTGCCTGAGTCCATGAGGGAAGAATATCAACAGGAACAGCAGAGTGCTGTGGATGTTGACGATGGTCATTTTGTCAACATAGAGCTAGAGTAA -EPI_ISL_227813 A/California/07/2009 A / H1N1 3778dd9e102e5ee1459460f64e97a070050db167 A_PB2 
HK4801 PB2 MERIKELRDLMSQSRTREILTKTTVDHMAIIKKYTSGRQEKNPALRMKWMMAMRYPITADKRIMDMIPERNEQGQTLWSKTNDAGSDRVMVSPLAVTWWNRNGPTTSTVHYPKVYKTYFEKVERLKHGTFGPVHFRNQVKIRRRVDTNPGHADLSAKEAQDVIMEVVFPNEVGARILTSESQLAITKEKKEELQDCKIAPLMVAYMLERELVRKTRFLPVAGGTGSVYIEVLHLTQGTCWEQMYTPGGEVRNDDVDQSLIIAARNIVRRAAVSADPLASLLEMCHSTQIGGVRMVDILRQNPTEEQAVDICKAAIGLRISSSFSFGGFTFKRTSGSSVKKEEEVLTGNLQTLKIRVHEGYEEFTMVGRRATAILRKATRRLIQLIVSGRDEQSIAEAIIVAMVFSQEDCMIKAVRGDLNFVNRANQRLNPMHQLLRHFQKDAKVLFQNWGIESIDNVMGMIGILPDMTPSTEMSLRGIRVSKMGVDEYSSTERVVVSIDRFLRVRDQRGNVLLSPEEVSETQGTEKLTITYSSSMMWEINGPESVLVNTYQWIIRNWEIVKIQWSQDPTMLYNKMEFEPFQSLVPKATRSRYSGFVRTLFQQMRDVLGTFDTVQIIKLLPFAAAPPEQSRMQFSSLTVNVRGSGLRILVRGNSPVFNYNKATKRLTVLGKDAGALTEDPDEGTSGVESAVLRGFLILGKEDKRYGPALSINELSNLAKGEKANVLIGQGDVVLVMKRKRDSSILTDSQTATKRIRMAIN* ATGGAGAGAATAAAAGAACTGAGAGATCTAATGTCGCAGTCCCGCACTCGCGAGATACTCACTAAGACCACTGTGGACCATATGGCCATAATCAAAAAGTACACATCAGGAAGGCAAGAGAAGAACCCCGCACTCAGAATGAAGTGGATGATGGCAATGAGATACCCAATTACAGCAGACAAGAGAATAATGGACATGATTCCAGAGAGGAATGAACAAGGACAAACCCTCTGGAGCAAAACAAACGATGCTGGATCAGACCGAGTGATGGTATCACCTCTGGCCGTAACATGGTGGAATAGGAATGGCCCAACAACAAGTACAGTTCATTACCCTAAGGTATATAAAACTTATTTCGAAAAGGTCGAAAGGTTGAAACATGGTACCTTCGGCCCTGTCCACTTCAGAAATCAAGTTAAAATAAGGAGGAGAGTTGATACAAACCCTGGCCATGCAGATCTCAGTGCCAAGGAGGCACAGGATGTGATTATGGAAGTTGTTTTCCCAAATGAAGTGGGGGCAAGAATACTGACATCAGAGTCACAGCTGGCAATAACAAAAGAGAAGAAAGAAGAGCTCCAGGATTGTAAAATTGCTCCCTTGATGGTGGCGTACATGCTAGAAAGAGAATTGGTCCGTAAAACAAGGTTTCTCCCAGTAGCCGGCGGAACAGGCAGTGTTTATATTGAAGTGTTGCACTTAACCCAAGGGACGTGCTGGGAGCAGATGTACACTCCAGGAGGAGAAGTGAGAAATGATGATGTTGACCAAAGTTTGATTATCGCTGCTAGAAACATAGTAAGAAGAGCAGCAGTGTCAGCAGACCCATTAGCATCTCTCTTGGAAATGTGCCACAGCACACAGATTGGAGGAGTAAGGATGGTGGACATCCTTAGACAGAATCCAACTGAGGAACAAGCCGTAGACATATGCAAGGCAGCAATAGGGTTGAGGATTAGCTCATCTTTCAGTTTTGGTGGGTTCACTTTCAAAAGGACAAGCGGATCATCAGTCAAGAAAGAAGAAGAAGTGCTAACGGGCAACCTCCAAACACTGAAAATAAGAGTACATGAAGGGTATGAAGAATTCACAATGGTTGGGAGAAGAGCAACAGCTATTCTCAGAAAGGCAACCAGGAGATTGATCCAGTTGATAGTAAGCGGGAGAGACGAGCAGTCAATTGCTGAGGCAATAATTGTGGCCATGGTATTCTCACAGGAGGATTGCA
TGATCAAGGCAGTTAGGGGCGATCTGAACTTTGTCAATAGGGCAAACCAGCGACTGAACCCCATGCACCAACTCTTGAGGCATTTCCAAAAAGATGCAAAAGTGCTTTTCCAGAACTGGGGAATTGAATCCATCGACAATGTGATGGGAATGATCGGAATACTGCCCGACATGACCCCAAGCACGGAGATGTCGCTGAGAGGGATAAGAGTCAGCAAAATGGGAGTAGATGAATACTCCAGCACGGAGAGAGTGGTAGTGAGTATTGACCGATTTTTAAGGGTTAGAGATCAAAGAGGGAACGTACTATTGTCTCCCGAAGAAGTCAGTGAAACGCAAGGAACTGAGAAGTTGACAATAACTTATTCGTCATCAATGATGTGGGAGATCAATGGCCCTGAGTCAGTGCTAGTCAACACTTATCAATGGATAATCAGGAACTGGGAAATTGTGAAAATTCAATGGTCACAAGATCCCACAATGYTATACAACAAAATGGAATTTGAACCATTTCAGTCTCTTGTCCCTAAGGCAACCAGAAGCCGGTACAGTGGATTCGTAAGGACACTGTTCCAGCAAATGCGGGATGTGCTTGGGACATTTGACACTGTCCAAATAATAAAACTTCTCCCCTTTGCTGCTGCCCCACCAGAACAGAGTAGGATGCAATTTTCCTCATTGACTGTGAATGTGAGAGGATCAGGGTTGAGGATACTGGTAAGAGGCAATTCTCCAGTATTCAATTACAACAAGGCAACCAAACGACTTACAGTTCTTGGAAAGGATGCAGGTGCATTGACTGAAGATCCAGATGAAGGCACATCTGGGGTGGAGTCTGCTGTCCTGAGAGGATTTCTCATTTTGGGCAAAGAAGACAAGAGATATGGCCCAGCATTAAGCATCAATGAACTGAGCAATCTTGCAAAAGGAGAGAAGGCTAATGTGCTAATTGGGCAAGGGGACGTAGTGTTGGTAATGAAACGAAAACGGGACTCTAGCATACTTACTGACAGCCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAG -EPI_ISL_227813 A/California/07/2009 A / H1N1 c0389b6ec52fbcda5b39803a4bb1f9d4046dbb90 A_NP HK4801 NP MASQGTKRSYEQMETGGERQDATEIRASVGRMIGGIGRFYIQMCTELKLSDYDGRLIQNSITIERMVLSAFDERRNKYLEEHPSAGKDPKKTGGPIYRRVDGKWMRELILYDKEEIRRVWRQANNGEDATAGLTHIMIWHSNLNDATYQRTRALVRTGMDPRMCSLMQGSTLPRRSGAAGAAVKGVGTIAMELIRMIKRGINDRNFWRGENGRRTRVAYERMCNILKGKFQTAAQRAMMDQVRESRNPGNAEIEDLIFLARSALILRGSVAHKSCLPACVYGLAVASGHDFEREGYSLVGIDPFKLLQNSQVVSLMRPNENPAHKSQLVWMACHSAAFEDLRVSSFIRGKKVIPRGKLSTRGVQIASNENVETMDSNTLELRSRYWAIRTRSGGNTNQQKASAGQISVQPTFSVQRNLPFERATVMAAFSGNNEGRTSDMRTEVIRMMESAKPEDLSFQGRGVFELSDEKATNPIVPSFDMSNEGSYFFGDNAEEYDS* 
ATGGCGTCTCAAGGCACCAAACGATCATATGAACAAATGGAGACTGGTGGGGAGCGCCAGGATGCCACAGAAATCAGAGCATCTGTCGGAAGAATGATTGGTGGAATCGGGAGATTCTACATCCAAATGTGCACTGAACTCAAACTCAGTGATTATGATGGACGACTAATCCAGAATAGCATAACAATAGAGAGGATGGTGCTTTCTGCTTTTGATGAGAGAAGAAATAAATACCTAGAAGAGCATCCCAGTGCTGGGAAGGACCCTAAGAAAACAGGAGGACCCATATATAGAAGAGTAGACGGAAAGTGGATGAGAGAACTCATCCTTTATGACAAAGAAGAAATAAGGAGAGTTTGGCGCCAAGCAAACAATGGCGAAGATGCAACAGCAGGTCTTACTCATATCATGATTTGGCATTCCAACCTGAATGATGCCACATATCAGAGAACAAGAGCGCTTGTTCGCACCGGAATGGATCCCAGAATGTGCTCTCTAATGCAAGGTTCAACACTTCCCAGAAGGTCTGGTGCCGCAGGTGCTGCGGTGAAAGGAGTTGGAACAATAGCAATGGAGTTAATCAGAATGATCAAACGTGGAATCAATGACCGAAATTTCTGGAGGGGTGAAAATGGACGAAGGACAAGGGTTGCTTATGAAAGAATGTGCAATATCCTCAAAGGAAAATTTCAAACAGCTGCCCAGAGGGCAATGATGGATCAAGTAAGAGAAAGTCGAAACCCAGGAAACGCTGAGATTGAAGACCTCATTTTCCTGGCACGGTCAGCACTCATTCTGAGGGGATCAGTTGCACATAAATCCTGCCTGCCTGCTTGTGTGTATGGGCTTGCAGTAGCAAGTGGGCATGACTTTGAAAGGGAAGGGTACTCACTGGTCGGGATAGACCCATTCAAATTACTCCAAAACAGCCAAGTGGTCAGCCTGATGAGACCAAATGAAAACCCAGCTCACAAGAGTCAATTGGTGTGGATGGCATGCCACTCTGCTGCATTTGAAGATTTAAGAGTATCAAGTTTCATAAGAGGAAAGAAAGTGATTCCAAGAGGAAAGCTTTCCACAAGAGGGGTCCAGATTGCTTCAAATGAGAATGTGGAAACCATGGACTCCAATACCCTGGAACTGAGAAGCAGATACTGGGCCATAAGGACCAGGAGTGGAGGAAATACCAATCAACAAAAGGCATCCGCAGGCCAGATCAGTGTGCAGCCTACATTCTCAGTGCAGCGGAATCTCCCTTTTGAAAGAGCAACCGTTATGGCAGCATTCAGCGGGAACAATGAAGGACGGACATCCGACATGCGAACAGAAGTTATAAGAATGATGGAAAGTGCAAAGCCAGAAGATTTGTCCTTCCAGGGGCGGGGAGTCTTCGAGCTCTCGGACGAAAAGGCAACGAACCCGATCGTGCCTTCCTTTGACATGAGTAATGAAGGGTCTTATTTCTTCGGAGACAATGCAGAGGAGTATGACAGTTGA +EPI_ISL_227813 A/California/07/2009 A / H1N1 a0a15825739c1b21b1cac3dbf2bd879c63791358 A_HA_H1 CALI07 HA1 DTLCIGYHANNSTDTVDTVLEKNVTVTHSVNLLEDKHNGKLCKLRGVAPLHLGKCNIAGWILGNPECESLSTASSWSYIVETPSSDNGTCYPGDFIDYEELREQLSSVSSFERFEIFPKTSSWPNHDSNKGVTAACPHAGAKSFYKNLIWLVKKGNSYPKLSKSYINDKGKEVLVLWGIHHPSTSADQQSLYQNADAYVFVGSSRYSKKFKPEIAIRPKVRDQEGRMNYYWTLVEPGDKITFEATGNLVVPRYAFAMERNAGSGIIISDTPVHDCNTTCQTPKGAINTSLPFQNIHPITIGKCPKYVKSTKLRLATGLRNIPSIQS 
GACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCT EPI_ISL_233740 A/Hong Kong/4801/2014 A / H3N2 C4/S4 bdb31091d2d43140191c38eb61599f568b3a394a A_HA_H3 HK4801 HA QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNVPEKQTRGIFGAIAGFIENGWEGMVDGWYGFRHQNSEGRGQAADLKSTQAAIDQINGKLNRLIGKTNEKFHQIEKEFSEVEGRIQDLEKYVEDTKIDLWSYNAELLVALENQHTIDLTDSEMNKLFEKTKKQLRENAEDMGNGCFKIYHKCDNACIGSIRNGTYDHNVYRDEALNNRFQIKGVELKSGYKDWILWISFAISCFLLCVALLGFIMWACQKGNIRCNICI 
CAAAAAATTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAGTAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTACACATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCATAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACTAGAGGCATATTTGGCGCAATAGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATTCTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGAAGAATTCAGGACCTTGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATAAGAAATGGAACTTATGACCACAATGTGTACAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATTAGGTGCAACATTTGCATT EPI_ISL_233740 A/Hong Kong/4801/2014 A / H3N2 C4/S4 bdb31091d2d43140191c38eb61599f568b3a394a A_HA_H3 HK4801 HA-signal MKTIIALSYILCLVFA ATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCT EPI_ISL_233740 A/Hong Kong/4801/2014 A / H3N2 C4/S4 bdb31091d2d43140191c38eb61599f568b3a394a A_HA_H3 HK4801 HA1 
QKIPGNDNSTATLCLGHHAVPNGTIVKTITNDRIEVTNATELVQNSSIGEICDSPHQILDGENCTLIDALLGDPQCDGFQNKKWDLFVERSKAYSNCYPYDVPDYASLRSLVASSGTLEFNNESFNWTGVTQNGTSSACIRRSSSSFFSRLNWLTHLNYTYPALNVTMPNNEQFDKLYIWGVHHPGTDKDQIFLYAQSSGRITVSTKRSQQAVIPNIGSRPRIRDIPSRISIYWTIVKPGDILLINSTGNLIAPRGYFKIRSGKSSIMRSDAPIGKCKSECITPNGSIPNDKPFQNVNRITYGACPRYVKHSTLKLATGMRNV CAAAAAATTCCTGGAAATGACAATAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACGAATGACCGAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAATTCCTCAATAGGTGAAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGGAGACCCTCAGTGTGATGGCTTTCAAAATAAGAAATGGGACCTTTTTGTTGAACGAAGCAAAGCCTACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGGAGTTTAACAATGAAAGCTTCAATTGGACTGGAGTCACTCAAAACGGAACAAGTTCTGCTTGCATAAGGAGATCTAGTAGTAGTTTCTTTAGTAGATTAAATTGGTTGACCCACTTAAACTACACATACCCAGCATTGAACGTGACTATGCCAAACAATGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGGTACGGACAAGGACCAAATCTTCCTGTATGCTCAATCATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGGGATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAAGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCATTCCAAAATGTAAACAGGATCACATACGGGGCCTGTCCCAGATATGTTAAGCATAGCACTCTGAAATTGGCAACAGGAATGCGAAATGTA @@ -44,4 +34,4 @@ EPI_ISL_233740 A/Hong Kong/4801/2014 A / H3N2 C4/S4 9c6c9e7edfc0640c71b518d40cb2 EPI_ISL_233740 A/Hong Kong/4801/2014 A / H3N2 C4/S4 9c6c9e7edfc0640c71b518d40cb29b7a6fb12f97 A_PA HK4801 PA-X MEDFVRQCFNPMIVELAEKAMKEYGEDLKIETNKFAAICTHLEVCFMYSDFHFINEQGESIVVELDDPNALLKHRFEIIEGRDRTMAWTVVNSICNTTGAGKPKFLPDLYDYKENRFIEIGVTRREVHIYYLEKANKIKSENTHIHIFSFTGEEMATKADYTLDEESRARIKTRLFTIRQEMANRGLWDSFVSPKEAKKQLKKNLKSQELCAGLPTKVSHRTSPALRILEPMWMDSNRTAALRASFLKCPKK* 
ATGGAAGATTTTGTGCGACAATGCTTCAACCCGATGATTGTCGAACTTGCAGAAAAAGCAATGAAAGAGTATGGGGAGGATCTGAAAATTGAAACCAACAAATTTGCAGCAATATGCACTCACTTGGAGGTCTGTTTCATGTATTCAGATTTCCATTTCATCAATGAACAAGGCGAATCAATAGTAGTAGAACTTGACGATCCAAATGCACTGTTAAAGCACAGATTTGAAATAATCGAGGGGAGAGACAGAACAATGGCATGGACAGTAGTAAACAGTATCTGCAACACTACTGGAGCTGGAAAACCGAAGTTTCTACCGGATTTGTATGATTACAAAGAGAACAGATTCATCGAAATTGGAGTGACAAGGAGAGAAGTCCACATATATTACCTTGAAAAGGCCAATAAGATTAAATCTGAGAACACACACATTCACATTTTTTCATTCACTGGGGAGGAAATGGCCACAAAGGCAGATTACACTCTCGACGAGGAAAGCAGGGCTAGGATCAAAACCAGGCTGTTTACCATAAGACAAGAAATGGCCAACAGAGGCCTCTGGGATTCCTTCGTCAGTCCGAAAGAGGCGAAGAAACAATTGAAGAAAAATTTGAAATCACAGGAACTATGCGCAGGCTTGCCGACCAAAGTCTCCCACCGAACTTCTCCTGCCTTGAGAATTTTAGAGCCTATGTGGATGGATTCGAACCGAACGGCTGCATTGAGGGCAAGCTTTCTCAAATGTCCAAAGAAGTGA EPI_ISL_233740 A/Hong Kong/4801/2014 A / H3N2 C4/S4 991b220a331f094569f0e12ed2beb5391d97847c A_PB1 HK4801 PB1 MDVNPTLLFLKVPAQNAISTTFPYTGDPPYSHGTGTGYTMDTVNRTHQYSERGKWTTNTETGAPQLNPIDGPLPEDNEPSGYAQTDCVLEAMAFLEESHPGIFENSCLETMEAVQQTRVDKLTQGRQTYDWTLNRNQPAATALANTIEVFRTNGLTANESGRLIDYLKDVMESMDKEEMEITTHFQRKRRVRDNMTKKMVTQRTIGKKKQRVNKRGYLIRALTLNTMTKDAERGKLKRRAIATPGMQIRGFVYFVETLARSICEKLEQSGLPVGGNEKKAKLANVVRKMMTNSQDTELSFTITGDNTKWNENQNPRMFLAMITYITKNQPEWFRNILSIAPIMFSNKMARLGKGYMFESKRMKLRTQIPAEMLASIDLKYFNESTRKKIEKIRPLLIDGTASLSPGMMMGMFNMLSTVLGVSILNLGQKKYTKTTYWWDGLQSSDDFALIVNAPNHEGIQAGVDRFYRTCKLVGINMSKKKSYINKTGTFEFTSFFYRYGFVANFSMELPSFGVSGINESADMSIGVTVIKNNMINNDLGPATAQMALQLFIKDYRYTYRCHRGDTQIQTRRSFEIKKLWDQTQSRTGLLVSDGGPNLYNIRNLHIPEVCLKWELMDENYRGRLCNPLNPFVSHKEIESVNNAVVMPAHGPAKSMEYDAVATTHSWIPKRNRSILNTSQRGILEDEQMYQKCCNLFEKFFPSSSYRRPIGISSMVEAMVSRARIDARIDFESGRIKKEEFSEIMKICSTIEELRRQK* 
ATGGATGTCAATCCGACTCTACTGTTCTTAAAAGTTCCAGCGCAAAATGCCATAAGCACAACATTCCCTTATACTGGAGATCCTCCATACAGCCATGGAACAGGGACAGGGTACACTATGGACACAGTCAACAGAACACACCAATACTCAGAGAGGGGGAAGTGGACGACAAATACAGAAACTGGGGCGCCCCAGCTCAACCCAATTGATGGACCACTACCTGAGGATAATGAACCAAGTGGATATGCACAAACAGACTGTGTCCTGGAGGCTATGGCCTTCCTTGAAGAATCCCACCCAGGTATCTTTGAGAACTCATGCCTTGAAACAATGGAAGCCGTTCAACAAACAAGGGTGGACAAACTAACCCAAGGTCGCCAGACTTATGATTGGACATTAAACAGGAATCAACCGGCAGCAACTGCATTAGCCAACACCATAGAAGTCTTTAGAACGAACGGATTAACAGCTAATGAATCAGGAAGACTAATAGATTACCTCAAGGATGTGATGGAATCAATGGATAAAGAGGAAATGGAGATAACAACACACTTTCAAAGAAAAAGGAGAGTAAGGGACAACATGACCAAGAAAATGGTCACACAAAGAACAATAGGGAAGAAAAAGCAAAGAGTGAACAAGAGAGGCTACCTAATAAGAGCTTTGACATTGAACACGATGACCAAAGATGCAGAGAGAGGCAAATTAAAAAGAAGGGCTATTGCAACACCCGGGATGCAAATTAGAGGGTTCGTGTACTTCGTTGAAACTTTAGCTAGAAGCATTTGCGAAAAGCTTGAACAATCTGGACTTCCGGTTGGGGGTAATGAAAAGAAGGCCAAACTGGCAAATGTTGTGAGAAAAATGATGACTAACTCACAAGACACAGAGCTTTCCTTCACAATCACTGGGGACAACACTAAGTGGAATGAAAATCAAAACCCCCGAATGTTTTTGGCGATGATTACATACATCACAAAGAATCAGCCTGAATGGTTCAGAAACATCCTGAGCATCGCACCAATAATGTTCTCAAACAAAATGGCAAGACTGGGAAAAGGATACATGTTCGAGAGTAAGAGAATGAAGCTCCGGACACAAATACCTGCAGAAATGCTAGCAAGCATTGACCTGAAGTATTTTAATGAATCAACAAGGAAGAAAATTGAGAAAATAAGGCCTCTTCTAATAGATGGCACAGCATCATTGAGCCCTGGAATGATGATGGGCATGTTCAACATGCTAAGTACAGTTTTAGGAGTCTCGATACTGAATCTTGGACAAAAGAAATACACCAAGACAACATACTGGTGGGATGGGCTCCAATCCTCAGACGATTTTGCCCTCATAGTGAATGCACCAAATCATGAGGGAATACAAGCAGGAGTGGATAGATTCTATAGGACCTGCAAGTTAGTGGGAATCAACATGAGCAAAAAGAAGTCCTATATAAATAAAACAGGGACATTTGAATTCACTAGCTTTTTTTATCGATATGGATTTGTGGCTAATTTTAGCATGGAGCTGCCAAGTTTTGGAGTGTCTGGAATAAATGAGTCAGCTGACATGAGCATTGGAGTAACAGTGATAAAGAACAACATGATAAACAATGACCTTGGACCAGCAACAGCCCAAATGGCTCTCCAATTGTTCATCAAAGATTACAGATACACATATCGGTGCCATAGAGGAGACACACAAATCCAAACGAGAAGATCATTCGAGATAAAGAAGCTGTGGGATCAAACCCAATCAAGGACAGGACTATTGGTATCAGATGGGGGACCAAACTTATACAATATCCGGAATCTTCACATCCCTGAAGTCTGCTTAAAGTGGGAGCTGATGGATGAGAATTATCGGGGAAGACTTTGTAATCCCCTGAATCCCTTTGTCAGCCATAAAGAAATTGAATCTGTAAACAATGCTGTAGTAATGCCAGCCCATGGTCCGGCCAAAAGTATGGAATATGATGCCGTTGCAACTACACACTCCTGGAT
TCCCAAAAGGAACCGCTCTATTCTAAACACAAGCCAAAGGGGAATTCTTGAGGATGAGCAGATGTACCAGAAGTGCTGCAACTTGTTCGAGAAATTTTTCCCTAGTAGTTCATATAGGAGACCGATTGGAATTTCTAGCATGGTGGAGGCCATGGTGTCTAGGGCCCGGATTGATGCCAGAATTGACTTCGAGTCTGGAAGGATTAAGAAGGAAGAGTTCTCTGAGATCATGAAGATCTGTTCCACCATTGAAGAACTCAGACGGCAAAAATAA EPI_ISL_233740 A/Hong Kong/4801/2014 A / H3N2 C4/S4 991b220a331f094569f0e12ed2beb5391d97847c A_PB1 HK4801 PB1-F2 MEQGQGTLWTQSTEHTNTQRGGSGRQIQKLGRPSSTQLMDHYLRIMNQVDMHKQTVSWRLWPSLKNPTQVSLRTHALKQWKPFNKQGWTN* ATGGAACAGGGACAGGGTACACTATGGACACAGTCAACAGAACACACCAATACTCAGAGAGGGGGAAGTGGACGACAAATACAGAAACTGGGGCGCCCCAGCTCAACCCAATTGATGGACCACTACCTGAGGATAATGAACCAAGTGGATATGCACAAACAGACTGTGTCCTGGAGGCTATGGCCTTCCTTGAAGAATCCCACCCAGGTATCTTTGAGAACTCATGCCTTGAAACAATGGAAGCCGTTCAACAAACAAGGGTGGACAAACTAA...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. -EPI_ISL_233740 A/Hong Kong/4801/2014 A / H3N2 C4/S4 bd266fa205c93cd1860b6df8a821ca1ba428a8fe A_PB2 HK4801 PB2 MERIKELRNLMSQSRTREILTKTTVDHMAIIKKYTSGRQEKNPSLRMKWMMAMKYPITADKRVTEMVPERNEQGQTLWSKMSDAGSDRVMVSPLAVTWWNRNGPVTSTVHYPKVYKTYFDKVERLKHGTFGPVHFRNQVKIRRRVDINPGHADLSAKEAQDVIMEVVFPNEVGARILTSESQLTITKEKKEELRDCKISPLMVAYMLERELVRKTRFLPVAGGTSSIYIEVLHLTQGTCWEQMYTPGGGVRNDDVDQSLIIAARNIVRRAAVSADPLASLLEMCHSTQIGGTRMVDILRQNPTEEQAVDICKAAMGLRISSSFSFGGFTFKRTSGSSVKKEEEVLTGNLQTLRIRVHEGYEEFTMVGKRATAILRKATRRLVQLIVSGRDEQSIAEAIIVAMVFSQEDCMIKAVRGDLNFVNRANQRLNPMHQLLRHFQKDAKVLFQNWGVEHIDSVMGMVGVLPDMTPSTEMSMRGIRVSKMGVDEYSSTERVVVSIDRFLRVRDQRGNVLLSPEEVSETQGTERLTITYSSSMMWEINGPESVLVNTYQWIIRNWEAVKIQWSQNPAMLYNKMEFEPFQSLVPKAIRSQYSGFVRTLFQQMRDVLGTFDTAQIIKLLPFAAAPPKQSRMQFSSLTVNVRGSGMRILVRGNSPVFNYNKTTKRLTILGKDAGTLIEDPDESTSGVESAVLRGFLIIGKEDRRYGPALSINELSNLAKGEKANVLIGQGDVVLVMKRKRDSSILTDSQTATKRIRMAIN* 
ATGGAAAGAATAAAAGAACTACGGAATCTGATGTCGCAGTCTCGCACTCGCGAGATACTGACAAAAACCACAGTGGACCATATGGCCATAATTAAGAAGTACACATCGGGGAGACAGGAAAAGAACCCGTCACTTAGGATGAAATGGATGATGGCAATGAAATATCCAATCACTGCTGACAAAAGGGTAACAGAAATGGTTCCGGAGAGAAATGAACAAGGACAAACTCTATGGAGCAAAATGAGTGATGCTGGATCAGATAGAGTGATGGTATCACCTTTGGCTGTAACATGGTGGAATAGGAATGGACCCGTGACAAGTACGGTCCATTACCCAAAAGTGTACAAAACTTATTTCGACAAAGTCGAAAGGTTAAAACATGGAACCTTTGGCCCTGTCCATTTTAGAAATCAAGTCAAGATACGCAGAAGAGTAGACATAAACCCTGGTCATGCAGACCTCAGTGCCAAAGAGGCACAAGATGTAATTATGGAAGTTGTTTTTCCCAATGAAGTGGGAGCCAGAATACTAACATCAGAATCACAACTAACAATAACTAAAGAGAAAAAAGAAGAACTCCGAGATTGCAAAATTTCTCCCTTGATGGTCGCATACATGTTAGAGAGAGAACTTGTGCGGAAAACAAGATTTCTCCCAGTTGCTGGCGGAACAAGCAGTATATACATTGAAGTTTTACATTTGACTCAAGGAACGTGTTGGGAACAAATGTACACTCCAGGTGGAGGAGTGAGGAATGACGATGTTGACCAAAGCCTAATTATTGCGGCCAGGAACATAGTAAGAAGAGCCGCAGTATCAGCAGATCCATTAGCATCTTTATTGGAGATGTGCCACAGCACGCAAATTGGCGGAACAAGGATGGTGGACATTCTTAGACAGAACCCGACTGAAGAACAAGCTGTGGATATATGCAAGGCTGCAATGGGATTGAGAATCAGCTCATCCTTCAGCTTTGGTGGCTTTACATTTAAAAGAACAAGCGGGTCGTCAGTCAAAAAAGAAGAAGAGGTGCTTACAGGCAATCTCCAAACATTGAGAATAAGAGTACATGAGGGGTATGAGGAGTTCACAATGGTGGGGAAAAGAGCAACAGCTATACTAAGAAAAGCAACCAGAAGATTGGTTCAACTCATAGTGAGTGGAAGAGACGAACAGTCAATAGCCGAAGCAATAATCGTGGCCATGGTGTTTTCACAAGAAGATTGCATGATAAAAGCAGTTAGAGGTGACCTGAATTTTGTCAACAGAGCAAATCAGCGGTTGAACCCCATGCATCAGCTTTTAAGGCATTTTCAGAAAGATGCGAAAGTACTCTTTCAAAATTGGGGAGTTGAACACATCGACAGTGTGATGGGAATGGTTGGAGTATTACCAGATATGACTCCAAGCACAGAGATGTCAATGAGAGGAATAAGAGTCAGCAAAATGGGTGTGGATGAATACTCCAGTACAGAGAGGGTGGTGGTTAGCATTGATCGGTTTTTGAGAGTTCGAGACCAACGTGGGAATGTATTATTATCTCCTGAGGAGGTTAGTGAAACACAGGGAACTGAGAGACTGACAATAACTTATTCATCGTCGATGATGTGGGAGATTAACGGTCCTGAGTCAGTCTTGGTCAATACCTATCAATGGATCATCAGGAATTGGGAAGCTGTTAAAATTCAATGGTCTCAGAATCCTGCAATGTTGTACAACAAAATGGAATTTGAACCATTTCAATCTTTAGTCCCCAAGGCCATTAGAAGCCAATACAGTGGGTTTGTCAGAACTCTATTCCAACAAATGAGAGACGTACTTGGGACATTTGACACTGCCCAGATAATAAAGCTTCTCCCTTTTGCAGCTGCTCCACCGAAGCAAAGCAGAATGCAGTTCTCTTCACTGACTGTGAATGTGAGGGGATCAGGGATGAGAATACTTGTAAGGGGCAATTCTCCTGTATTCAACTACAACAAGACCACTAAAAGGCTAACAAT
TCTCGGAAAAGATGCCGGCACTTTAATTGAAGACCCAGATGAAAGCACATCCGGAGTGGAGTCCGCCGTCTTGAGAGGGTTCCTCATTATAGGTAAAGAAGACAGAAGATACGGACCTGCATTAAGCATCAATGAACTGAGTAACCTTGCAAAAGGAGAAAAGGCTAATGTGCTAATTGGGCAAGGAGACGTGGTGTTGGTAATGAAACGAAAACGGGACTCTAGTATACTTACTGACAGCCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAA +EPI_ISL_233740 A/Hong Kong/4801/2014 A / H3N2 C4/S4 bd266fa205c93cd1860b6df8a821ca1ba428a8fe A_PB2 HK4801 PB2 MERIKELRNLMSQSRTREILTKTTVDHMAIIKKYTSGRQEKNPSLRMKWMMAMKYPITADKRVTEMVPERNEQGQTLWSKMSDAGSDRVMVSPLAVTWWNRNGPVTSTVHYPKVYKTYFDKVERLKHGTFGPVHFRNQVKIRRRVDINPGHADLSAKEAQDVIMEVVFPNEVGARILTSESQLTITKEKKEELRDCKISPLMVAYMLERELVRKTRFLPVAGGTSSIYIEVLHLTQGTCWEQMYTPGGGVRNDDVDQSLIIAARNIVRRAAVSADPLASLLEMCHSTQIGGTRMVDILRQNPTEEQAVDICKAAMGLRISSSFSFGGFTFKRTSGSSVKKEEEVLTGNLQTLRIRVHEGYEEFTMVGKRATAILRKATRRLVQLIVSGRDEQSIAEAIIVAMVFSQEDCMIKAVRGDLNFVNRANQRLNPMHQLLRHFQKDAKVLFQNWGVEHIDSVMGMVGVLPDMTPSTEMSMRGIRVSKMGVDEYSSTERVVVSIDRFLRVRDQRGNVLLSPEEVSETQGTERLTITYSSSMMWEINGPESVLVNTYQWIIRNWEAVKIQWSQNPAMLYNKMEFEPFQSLVPKAIRSQYSGFVRTLFQQMRDVLGTFDTAQIIKLLPFAAAPPKQSRMQFSSLTVNVRGSGMRILVRGNSPVFNYNKTTKRLTILGKDAGTLIEDPDESTSGVESAVLRGFLIIGKEDRRYGPALSINELSNLAKGEKANVLIGQGDVVLVMKRKRDSSILTDSQTATKRIRMAIN* 
ATGGAAAGAATAAAAGAACTACGGAATCTGATGTCGCAGTCTCGCACTCGCGAGATACTGACAAAAACCACAGTGGACCATATGGCCATAATTAAGAAGTACACATCGGGGAGACAGGAAAAGAACCCGTCACTTAGGATGAAATGGATGATGGCAATGAAATATCCAATCACTGCTGACAAAAGGGTAACAGAAATGGTTCCGGAGAGAAATGAACAAGGACAAACTCTATGGAGCAAAATGAGTGATGCTGGATCAGATAGAGTGATGGTATCACCTTTGGCTGTAACATGGTGGAATAGGAATGGACCCGTGACAAGTACGGTCCATTACCCAAAAGTGTACAAAACTTATTTCGACAAAGTCGAAAGGTTAAAACATGGAACCTTTGGCCCTGTCCATTTTAGAAATCAAGTCAAGATACGCAGAAGAGTAGACATAAACCCTGGTCATGCAGACCTCAGTGCCAAAGAGGCACAAGATGTAATTATGGAAGTTGTTTTTCCCAATGAAGTGGGAGCCAGAATACTAACATCAGAATCACAACTAACAATAACTAAAGAGAAAAAAGAAGAACTCCGAGATTGCAAAATTTCTCCCTTGATGGTCGCATACATGTTAGAGAGAGAACTTGTGCGGAAAACAAGATTTCTCCCAGTTGCTGGCGGAACAAGCAGTATATACATTGAAGTTTTACATTTGACTCAAGGAACGTGTTGGGAACAAATGTACACTCCAGGTGGAGGAGTGAGGAATGACGATGTTGACCAAAGCCTAATTATTGCGGCCAGGAACATAGTAAGAAGAGCCGCAGTATCAGCAGATCCATTAGCATCTTTATTGGAGATGTGCCACAGCACGCAAATTGGCGGAACAAGGATGGTGGACATTCTTAGACAGAACCCGACTGAAGAACAAGCTGTGGATATATGCAAGGCTGCAATGGGATTGAGAATCAGCTCATCCTTCAGCTTTGGTGGCTTTACATTTAAAAGAACAAGCGGGTCGTCAGTCAAAAAAGAAGAAGAGGTGCTTACAGGCAATCTCCAAACATTGAGAATAAGAGTACATGAGGGGTATGAGGAGTTCACAATGGTGGGGAAAAGAGCAACAGCTATACTAAGAAAAGCAACCAGAAGATTGGTTCAACTCATAGTGAGTGGAAGAGACGAACAGTCAATAGCCGAAGCAATAATCGTGGCCATGGTGTTTTCACAAGAAGATTGCATGATAAAAGCAGTTAGAGGTGACCTGAATTTTGTCAACAGAGCAAATCAGCGGTTGAACCCCATGCATCAGCTTTTAAGGCATTTTCAGAAAGATGCGAAAGTACTCTTTCAAAATTGGGGAGTTGAACACATCGACAGTGTGATGGGAATGGTTGGAGTATTACCAGATATGACTCCAAGCACAGAGATGTCAATGAGAGGAATAAGAGTCAGCAAAATGGGTGTGGATGAATACTCCAGTACAGAGAGGGTGGTGGTTAGCATTGATCGGTTTTTGAGAGTTCGAGACCAACGTGGGAATGTATTATTATCTCCTGAGGAGGTTAGTGAAACACAGGGAACTGAGAGACTGACAATAACTTATTCATCGTCGATGATGTGGGAGATTAACGGTCCTGAGTCAGTCTTGGTCAATACCTATCAATGGATCATCAGGAATTGGGAAGCTGTTAAAATTCAATGGTCTCAGAATCCTGCAATGTTGTACAACAAAATGGAATTTGAACCATTTCAATCTTTAGTCCCCAAGGCCATTAGAAGCCAATACAGTGGGTTTGTCAGAACTCTATTCCAACAAATGAGAGACGTACTTGGGACATTTGACACTGCCCAGATAATAAAGCTTCTCCCTTTTGCAGCTGCTCCACCGAAGCAAAGCAGAATGCAGTTCTCTTCACTGACTGTGAATGTGAGGGGATCAGGGATGAGAATACTTGTAAGGGGCAATTCTCCTGTATTCAACTACAACAAGACCACTAAAAGGCTAACAAT
TCTCGGAAAAGATGCCGGCACTTTAATTGAAGACCCAGATGAAAGCACATCCGGAGTGGAGTCCGCCGTCTTGAGAGGGTTCCTCATTATAGGTAAAGAAGACAGAAGATACGGACCTGCATTAAGCATCAATGAACTGAGTAACCTTGCAAAAGGAGAAAAGGCTAATGTGCTAATTGGGCAAGGAGACGTGGTGTTGGTAATGAAACGAAAACGGGACTCTAGTATACTTACTGACAGCCAGACAGCGACCAAAAGAATTCGGATGGCCATCAATTAA \ No newline at end of file diff --git a/docker-compose-all.yml b/docker-compose-all.yml deleted file mode 100644 index af21926..0000000 --- a/docker-compose-all.yml +++ /dev/null @@ -1,75 +0,0 @@ -x-python-image: &python-image python:3.10-alpine - -x-python-version: &python-version python3.10 - -x-pyarrow-image: &pyarrow-image mira-nf:pyarrow-alpine - -x-mira-nf-image: &mira-nf-image mira-nf:python3.10-alpine - -x-data-volume: &data-volume - type: bind - source: /home/xpa3/FLU_SC2_SEQUENCING - target: /data - -services: - multiqc: - container_name: multiqc - image: mira-nf:multiqc-alpine - build: - context: . - dockerfile: multiqc/Dockerfile-multiqc - args: - python_image: *python-image - python_version: *python-version - multiqc_version: 1.19 - restart: always - volumes: - - *data-volume - command: tail -f /dev/null - - fastqc: - container_name: fastqc - image: mira-nf:fastqc-alpine - build: - context: . - dockerfile: fastqc/Dockerfile-fastqc - args: - python_image: *python-image - python_version: *python-version - fastqc_version: 0.12.1 - restart: always - volumes: - - *data-volume - command: tail -f /dev/null - - pyarrow: - container_name: pyarrow - image: *pyarrow-image - build: - context: . - dockerfile: pyarrow/Dockerfile-pyarrow - args: - python_image: *python-image - ARROW_VERSION: 17.0.0 - restart: always - volumes: - - *data-volume - command: tail -f /dev/null - - mira-nf: - container_name: mira-nf - image: *mira-nf-image - build: - context: . 
- dockerfile: Dockerfile - args: - python_image: *pyarrow-image - python_version: *python-version - depends_on: - - multiqc - - fastqc - - pyarrow - restart: always - volumes: - - *data-volume - command: tail -f /dev/null diff --git a/docker-compose-cdcgov-all.yml b/docker-compose-cdcgov-all.yml deleted file mode 100644 index b88ef1f..0000000 --- a/docker-compose-cdcgov-all.yml +++ /dev/null @@ -1,38 +0,0 @@ -x-multiqc-image: &multiqc-image cdcgov/multiqc:v1.19-alpine - -x-fastqc-image: &fastqc-image cdcgov/fastqc:v0.12.1-alpine - -x-mira-nf-image: &mira-nf-image cdcgov/mira-nf:python3.10-alpine - -x-data-volume: &data-volume - type: bind - source: /home/snu3/Github/FLU_SC2_SEQUENCING - target: /data - -services: - multiqc: - container_name: multiqc - image: *multiqc-image - restart: always - volumes: - - *data-volume - command: tail -f /dev/null - - fastqc: - container_name: fastqc - image: *fastqc-image - restart: always - volumes: - - *data-volume - command: tail -f /dev/null - - mira-nf: - container_name: mira-nf - image: *mira-nf-image - depends_on: - - multiqc - - fastqc - restart: always - volumes: - - *data-volume - command: tail -f /dev/null diff --git a/docker-compose-git-all.yml b/docker-compose-git-all.yml deleted file mode 100644 index 5f9a998..0000000 --- a/docker-compose-git-all.yml +++ /dev/null @@ -1,77 +0,0 @@ -x-mira-nf-git-repo: &mira-nf-git-repo https://github.com/CDCgov/MIRA-NF.git#master - -x-python-image: &python-image python:3.10-alpine - -x-python-version: &python-version python3.10 - -x-pyarrow-image: &pyarrow-image mira-nf:pyarrow-alpine - -x-mira-nf-image: &mira-nf-image mira-nf:python3.10-alpine - -x-data-volume: &data-volume - type: bind - source: /home/snu3/Github/FLU_SC2_SEQUENCING - target: /data - -services: - multiqc: - container_name: multiqc - image: mira-nf:multiqc-alpine - build: - context: *mira-nf-git-repo - dockerfile: multiqc/Dockerfile-multiqc - args: - python_image: *python-image - python_version: *python-version - 
multiqc_version: 1.19 - restart: always - volumes: - - *data-volume - command: tail -f /dev/null - - fastqc: - container_name: fastqc - image: mira-nf:fastqc-alpine - build: - context: *mira-nf-git-repo - dockerfile: fastqc/Dockerfile-fastqc - args: - python_image: *python-image - python_version: *python-version - fastqc_version: 0.12.1 - restart: always - volumes: - - *data-volume - command: tail -f /dev/null - - pyarrow: - container_name: pyarrow - image: *pyarrow-image - build: - context: *mira-nf-git-repo - dockerfile: pyarrow/Dockerfile-pyarrow - args: - python_image: *python-image - ARROW_VERSION: 17.0.0 - restart: always - volumes: - - *data-volume - command: tail -f /dev/null - - mira-nf: - container_name: mira-nf - image: *mira-nf-image - build: - context: *mira-nf-git-repo - dockerfile: Dockerfile - args: - python_image: *pyarrow-image - python_version: *python-version - depends_on: - - multiqc - - fastqc - - pyarrow - restart: always - volumes: - - *data-volume - command: tail -f /dev/null diff --git a/docker-compose-git.yml b/docker-compose-git.yml deleted file mode 100644 index 1eae6ba..0000000 --- a/docker-compose-git.yml +++ /dev/null @@ -1,27 +0,0 @@ -x-mira-nf-git-repo: &mira-nf-git-repo https://github.com/CDCgov/MIRA-NF.git#master - -x-python-version: &python-version python3.10 - -x-pyarrow-image: &pyarrow-image cdcgov/pyarrow:v17.0.0-alpine - -x-mira-nf-image: &mira-nf-image mira-nf:python3.10-alpine - -x-data-volume: &data-volume - type: bind - source: /home/snu3/Github/FLU_SC2_SEQUENCING - target: /data - -services: - mira-nf: - container_name: mira-nf - image: *mira-nf-image - build: - context: *mira-nf-git-repo - dockerfile: Dockerfile - args: - python_image: *pyarrow-image - python_version: *python-version - restart: always - volumes: - - *data-volume - command: tail -f /dev/null diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index db0d2a9..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,25 +0,0 @@ 
-x-python-version: &python-version python3.10 - -x-pyarrow-image: &pyarrow-image cdcgov/pyarrow:v17.0.0-alpine - -x-mira-nf-image: &mira-nf-image mira-nf:python3.10-alpine - -x-data-volume: &data-volume - type: bind - source: /home/snu3/Github/FLU_SC2_SEQUENCING - target: /data - -services: - mira-nf: - container_name: mira-nf - image: *mira-nf-image - build: - context: . - dockerfile: Dockerfile - args: - python_image: *pyarrow-image - python_version: *python-version - restart: always - volumes: - - *data-volume - command: tail -f /dev/null diff --git a/docker_files/fixed_vulnerability_pkgs.sh b/docker_files/fixed_vulnerability_pkgs.sh deleted file mode 100644 index 9269c82..0000000 --- a/docker_files/fixed_vulnerability_pkgs.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Wrapper to install Python packages - -# Path to requirement file -PROJECT_DIR=${PROJECT_DIR:-/fastqc} -FIXED_PKGS="${PROJECT_DIR}/fixed_vulnerability_pkgs.txt" -FIXED_PKGS_CLEAN="${PROJECT_DIR}/fixed_vulnerability_pkgs_clean.txt" - -echo "Fixed packages file: $FIXED_PKGS" - -# Install updated version of Python packages if the file exists -if [[ -f "$FIXED_PKGS" ]]; then - echo "Updating Python packages..." 
- - # Remove blank lines from the file - awk NF <"$FIXED_PKGS" >"$FIXED_PKGS_CLEAN" - - # Get number of packages in the file - n=$(wc -l <"$FIXED_PKGS_CLEAN") - i=1 - - while [[ $i -le $n ]]; do - echo "Processing package #$i" - - # Get the package name and version - updated_pkg=$(sed -n "${i}p" "$FIXED_PKGS_CLEAN" | tr -d '\r') - echo "Package to update: $updated_pkg" - - pkg_name=$(echo "$updated_pkg" | sed -E 's/(.*)==(.*)/\1/') - echo "Package name: $pkg_name" - - # Check if package is already installed - check_pip_pkg=$(pip list --format=freeze | grep -w "$pkg_name") - echo "Currently installed: ${check_pip_pkg:-none}" - - # Update package if it exists - if [[ -n "$check_pip_pkg" ]]; then - pip install --no-cache-dir "$updated_pkg" - fi - - ((i++)) - done - - echo "All packages updated." -fi diff --git a/docker_files/fixed_vulnerability_pkgs.txt b/docker_files/fixed_vulnerability_pkgs.txt deleted file mode 100644 index 6723484..0000000 --- a/docker_files/fixed_vulnerability_pkgs.txt +++ /dev/null @@ -1,3 +0,0 @@ -setuptools==70.0.0 -Werkzeug==3.0.3 -idna==3.10 diff --git a/docker_files/remove_vulnerability_pkgs.sh b/docker_files/remove_vulnerability_pkgs.sh deleted file mode 100644 index 475a007..0000000 --- a/docker_files/remove_vulnerability_pkgs.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash -# Wrapper to remove vulnerability packages - -PROJECT_DIR=${PROJECT_DIR:-/mira-nf} -python_version=${python_version:-python3.10} -remove_pkgs=${PROJECT_DIR}/remove_vulnerability_pkgs.txt -remove_pkgs_clean=${PROJECT_DIR}/remove_vulnerability_pkgs_clean.txt - -echo $remove_pkgs - -if [[ -f ${remove_pkgs} ]]; then - - echo "Remove vulnerability packages" - - # Remove blank lines from the file and save a cleaner version of it - awk NF <${remove_pkgs} >${remove_pkgs_clean} - - # Get number of packages in the file - n=$(wc -l <${remove_pkgs_clean}) - i=1 - #!/bin/bash - # Wrapper to remove vulnerable Python packages - - PROJECT_DIR=${PROJECT_DIR:-/mira-nf} - 
PYTHON_VERSION=${python_version:-python3.10} - REMOVE_PKGS="${PROJECT_DIR}/remove_vulnerability_pkgs.txt" - REMOVE_PKGS_CLEAN="${PROJECT_DIR}/remove_vulnerability_pkgs_clean.txt" - - echo "Remove packages file: $REMOVE_PKGS" - - # Remove packages if the file exists - if [[ -f "$REMOVE_PKGS" ]]; then - echo "Removing vulnerability packages..." - - # Remove blank lines from the file - awk NF <"$REMOVE_PKGS" >"$REMOVE_PKGS_CLEAN" - - # Get number of packages - n=$(wc -l <"$REMOVE_PKGS_CLEAN") - i=1 - - while [[ $i -le $n ]]; do - echo "Processing package #$i" - - # Get the package name - pkg_name=$(sed -n "${i}p" "$REMOVE_PKGS_CLEAN" | tr -d '\r') - echo "Removing package: $pkg_name" - - # Remove package directories if they exist - find "/usr/local/lib/${PYTHON_VERSION}/site-packages" -name "*${pkg_name}*" -exec rm -rf {} \; - - ((i++)) - done - - echo "All vulnerable packages removed." - fi - - while [[ i -le $n ]]; do - echo $i - # Get the name of the package - pkg_name=$(head -${i} ${remove_pkgs_clean} | tail -1 | sed 's,\r,,g') - echo $pkg_name - # Remove the package if it exists - find /usr/local/lib/${python_version}/site-packages -name "*${pkg_name}*" -exec rm -rf {} \; - # Go to next file - i=$(($i + 1)) - done - - # Return message to keep the process going - echo "Done" - -fi diff --git a/docker_files/remove_vulnerability_pkgs.txt b/docker_files/remove_vulnerability_pkgs.txt deleted file mode 100644 index 8b13789..0000000 --- a/docker_files/remove_vulnerability_pkgs.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/docker_files/requirements.txt b/docker_files/requirements.txt deleted file mode 100644 index dbb5074..0000000 --- a/docker_files/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ - -pyarrow==17.0.0 -pandas==2.0.3 -numpy==1.23.1 -plotly==5.11.0 -pulp==2.7.0 -openpyxl==3.1.0 -pyyaml==6.0.2 -datetime diff --git a/docs/find_positions_of_interest_docs/README.md b/docs/find_positions_of_interest_docs/README.md index 2764252..3797b62 100644 --- 
a/docs/find_positions_of_interest_docs/README.md +++ b/docs/find_positions_of_interest_docs/README.md @@ -4,7 +4,7 @@ The **find_positions_of_int** workflow is a handy tool that runs (or reruns) DAI To get started, you'll need to provide the DAIS-ribosome input, a reference table, and a positions of interest table. Once it's done, you'll get a CSV file that lists all the positions in your input sequences that match the positions you're interested in from the positions of interest table you provided. Check out the info below for more details about how it works. -![find_positions_of_int workflow](../../docs/images/find_variants_of_interst_workflow_img_v2.png) +![find_positions_of_int workflow](../../docs/images/find_variants_of_interst_workflow_img_v3.png) *find_positions_of_int workflow* ### The DAIS-ribosome input diff --git a/docs/find_variants_of_interest_docs/README.md b/docs/find_variants_of_interest_docs/README.md index 11ebf43..5e8160e 100644 --- a/docs/find_variants_of_interest_docs/README.md +++ b/docs/find_variants_of_interest_docs/README.md @@ -4,7 +4,7 @@ The **find_variants_of_int** workflow is a handy tool that runs (or reruns) DAIS To get started, you'll need to provide the DAIS-ribosome input, a reference table, and a variants of interest table. Once it's done, you'll get a CSV file that lists all the positions in your input sequences that match the variants you're interested in from the variants of interest table you provided. Check out the info below for more details about how it works. 
-![find_variants_of_int workflow](../../docs/images/find_variants_of_interst_workflow_img_v2.png) +![find_variants_of_int workflow](../../docs/images/find_variants_of_interst_workflow_img_v3.png) *find_variants_of_int workflow* ### The DAIS-ribosome input diff --git a/docs/images/find_variants_of_interst_workflow_img_v2.png b/docs/images/find_variants_of_interst_workflow_img_v2.png deleted file mode 100644 index afc01d6..0000000 Binary files a/docs/images/find_variants_of_interst_workflow_img_v2.png and /dev/null differ diff --git a/docs/images/find_variants_of_interst_workflow_img_v3.png b/docs/images/find_variants_of_interst_workflow_img_v3.png new file mode 100644 index 0000000..91b697a Binary files /dev/null and b/docs/images/find_variants_of_interst_workflow_img_v3.png differ diff --git a/docs/images/mira_nf_workflow_img_v6.png b/docs/images/mira_nf_workflow_img_v6.png deleted file mode 100644 index f8d422a..0000000 Binary files a/docs/images/mira_nf_workflow_img_v6.png and /dev/null differ diff --git a/docs/images/mira_nf_workflow_img_v7.png b/docs/images/mira_nf_workflow_img_v7.png new file mode 100644 index 0000000..eb8e20f Binary files /dev/null and b/docs/images/mira_nf_workflow_img_v7.png differ diff --git a/docs/output.md b/docs/output.md index 879f47e..1aa9843 100644 --- a/docs/output.md +++ b/docs/output.md @@ -21,24 +21,25 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ## Output Structure ```bash -|---outputs - |---aggregate_outputs - |---multiqc (when applicable) --> multiqc outputs - |---dais-ribosome -> dais inputs and outputs - |---dash-json -> json files - |---mira-reports -> the aggregated fasta files and html files - |---csv-reports -> CSV summary files - |---parquet-reports (when applicable) - |---Sample_ID - |---subsampled-reads (when applicable) -> fastqs and log files - |---barcode-trimmed-reads (when applicable) -> fastqs and log files - |---primer-trimmed-reads (when applicable) -> fastqs and log 
files - |---IRMA/Sample_ID -> IRMA outputs and log files - |---IRMA-negative (when applicable) +|---outputs/ + |---aggregate_outputs/ + |---multiqc/ (when applicable) --> multiqc outputs + |---dais-ribosome/ -> dais inputs and outputs + |---dash-json/ -> json files + |---mira-reports/ -> the aggregated fasta files and html files + |---csv-reports/ -> CSV summary files + |---parquet-reports/ (when applicable) + |---Sample_ID/ + |---subsampled-reads/ (when applicable) -> fastqs and log files + |---barcode-trimmed-reads/ (when applicable) -> fastqs and log files + |---primer-trimmed-reads/ (when applicable) -> fastqs and log files + |---IRMA/Sample_ID/ -> IRMA outputs and log files + |---IRMA-negative/ (when applicable) |---nextclade -> Inputs and outputs for Nextclade - |---input_fasta_files -> Input FASTA files for running Nextclade - |---fastq_pass -> ONT data only – concatenated fastqs - |---pipeline_info -> execution reports + |---input_fasta_files/ -> Input FASTA files for running Nextclade + |--- All nextclade outputs files including aligned fastas, auspice json, and csv files. + |---fastq_pass/ -> ONT data only – concatenated fastqs + |---pipeline_info/ -> execution reports, sad_samples.tsv and program versions file ``` ### MultiQC diff --git a/docs/usage.md b/docs/usage.md index 18566c3..c076d32 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -43,43 +43,43 @@ The sample sheet will need to be set up as seen below. Using the samplesheet tha Illumina data should be set up as follows: ```csv -Sample ID,Sample Type +sample_id,sample_type sample_1,Test sample_2,Test sample_3,Test sample_4,Test ``` - Each row represents a sample. -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `Sample ID` | Custom sample name. This entry must match the name associated with the paired reads. 
Convert all spaces in sample names to underscores (`_`). | -| `Sample Type` | The sample type for the given sample. Ex: test, - control, + control, etc. | - -**Important things to note about samplesheet:** +| Column | Description | +|------------|-----------------------------------------------------------------------------------------------------------| +| `sample_id` | Custom sample name. This entry must match the name associated with the paired reads. Convert all spaces in sample names to underscores (`_`). | +| `sample_type` | The sample type for the given sample. Ex: test, - control, + control, etc. | -- Sample names within the "Sample ID" column need to be unique. -- Be sure that sample names are not nested within another sample name (i.e. having sample_1 and sample_1_1) -- Be sure that there are no empty lines at the end of the samplesheet. -- For Illumina samples be sure that you have read 1 and read 2 for all samples in samplesheet. ONT data should be set up as follows: ```csv -Barcode #,Sample ID,Sample Type +barcode,sample_id,sample_type barcode07,s1,Test barcode37,s2,Test barcode41,s3,Test ``` - Each row represents a sample. -| Column | Description | -| --------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `Barcode #` | The barcode used to create the ONT data for this sample. Must match the fold contain the fastq files associated with the sample. Single digit numbers must have 0 in front of them. Ex: barcode07 | -| `Sample ID` | Custom sample name. Convert all spaces in sample names to underscores (`_`). | -| `Sample Type` | The sample type for the given sample. Ex: test, positive, negative, etc. 
| +| Column | Description | +|------------|-----------------------------------------------------------------------------------------------------------| +| `barcode` | The barcode used to create the ONT data for this sample. Must match the folder containing the fastq files associated with the sample. Single digit numbers must have 0 in front of them. Ex: barcode07 | +| `sample_id` | Custom sample name. Convert all spaces in sample names to underscores (`_`). | +| `sample_type` | The sample type for the given sample. Ex: test, positive, negative, etc. | + +**Important things to note about samplesheet:** + +- Sample names within the `sample_id` column need to be unique. +- The headers must be named as seen above. +- Be sure that there are no empty lines at the end of the samplesheet. +- For Illumina samples be sure that you have read 1 and read 2 for all samples in samplesheet. +- Illumina fastq files must be named in this format: {sample_id}_R1\*fastq\* or {sample_id}_R1\*fq\* AND {sample_id}_R2\*fastq\* or {sample_id}_R2\*fq\* ### amd platform samplesheet set up @@ -93,12 +93,12 @@ TREATMENT_REP2,/fastqs/AEG588A5_S5_L003_R1_001.fastq.gz,/f TREATMENT_REP3,/fastqs/AEG588A6_S6_L003_R1_001.fastq.gz,/fastqs/AEG588A6_S6_L003_R2_001.fastq.gz,test ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". 
| -| `sample_type` | The sample type for the given sample. Ex: test, - control, + control, etc. | +| Column | Description | +|------------|-----------------------------------------------------------------------------------------------------------| +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `sample_type` | The sample type for the given sample. Ex: test, - control, + control, etc. | ONT data should be set up as follows: @@ -110,13 +110,13 @@ s3,/fastq_pass/cat_fastqs/s3.fastq.gz,,barcode41,Test ``` -| Column | Description | -| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for ONT that have been concatenated by barcode. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Leave blank for ONT data. | +| Column | Description | +|------------|-----------------------------------------------------------------------------------------------------------| +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). 
| +| `fastq_1` | Full path to FastQ file for ONT that have been concatenated by barcode. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Leave blank for ONT data. | | `barcode` | The barcode used to create the ONT data for this sample. Must match the fold contain the fastq files associated with the sample. Single digit numbers must have 0 in front of them. Ex: barcode07 | -| `sample_type` | The sample type for the given sample. Ex: test, - control, + control, etc. | +| `sample_type` | The sample type for the given sample. Ex: test, - control, + control, etc. | ### File set-up with MIRA samplesheet @@ -162,8 +162,8 @@ Oxford Nanopore set up should be set up as follows: ### *all commands listed below can not be included in run command and the defaults will be used, aside from the p flag that must be used wit hSC2 and RSV pipelines* -| Flag | Description | -|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Flag | Description | +|------------|-----------------------------------------------------------------------------------------------------------------------| | `p` | Provide a built-in primer schema if using experiment type SC2-Whole-Genome-Illumina. SARS-CoV-2 options: articv3, articv4, articv4.1, articv5.3.2, qiagen, swift, swift_211206. RSV options: RSV_CDC_8amplicon_230901 **Will be overwritten by custom_primers flag if both flags are provided** | | `custom_primers` | Provide a custom primer schema by entering the file path to your own custom primer fasta file. Must be fasta formatted. 
**primer_kmer_len and primer_restrict_window flags must also be used with this flag** | | `primer_kmer_len` | When primer_kmer_len is set to K, all K-mers for the primers are stored and matching against K-mers in the queries (reads) is performed. | diff --git a/modules/local/checkmiraversion.nf b/modules/local/checkmiraversion.nf index a13766c..c9a6a33 100644 --- a/modules/local/checkmiraversion.nf +++ b/modules/local/checkmiraversion.nf @@ -1,7 +1,7 @@ process CHECKMIRAVERSION { label 'process_single' - container 'cdcgov/mira-oxide:v1.3.1' + container 'cdcgov/mira-oxide:v1.4.0' input: path description_file_path diff --git a/modules/local/findchemistry.nf b/modules/local/findchemistry.nf index aa42bb0..9da80aa 100644 --- a/modules/local/findchemistry.nf +++ b/modules/local/findchemistry.nf @@ -2,7 +2,7 @@ process FINDCHEMISTRY { tag "${sample}" label 'process_single' - container 'cdcgov/mira-oxide:v1.3.1' + container 'cdcgov/mira-oxide:v1.4.0' input: tuple val(sample), path(fastq) diff --git a/modules/local/nextflowsamplesheet.nf b/modules/local/nextflowsamplesheet.nf new file mode 100644 index 0000000..67506b9 --- /dev/null +++ b/modules/local/nextflowsamplesheet.nf @@ -0,0 +1,52 @@ +// Build the Nextflow-format samplesheet from the user samplesheet with mira-oxide. +process NEXTFLOWSAMPLESHEET { + label 'process_single' + + container 'cdcgov/mira-oxide:v1.4.0' + + input: + path samplesheet + val fastq_files + val experiment_type + + output: + path 'nextflow_samplesheet.csv', emit: nf_samplesheet + // Optional: only collected when the command writes bad_samples.tsv + path 'bad_samples.tsv', emit: bad_samples, optional: true + path 'versions.yml', emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + if [[ "${experiment_type}" == *"Illumina"* ]]; then + mira-oxide create-nextflow-samplesheet -s "${samplesheet}" -r "${fastq_files}" -e "${experiment_type}" + elif [[ "${experiment_type}" == *"ONT"* ]]; then + # ONT runs pass params.outdir as -r, as the removed NEXTFLOWSAMPLESHEETO module did; fastq_files is not used here + mira-oxide create-nextflow-samplesheet -s "${samplesheet}" -r "${params.outdir}" -e "${experiment_type}" + else + # Fail fast instead of finishing without nextflow_samplesheet.csv + echo "ERROR: unsupported experiment type '${experiment_type}' (expected a name containing Illumina or ONT)" >&2 + exit 1 + fi + + cat <<-END_VERSIONS > 
versions.yml + "${task.process}": nextflowsamplesheet: mira-oxide \$(mira-oxide --version |& sed '1!d; s/mira-oxide //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + + """ + # Create the required output so stub runs (-stub) do not fail on a missing file + touch nextflow_samplesheet.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": nextflowsamplesheet: mira-oxide \$(mira-oxide --version |& sed '1!d; s/mira-oxide //') + END_VERSIONS + """ +} diff --git a/modules/local/nextflowsamplesheeti.nf b/modules/local/nextflowsamplesheeti.nf deleted file mode 100644 index 7550169..0000000 --- a/modules/local/nextflowsamplesheeti.nf +++ /dev/null @@ -1,38 +0,0 @@ -process NEXTFLOWSAMPLESHEETI { - label 'process_single' - - container 'cdcgov/mira-nf:python3.10-alpine' - - input: - path samplesheet - val fastq_files - val experiment_type - - output: - path 'nextflow_samplesheet.csv', emit: nf_samplesheet - path 'versions.yml', emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - """ - #Create nf samplesheet - create_nextflow_samplesheet_i.py -s "${samplesheet}" -r "${fastq_files}" -e "${experiment_type}" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": nextflowsamplesheeti: \$(python3 --version |& sed '1!d ; s/python //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - - """ - cat <<-END_VERSIONS > versions.yml - "${task.process}": nextflowsamplesheeti: \$(python3 --version |& sed '1!d ; /python //') - END_VERSIONS - """ -} diff --git a/modules/local/nextflowsamplesheeto.nf b/modules/local/nextflowsamplesheeto.nf deleted file mode 100644 index cc52b23..0000000 --- a/modules/local/nextflowsamplesheeto.nf +++ /dev/null @@ -1,38 +0,0 @@ -process NEXTFLOWSAMPLESHEETO { - label 'process_single' - - container 'cdcgov/mira-nf:python3.10-alpine' - - input: - path samplesheet - path run_ID - val experiment_type - - output: - path 'nextflow_samplesheet.csv', emit: nf_samplesheet - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - 
def args = task.ext.args ?: '' - - """ - # AWS Healthomics requires a path to the samplesheet in order to stage the files for the pipeline - create_nextflow_samplesheet_o.py -s "${samplesheet}" -r "${params.outdir}" -e "${experiment_type}" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": nextflowsamplesheeto: \$(python3 --version |& sed '1!d ; s/python3 //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - - """ - cat <<-END_VERSIONS > versions.yml - "${task.process}": nextflowsamplesheeto: \$(python3 --version |& sed '1!d ; s/python3 //') - END_VERSIONS - """ -} diff --git a/modules/local/positionsofint.nf b/modules/local/positionsofint.nf index fdd61d0..a9a69a3 100644 --- a/modules/local/positionsofint.nf +++ b/modules/local/positionsofint.nf @@ -1,6 +1,6 @@ process POSITIONSOFINT { label 'process_low' - container 'cdcgov/mira-oxide:v1.3.1' + container 'cdcgov/mira-oxide:v1.4.0' input: path dais_seq_output diff --git a/modules/local/preparemirareports.nf b/modules/local/preparemirareports.nf index 4d12c1f..eba22f0 100644 --- a/modules/local/preparemirareports.nf +++ b/modules/local/preparemirareports.nf @@ -1,7 +1,7 @@ process PREPAREMIRAREPORTS { label 'process_medium' - container 'cdcgov/mira-oxide:v1.3.1' + container 'cdcgov/mira-oxide:v1.4.0' input: path dais_outputs diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index bb76310..b631c9e 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,8 +1,7 @@ process SAMPLESHEET_CHECK { label 'process_single' - conda 'conda-forge::python=3.8.3' - container 'cdcgov/mira-nf:python3.10-alpine' + container 'cdcgov/mira-oxide:v1.4.0' input: path samplesheet @@ -17,12 +16,10 @@ process SAMPLESHEET_CHECK { script: // This script is bundled with the pipeline, in mira/cli/bin/ """ - check_samplesheet.py \\ - ${samplesheet} \\ - samplesheet.valid.csv + mira-oxide samplesheet-check -i ${samplesheet} -o samplesheet.valid.csv 
cat <<-END_VERSIONS > versions.yml - "${task.process}": python \$(python --version | sed 's/Python //g') + "${task.process}": mira-oxide \$(mira-oxide --version |& sed '1!d; s/mira-oxide //') END_VERSIONS """ } diff --git a/modules/local/variantsofint.nf b/modules/local/variantsofint.nf index a10f6a7..119ac80 100644 --- a/modules/local/variantsofint.nf +++ b/modules/local/variantsofint.nf @@ -1,6 +1,6 @@ process VARIANTSOFINT { label 'process_low' - container 'cdcgov/mira-oxide:v1.3.1' + container 'cdcgov/mira-oxide:v1.4.0' input: path dais_seq_output diff --git a/nextflow.config b/nextflow.config index 2415fd6..daa627c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -290,7 +290,7 @@ manifest { affiliation: 'CDC', email: '', github: '', - contribution: ['author'], // List of contribution types ('author', 'maintainer' or 'contributor') + contribution: ['author'], orcid: '' ], [ @@ -298,7 +298,7 @@ manifest { affiliation: 'CDC', email: 'xpa3@cdc.gov', github: '@mandysulli', - contribution: ['maintainer','author'], + contribution: ['maintainer'], orcid: '' ], [ diff --git a/pyarrow/Dockerfile-pyarrow b/pyarrow/Dockerfile-pyarrow deleted file mode 100755 index 540d3f3..0000000 --- a/pyarrow/Dockerfile-pyarrow +++ /dev/null @@ -1,82 +0,0 @@ -# Create an argument to pull a particular version of base image -ARG python_image -ARG python_image=${python_image:-python:3.10-alpine} - -#################################################################################################### -# BASE IMAGE -#################################################################################################### -FROM ${python_image} AS base - -# Required certs for apk update -COPY ca.crt /root/ca.crt - -# Put certs in /etc/ssl/certs location -RUN cat /root/ca.crt >> /etc/ssl/certs/ca-certificates.crt - -# Setup env -ENV LANG=C.UTF-8 -ENV LC_ALL=C.UTF-8 -ENV PYTHONDONTWRITEBYTECODE=1 -ENV PYTHONFAULTHANDLER=1 -ENV ACCEPT_EULA=Y - -RUN apk update && apk add --no-cache \ - build-base \ - 
g++ \ - gcc \ - cmake \ - libffi-dev \ - openssl-dev \ - libstdc++ \ - zlib-dev \ - lz4-dev \ - libgcc \ - ninja \ - git \ - bash \ - && pip install --upgrade pip \ - && pip install pipenv cython numpy==1.23.1 - -ARG ARROW_VERSION -ARG ARROW_VERSION=${ARROW_VERSION:-17.0.0} -ARG ARROW_SHA256=8379554d89f19f2c8db63620721cabade62541f47a4e706dfb0a401f05a713ef -ARG ARROW_BUILD_TYPE=release - -ENV ARROW_HOME=/usr/local \ - PARQUET_HOME=/usr/local - -RUN mkdir /arrow \ - && wget -q https://github.com/apache/arrow/archive/apache-arrow-${ARROW_VERSION}.tar.gz -O /tmp/apache-arrow.tar.gz \ - && echo "${ARROW_SHA256} *apache-arrow.tar.gz" | sha256sum /tmp/apache-arrow.tar.gz \ - && tar -xvf /tmp/apache-arrow.tar.gz -C /arrow --strip-components 1 - -# Create the patch file for re2 -RUN echo "diff --git a/util/pcre.h b/util/pcre.h" > /arrow/re2_patch.diff \ - && echo "index e69de29..b6f3e31 100644" >> /arrow/re2_patch.diff \ - && echo "--- a/util/pcre.h" >> /arrow/re2_patch.diff \ - && echo "+++ b/util/pcre.h" >> /arrow/re2_patch.diff \ - && echo "@@ -21,6 +21,7 @@" >> /arrow/re2_patch.diff \ - && echo " #include \"re2/filtered_re2.h\"" >> /arrow/re2_patch.diff \ - && echo " #include \"re2/pod_array.h\"" >> /arrow/re2_patch.diff \ - && echo " #include \"re2/stringpiece.h\"" >> /arrow/re2_patch.diff \ - && echo "+#include " >> /arrow/re2_patch.diff - -# Configure the build using CMake -RUN cd /arrow/cpp \ - && cmake --preset ninja-release-python - -# Pre-fetch dependencies without building -RUN cd /arrow/cpp \ - && cmake --build . --target re2_ep -- -j1 || true - -# Apply the patch to re2 after the dependencies are fetched but before the build -RUN cd /arrow/cpp/re2_ep-prefix/src/re2_ep \ - && patch -p1 < /arrow/re2_patch.diff - -# Continue with the build and install Apache Arrow -RUN cd /arrow/cpp \ - && cmake --build . 
--target install \ - && cd /arrow/python \ - && python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --with-parquet \ - && python setup.py install \ - && rm -rf /arrow /tmp/apache-arrow.tar.gz diff --git a/pyarrow/Dockerfile-pyarrow-older-version b/pyarrow/Dockerfile-pyarrow-older-version deleted file mode 100644 index 0a1a069..0000000 --- a/pyarrow/Dockerfile-pyarrow-older-version +++ /dev/null @@ -1,82 +0,0 @@ -# Create an argument to pull a particular version of base image -ARG python_image -ARG python_image=${python_image:-python:3.10-alpine} - -#################################################################################################### -# BASE IMAGE -#################################################################################################### -FROM ${python_image} AS base - -# Required certs for apk update -COPY ca.crt /root/ca.crt - -# Put certs in /etc/ssl/certs location -RUN cat /root/ca.crt >> /etc/ssl/certs/ca-certificates.crt - -# Setup env -ENV LANG=C.UTF-8 -ENV LC_ALL=C.UTF-8 -ENV PYTHONDONTWRITEBYTECODE=1 -ENV PYTHONFAULTHANDLER=1 -ENV ACCEPT_EULA=Y - -RUN apk update && apk add --no-cache \ - build-base \ - g++ \ - gcc \ - cmake \ - libffi-dev \ - openssl-dev \ - libstdc++ \ - zlib-dev \ - lz4-dev \ - libgcc \ - ninja \ - bash \ - git \ - && pip install --upgrade pip \ - && pip install pipenv cython numpy==1.23.1 - -ARG ARROW_VERSION -ARG ARROW_VERSION=${ARROW_VERSION:-17.0.0} -ARG ARROW_SHA256=8379554d89f19f2c8db63620721cabade62541f47a4e706dfb0a401f05a713ef -ARG ARROW_BUILD_TYPE=release - -ENV ARROW_HOME=/usr/local \ - PARQUET_HOME=/usr/local - -RUN mkdir /arrow \ - && wget -q https://github.com/apache/arrow/archive/apache-arrow-${ARROW_VERSION}.tar.gz -O /tmp/apache-arrow.tar.gz \ - && echo "${ARROW_SHA256} *apache-arrow.tar.gz" | sha256sum /tmp/apache-arrow.tar.gz \ - && tar -xvf /tmp/apache-arrow.tar.gz -C /arrow --strip-components 1 - -# Create the patch file for re2 -RUN echo "diff --git a/util/pcre.h b/util/pcre.h" > 
/arrow/re2_patch.diff \ - && echo "index e69de29..b6f3e31 100644" >> /arrow/re2_patch.diff \ - && echo "--- a/util/pcre.h" >> /arrow/re2_patch.diff \ - && echo "+++ b/util/pcre.h" >> /arrow/re2_patch.diff \ - && echo "@@ -21,6 +21,7 @@" >> /arrow/re2_patch.diff \ - && echo " #include \"re2/filtered_re2.h\"" >> /arrow/re2_patch.diff \ - && echo " #include \"re2/pod_array.h\"" >> /arrow/re2_patch.diff \ - && echo " #include \"re2/stringpiece.h\"" >> /arrow/re2_patch.diff \ - && echo "+#include " >> /arrow/re2_patch.diff - -# Configure the build using CMake -RUN cd /arrow/cpp \ - && cmake --preset ninja-release-python - -# Pre-fetch dependencies without building -RUN cd /arrow/cpp \ - && cmake --build . --target re2_ep -- -j1 || true - -# Apply the patch to re2 after the dependencies are fetched but before the build -RUN cd /arrow/cpp/re2_ep-prefix/src/re2_ep \ - && patch -p1 < /arrow/re2_patch.diff - -# Continue with the build and install Apache Arrow -RUN cd /arrow/cpp \ - && cmake --build . 
--target install \ - && cd /arrow/python \ - && python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --with-parquet \ - && python setup.py install \ - && rm -rf /arrow /tmp/apache-arrow.tar.gz diff --git a/workflows/mira.nf b/workflows/mira.nf index 7f78e40..c75cdc7 100644 --- a/workflows/mira.nf +++ b/workflows/mira.nf @@ -9,8 +9,7 @@ // include { STAGES3FILES } from '../modules/local/stages3files' include { CONCATFASTQS } from '../modules/local/concatfastqs' -include { NEXTFLOWSAMPLESHEETI } from '../modules/local/nextflowsamplesheeti' -include { NEXTFLOWSAMPLESHEETO } from '../modules/local/nextflowsamplesheeto' +include { NEXTFLOWSAMPLESHEET } from '../modules/local/nextflowsamplesheet' include { INPUT_CHECK } from '../subworkflows/local/input_check' include { READQC } from '../subworkflows/local/readqc' include { PREPILLUMINAREADS } from '../subworkflows/local/prepilluminareads' @@ -111,17 +110,25 @@ workflow flu_i { } - NEXTFLOWSAMPLESHEETI(samplesheet_ch, sequences_ch, experiment_type_ch) - // OMICS & Local PLATFORM: END - - ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEETI.out.versions) - nf_samplesheet_ch = NEXTFLOWSAMPLESHEETI.out.nf_samplesheet + NEXTFLOWSAMPLESHEET(samplesheet_ch, sequences_ch, experiment_type_ch) + ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEET.out.versions) + nf_samplesheet_ch = NEXTFLOWSAMPLESHEET.out.nf_samplesheet + + // Get the channel for the bad samples TSV + def bad_samples_ch = NEXTFLOWSAMPLESHEET.out.bad_samples + + // Consume the channel and print the contents + bad_samples_ch.subscribe { tsv_file -> + tsv_file.eachLine { line -> + println line + } + } // SUBWORKFLOW: Read in samplesheet, validate and stage input files // - INPUT_CHECK(NEXTFLOWSAMPLESHEETI.out.nf_samplesheet) + INPUT_CHECK(NEXTFLOWSAMPLESHEET.out.nf_samplesheet) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - } else if (params.amd_platform == true) { + } else if (params.amd_platform == true) { //save samplesheet as the nf sample 
nf_samplesheet_ch = samplesheet_ch @@ -291,17 +298,25 @@ workflow flu_o { collected_concatenated_fastqs_ch = concatenated_fastqs_ch.collect() // MODULE: Convert the samplesheet to a nextflow format - NEXTFLOWSAMPLESHEETO(samplesheet_ch, collected_concatenated_fastqs_ch, experiment_type_ch) - // OMICS & Local END + NEXTFLOWSAMPLESHEET(samplesheet_ch, collected_concatenated_fastqs_ch, experiment_type_ch) + ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEET.out.versions) + nf_samplesheet_ch = NEXTFLOWSAMPLESHEET.out.nf_samplesheet - ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEETO.out.versions) - nf_samplesheet_ch = NEXTFLOWSAMPLESHEETO.out.nf_samplesheet + // Get the channel for the bad samples TSV + def bad_samples_ch = NEXTFLOWSAMPLESHEET.out.bad_samples + + // Consume the channel and print the contents + bad_samples_ch.subscribe { tsv_file -> + tsv_file.eachLine { line -> + println line + } + } // SUBWORKFLOW: Read in samplesheet, validate and stage input files // - INPUT_CHECK(NEXTFLOWSAMPLESHEETO.out.nf_samplesheet) + INPUT_CHECK(NEXTFLOWSAMPLESHEET.out.nf_samplesheet) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - } else if (params.amd_platform == true) { + } else if (params.amd_platform == true) { //save samplesheet as the nf sample nf_samplesheet_ch = samplesheet_ch @@ -471,17 +486,25 @@ workflow sc2_spike_o { concatenated_fastqs_ch = fastq_ch | CONCATFASTQS collected_concatenated_fastqs_ch = concatenated_fastqs_ch.collect() // MODULE: Convert the samplesheet to a nextflow format - NEXTFLOWSAMPLESHEETO(samplesheet_ch, collected_concatenated_fastqs_ch, experiment_type_ch) - // OMICS & Local END + NEXTFLOWSAMPLESHEET(samplesheet_ch, collected_concatenated_fastqs_ch, experiment_type_ch) + ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEET.out.versions) + nf_samplesheet_ch = NEXTFLOWSAMPLESHEET.out.nf_samplesheet - ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEETO.out.versions) - nf_samplesheet_ch = NEXTFLOWSAMPLESHEETO.out.nf_samplesheet + // Get the 
channel for the bad samples TSV + def bad_samples_ch = NEXTFLOWSAMPLESHEET.out.bad_samples + + // Consume the channel and print the contents + bad_samples_ch.subscribe { tsv_file -> + tsv_file.eachLine { line -> + println line + } + } // SUBWORKFLOW: Read in samplesheet, validate and stage input files // - INPUT_CHECK(NEXTFLOWSAMPLESHEETO.out.nf_samplesheet) + INPUT_CHECK(NEXTFLOWSAMPLESHEET.out.nf_samplesheet) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - } else if (params.amd_platform == true) { + } else if (params.amd_platform == true) { //save samplesheet as the nf sample nf_samplesheet_ch = samplesheet_ch @@ -619,17 +642,25 @@ workflow sc2_wgs_o { collected_concatenated_fastqs_ch = concatenated_fastqs_ch.collect() // MODULE: Convert the samplesheet to a nextflow format - NEXTFLOWSAMPLESHEETO(samplesheet_ch, collected_concatenated_fastqs_ch, experiment_type_ch) - // OMICS & Local END + NEXTFLOWSAMPLESHEET(samplesheet_ch, collected_concatenated_fastqs_ch, experiment_type_ch) + ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEET.out.versions) + nf_samplesheet_ch = NEXTFLOWSAMPLESHEET.out.nf_samplesheet + + // Get the channel for the bad samples TSV + def bad_samples_ch = NEXTFLOWSAMPLESHEET.out.bad_samples - ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEETO.out.versions) - nf_samplesheet_ch = NEXTFLOWSAMPLESHEETO.out.nf_samplesheet + // Consume the channel and print the contents + bad_samples_ch.subscribe { tsv_file -> + tsv_file.eachLine { line -> + println line + } + } // SUBWORKFLOW: Read in samplesheet, validate and stage input files // - INPUT_CHECK(NEXTFLOWSAMPLESHEETO.out.nf_samplesheet) + INPUT_CHECK(NEXTFLOWSAMPLESHEET.out.nf_samplesheet) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - } else if (params.amd_platform == true) { + } else if (params.amd_platform == true) { //save samplesheet as the nf sample nf_samplesheet_ch = samplesheet_ch @@ -802,7 +833,7 @@ workflow sc2_wgs_i { } if (params.amd_platform == false) { - // MODULE: 
Convert the samplesheet to a nextflow format + // OMICS & Local PLATFORM: START Concat all fastq files by barcode // Stage fastq files based on profile if (params.restage == true){ fastq_ch = Channel @@ -813,18 +844,26 @@ workflow sc2_wgs_i { sequences_ch = Channel.fromPath("${params.runpath}/fastqs", checkIfExists: true) } - NEXTFLOWSAMPLESHEETI(samplesheet_ch, sequences_ch, experiment_type_ch) - // OMICS & Local PLATFORM: END + // MODULE: Convert the samplesheet to a nextflow format + NEXTFLOWSAMPLESHEET(samplesheet_ch, sequences_ch, experiment_type_ch) + ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEET.out.versions) + nf_samplesheet_ch = NEXTFLOWSAMPLESHEET.out.nf_samplesheet + + // Get the channel for the bad samples TSV + def bad_samples_ch = NEXTFLOWSAMPLESHEET.out.bad_samples - // NEXTFLOWSAMPLESHEETI(samplesheet_ch, experiment_type_ch) - ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEETI.out.versions) - nf_samplesheet_ch = NEXTFLOWSAMPLESHEETI.out.nf_samplesheet + // Consume the channel and print the contents + bad_samples_ch.subscribe { tsv_file -> + tsv_file.eachLine { line -> + println line + } + } // SUBWORKFLOW: Read in samplesheet, validate and stage input files // - INPUT_CHECK(NEXTFLOWSAMPLESHEETI.out.nf_samplesheet) + INPUT_CHECK(NEXTFLOWSAMPLESHEET.out.nf_samplesheet) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - } else if (params.amd_platform == true) { + } else if (params.amd_platform == true) { // save samplesheet as the nf sample nf_samplesheet_ch = samplesheet_ch @@ -995,7 +1034,7 @@ workflow rsv_i { } if (params.amd_platform == false) { - // MODULE: Convert the samplesheet to a nextflow format + // OMICS & Local PLATFORM: START Concat all fastq files by barcode // Stage fastq files based on profile if (params.restage == true){ fastq_ch = Channel @@ -1006,15 +1045,24 @@ workflow rsv_i { sequences_ch = Channel.fromPath("${params.runpath}/fastqs", checkIfExists: true) } - NEXTFLOWSAMPLESHEETI(samplesheet_ch, sequences_ch, 
experiment_type_ch) - // OMICS & Local PLATFORM: END - // NEXTFLOWSAMPLESHEETI(samplesheet_ch, experiment_type_ch) - ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEETI.out.versions) - nf_samplesheet_ch = NEXTFLOWSAMPLESHEETI.out.nf_samplesheet + // MODULE: Convert the samplesheet to a nextflow format + NEXTFLOWSAMPLESHEET(samplesheet_ch, sequences_ch, experiment_type_ch) + ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEET.out.versions) + nf_samplesheet_ch = NEXTFLOWSAMPLESHEET.out.nf_samplesheet + + // Get the channel for the bad samples TSV + def bad_samples_ch = NEXTFLOWSAMPLESHEET.out.bad_samples + + // Consume the channel and print the contents + bad_samples_ch.subscribe { tsv_file -> + tsv_file.eachLine { line -> + println line + } + } // SUBWORKFLOW: Read in samplesheet, validate and stage input files // - INPUT_CHECK(NEXTFLOWSAMPLESHEETI.out.nf_samplesheet) + INPUT_CHECK(NEXTFLOWSAMPLESHEET.out.nf_samplesheet) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) } else if (params.amd_platform == true) { // save samplesheet as the nf sample @@ -1183,18 +1231,25 @@ workflow rsv_o { collected_concatenated_fastqs_ch = concatenated_fastqs_ch.collect() // MODULE: Convert the samplesheet to a nextflow format - NEXTFLOWSAMPLESHEETO(samplesheet_ch, collected_concatenated_fastqs_ch, experiment_type_ch) - // OMICS & Local END + NEXTFLOWSAMPLESHEET(samplesheet_ch, collected_concatenated_fastqs_ch, experiment_type_ch) + ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEET.out.versions) + nf_samplesheet_ch = NEXTFLOWSAMPLESHEET.out.nf_samplesheet - // NEXTFLOWSAMPLESHEETO(samplesheet_ch, experiment_type_ch, CONCATFASTQS.out) - ch_versions = ch_versions.mix(NEXTFLOWSAMPLESHEETO.out.versions) - nf_samplesheet_ch = NEXTFLOWSAMPLESHEETO.out.nf_samplesheet + // Get the channel for the bad samples TSV + def bad_samples_ch = NEXTFLOWSAMPLESHEET.out.bad_samples + + // Consume the channel and print the contents + bad_samples_ch.subscribe { tsv_file -> + tsv_file.eachLine { line -> 
+ println line + } + } // SUBWORKFLOW: Read in samplesheet, validate and stage input files // - INPUT_CHECK(NEXTFLOWSAMPLESHEETO.out.nf_samplesheet) + INPUT_CHECK(NEXTFLOWSAMPLESHEET.out.nf_samplesheet) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - } else if (params.amd_platform == true) { + } else if (params.amd_platform == true) { // save samplesheet as the nf sample nf_samplesheet_ch = samplesheet_ch