forked from GW-HIVE/CensuScope
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
executable file
·65 lines (51 loc) · 2.02 KB
/
Dockerfile
File metadata and controls
executable file
·65 lines (51 loc) · 2.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Dockerfile
## Base Image & System Packages
FROM python:3.10-slim
RUN apt-get update && apt-get install -y \
wget \
gcc \
make \
curl \
sqlite3 \
zlib1g-dev \
build-essential \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*
# Install NCBI BLAST+
RUN apt-get update && apt-get install -y ncbi-blast+ \
&& rm -rf /var/lib/apt/lists/*
# Install Seqtk
RUN wget https://github.com/lh3/seqtk/archive/refs/tags/v1.3.tar.gz && \
tar -xzvf v1.3.tar.gz && \
cd seqtk-1.3 && \
make && \
mv seqtk /usr/local/bin/ && \
cd .. && \
rm -rf seqtk-1.3 v1.3.tar.gz
## Create a non-root user
RUN useradd -ms /bin/bash appuser
USER appuser
WORKDIR /app
## Download NCBI Taxonomy Data
RUN mkdir raw_data
RUN curl -o /app/raw_data/nucl_gb.accession2taxid.gz \
ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/nucl_gb.accession2taxid.gz
RUN curl -o /app/raw_data/nucl_wgs.accession2taxid.EXTRA.gz \
ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/nucl_wgs.accession2taxid.EXTRA.gz
RUN curl -o /app/raw_data/nucl_wgs.accession2taxid.gz \
ftp://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/nucl_wgs.accession2taxid.gz
RUN curl -o /app/raw_data/new_taxdump.tar.gz \
ftp://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz
RUN tar -xzf /app/raw_data/new_taxdump.tar.gz -C /app/raw_data
RUN chown -R appuser:appuser /app/raw_data
COPY requirements.txt requirements.txt
COPY lib ./lib
## Build the SQLite Database
RUN lib/nucleotide-db.sh raw_data/ taxonomy.db
RUN lib/add-nodes.sh raw_data/nodes.dmp taxonomy.db
RUN lib/add-names.sh raw_data/names.dmp taxonomy.db
RUN lib/add-hosts.sh raw_data/host.dmp taxonomy.db
## Python Setup
RUN pip install --no-cache-dir -r requirements.txt
## Default Command: ITERATIONS, etc., should be passed as environment variables via docker run or docker-compose.yml
CMD ["python", "lib/censuscope.py", "--iterations", "$ITERATIONS", "--sample_size", "$SAMPLE_SIZE", "--tax-depth", "$TAXDEPTH", "--query_path", "$QUERYPATH", "--database", "$DATABASE"]