@@ -70,39 +70,65 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
7070# Create app directory
7171WORKDIR /app
7272
73+ # Upgrade pip and install build tools
74+ RUN pip install --no-cache-dir --upgrade pip setuptools wheel
75+
7376# Copy Python dependencies installation info from README
74- # Install Python dependencies
77+ # Install Python dependencies in stages to avoid timeout and memory issues
78+
79+ # Stage 1: Install PyTorch CPU version first (largest package)
80+ # Using official PyTorch CPU index for smaller size
81+ RUN pip install --no-cache-dir --default-timeout=100 torch --index-url https://download.pytorch.org/whl/cpu
82+
83+ # Stage 2: Install core dependencies
7584RUN pip install --no-cache-dir \
7685 fastapi \
7786 uvicorn \
7887 boto3 \
7988 botocore \
8089 openai \
81- ddgs \
82- rich \
90+ requests
91+
92+ # Stage 3: Install data processing libraries
93+ RUN pip install --no-cache-dir \
8394 numpy \
84- openpyxl \
85- biopython \
86- mammoth \
87- markdownify \
8895 pandas \
96+ openpyxl \
8997 pdfminer-six \
9098 python-pptx \
9199 pdf2image \
92- puremagic \
100+ puremagic
101+
102+ # Stage 4: Install document and media processing
103+ RUN pip install --no-cache-dir \
104+ biopython \
105+ mammoth \
106+ markdownify \
93107 pydub \
94- SpeechRecognition \
108+ SpeechRecognition
109+
110+ # Stage 5: Install web scraping and search
111+ RUN pip install --no-cache-dir \
112+ ddgs \
95113 bs4 \
96114 youtube-transcript-api \
97- requests \
115+ selenium \
116+ helium
117+
118+ # Stage 6: Install AI/ML libraries
119+ RUN pip install --no-cache-dir \
98120 transformers \
99121 protobuf \
100122 langchain_openai \
101123 langchain \
102- selenium \
103- helium \
104124 smolagents
105125
126+ # Stage 7: Install utilities
127+ RUN pip install --no-cache-dir rich
128+
129+ # Pre-download and cache the Qwen tokenizer to avoid download on first run
130+ RUN python3 -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Qwen/Qwen3-32B')"
131+
106132# Copy the entire project
107133COPY . /app/
108134
@@ -121,25 +147,9 @@ RUN mkdir -p test_output ck_pro/ck_web/_web/DownloadedFiles ck_pro/ck_web/_web/s
121147# 3001 for Playwright web service
122148EXPOSE 8080 3001
123149
124- # Create startup script
125- RUN echo '#!/bin/bash\n \
126- set -e\n \
127- \n \
128- # Start the Playwright web service in background\n \
129- echo "Starting Playwright web service on port 3001..."\n \
130- cd /app/ck_pro/ck_web/_web\n \
131- LISTEN_PORT=3001 npm start &\n \
132- WEB_PID=$!\n \
133- \n \
134- # Wait for web service to be ready\n \
135- echo "Waiting for web service to start..."\n \
136- sleep 5\n \
137- \n \
138- # Start the FastAPI service\n \
139- echo "Starting FastAPI service on port 8080..."\n \
140- cd /app\n \
141- exec uvicorn agentcompass_service_fastapi:app --host 0.0.0.0 --port 8080 --workers ${WORKERS:-4}\n \
142- ' > /app/start.sh && chmod +x /app/start.sh
150+ # Copy startup script
151+ COPY start.sh /app/start.sh
152+ RUN chmod +x /app/start.sh
143153
144154# Set environment variables
145155ENV PYTHONPATH=/app
@@ -154,4 +164,3 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
154164
155165# Run the startup script
156166CMD ["/app/start.sh" ]
157-
0 commit comments