update docker

Zhudongsheng75 · Zhudongsheng75 · commit 9a881aa6b33b · 2025-10-15T12:23:18.000+08:00
diff --git a/Dockerfile b/Dockerfile
@@ -70,39 +70,65 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
 # Create app directory
 WORKDIR /app
 
+# Upgrade pip and install build tools
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel
+
 # Copy Python dependencies installation info from README
-# Install Python dependencies
+# Install Python dependencies in stages to avoid timeout and memory issues
+
+# Stage 1: Install PyTorch CPU version first (largest package)
+# Using official PyTorch CPU index for smaller size
+RUN pip install --no-cache-dir --default-timeout=100 torch --index-url https://download.pytorch.org/whl/cpu
+
+# Stage 2: Install core dependencies
 RUN pip install --no-cache-dir \
     fastapi \
     uvicorn \
     boto3 \
     botocore \
     openai \
-    ddgs \
-    rich \
+    requests
+
+# Stage 3: Install data processing libraries
+RUN pip install --no-cache-dir \
     numpy \
-    openpyxl \
-    biopython \
-    mammoth \
-    markdownify \
     pandas \
+    openpyxl \
     pdfminer-six \
     python-pptx \
     pdf2image \
-    puremagic \
+    puremagic
+
+# Stage 4: Install document and media processing
+RUN pip install --no-cache-dir \
+    biopython \
+    mammoth \
+    markdownify \
     pydub \
-    SpeechRecognition \
+    SpeechRecognition
+
+# Stage 5: Install web scraping and search
+RUN pip install --no-cache-dir \
+    ddgs \
     bs4 \
     youtube-transcript-api \
-    requests \
+    selenium \
+    helium
+
+# Stage 6: Install AI/ML libraries
+RUN pip install --no-cache-dir \
     transformers \
     protobuf \
     langchain_openai \
     langchain \
-    selenium \
-    helium \
     smolagents
 
+# Stage 7: Install utilities
+RUN pip install --no-cache-dir rich
+
+# Pre-download and cache the Qwen tokenizer to avoid download on first run
+RUN python3 -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Qwen/Qwen3-32B')"
+
 # Copy the entire project
 COPY . /app/
 
@@ -121,25 +147,9 @@ RUN mkdir -p test_output ck_pro/ck_web/_web/DownloadedFiles ck_pro/ck_web/_web/s
 # 3001 for Playwright web service
 EXPOSE 8080 3001
 
-# Create startup script
-RUN echo '#!/bin/bash\n\
-set -e\n\
-\n\
-# Start the Playwright web service in background\n\
-echo "Starting Playwright web service on port 3001..."\n\
-cd /app/ck_pro/ck_web/_web\n\
-LISTEN_PORT=3001 npm start &\n\
-WEB_PID=$!\n\
-\n\
-# Wait for web service to be ready\n\
-echo "Waiting for web service to start..."\n\
-sleep 5\n\
-\n\
-# Start the FastAPI service\n\
-echo "Starting FastAPI service on port 8080..."\n\
-cd /app\n\
-exec uvicorn agentcompass_service_fastapi:app --host 0.0.0.0 --port 8080 --workers ${WORKERS:-4}\n\
-' > /app/start.sh && chmod +x /app/start.sh
+# Copy startup script and mark it executable in the same layer
+# (COPY --chmod needs BuildKit, the default builder since Docker Engine 23.0)
+COPY --chmod=755 start.sh /app/start.sh
 
 # Set environment variables
 ENV PYTHONPATH=/app
@@ -154,4 +164,3 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
 
 # Run the startup script
 CMD ["/app/start.sh"]
-
diff --git a/ck_pro/ck_web/_web/server.js b/ck_pro/ck_web/_web/server.js
@@ -147,7 +147,15 @@ app.post('/getBrowser', async (req, res) => {
     let browserEntry = browserPool[availableBrowserslot];
     if (!browserEntry.browser) {
       chromium.use(StealthPlugin())
-      const new_browser = await chromium.launch({headless: true, chromiumSandbox: true});
+      const new_browser = await chromium.launch({
+        headless: true,
+        chromiumSandbox: false,
+        args: [
+          '--no-sandbox',
+          '--disable-setuid-sandbox',
+          '--disable-dev-shm-usage'
+        ]
+      });
       browserEntry.browser = await new_browser.newContext({
         viewport: {width: 1024, height: 768},
         locale: 'en-US',  // Set the locale to English (US)
diff --git a/docker/build-and-push.sh b/docker/build-and-push.sh
diff --git a/readme.md b/readme.md
@@ -41,6 +41,14 @@ Key variables you may want to set:
 - WEB_IP: host:port for the web browser service (default: localhost:3001)
 - Provider keys if using Azure OpenAI or others (see upstream README for details)
 
+### Agent Configuration
+
+All agents in this project are configured with the following default settings:
+- **max_steps**: 10 - Maximum number of steps each agent can take to solve a task
+  - This applies to all agents: CKAgent (main agent), WebAgent, and FileAgent
+  - The actual execution may allow slightly more steps (up to 1.5x) to compensate for error recovery
+  - You can override this at runtime by passing the `max_steps` parameter to the agent's `run()` method
+
 
 ## 3) Run the API server
 - Recommended (multiple workers):
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,56 @@
+# Core FastAPI dependencies
+fastapi
+uvicorn
+
+# AWS SDK
+boto3
+botocore
+
+# OpenAI and LLM
+openai
+transformers
+protobuf
+
+# LangChain
+langchain_openai
+langchain
+
+# AI Agents
+smolagents
+
+# Search engines
+ddgs
+
+# Web scraping and automation
+selenium
+helium
+bs4
+requests
+
+# Data processing
+numpy
+pandas
+openpyxl
+
+# Document processing
+pdfminer-six
+python-pptx
+pdf2image
+biopython
+mammoth
+markdownify
+puremagic
+
+# Media processing
+pydub
+SpeechRecognition
+
+# YouTube
+youtube-transcript-api
+
+# Utilities
+rich
+
+# PyTorch (CPU build) is not listed here; install it separately from the PyTorch CPU wheel index:
+#   pip install torch --index-url https://download.pytorch.org/whl/cpu
+
diff --git a/start.sh b/start.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Launch the Playwright web service in the background, then run the FastAPI
+# service in the foreground (exec replaces this shell with uvicorn).
+set -e
+
+# Get the directory where this script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Start the Playwright web service in background
+echo "Starting Playwright web service on port 3001..."
+cd "$SCRIPT_DIR/ck_pro/ck_web/_web"
+LISTEN_PORT=3001 npm start &
+WEB_PID=$!
+
+# Wait for web service to be ready
+echo "Waiting for web service to start..."
+sleep 5
+
+# Surface an early exit of the background service instead of continuing silently
+if ! kill -0 "$WEB_PID" 2>/dev/null; then
+  echo "WARNING: Playwright web service (PID $WEB_PID) exited during startup" >&2
+fi
+
+# Start the FastAPI service
+echo "Starting FastAPI service on port 8080..."
+cd "$SCRIPT_DIR"
+# WORKERS is quoted so its value is always passed to uvicorn as a single argument
+exec uvicorn agentcompass_service_fastapi:app --host 0.0.0.0 --port 8080 --workers "${WORKERS:-4}"
diff --git a/test.sh b/test.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Manual check of the web service on localhost:3001: acquire a browser, then open a page.
+
+# 1. Acquire a browser
+BROWSER_ID=$(curl -s -X POST http://localhost:3001/getBrowser \
+  -H "Content-Type: application/json" \
+  -d '{}' | grep -o '"browserId":"[^"]*"' | cut -d'"' -f4)
+
+echo "Browser ID: $BROWSER_ID"
+
+# Stop if no browser ID was extracted; the next request would carry an empty browserId
+if [ -z "$BROWSER_ID" ]; then
+  echo "Failed to obtain a browser ID from /getBrowser" >&2
+  exit 1
+fi
+
+# 2. Open a page
+curl -s -X POST http://localhost:3001/openPage \
+  -H "Content-Type: application/json" \
+  -d "{\"browserId\":\"$BROWSER_ID\",\"url\":\"https://www.google.com\"}"
+
+# Expected response: {"browserId":"xxx","pageId":"0"}