Skip to content

Latest commit

 

History

History
984 lines (861 loc) · 19.8 KB

File metadata and controls

984 lines (861 loc) · 19.8 KB

🏭 Production Deployment Guide

Complete guide for deploying the Dataproc MCP Server with response optimization in production environments.

Overview

This guide covers production deployment options, Qdrant setup, performance tuning, monitoring, and scaling considerations for the Dataproc MCP Server with response optimization.

Deployment Options

1. Docker Deployment (Recommended)

Single Container Setup

# Dockerfile
# Runtime image for the Dataproc MCP Server (expects build/ to exist already).
FROM node:18-alpine

WORKDIR /app

# Copy only the manifests first so the dependency layer stays cached until
# package.json / package-lock.json change.
COPY package*.json ./
# --omit=dev is the supported replacement for the deprecated --only=production.
RUN npm ci --omit=dev

COPY build/ ./build/
COPY config/ ./config/
COPY profiles/ ./profiles/

EXPOSE 3000
CMD ["node", "build/index.js"]
# docker-compose.yml
# Single-host stack: the MCP server built from the local Dockerfile plus one
# Qdrant instance with a named volume for its storage.
version: '3.8'

services:
  dataproc-mcp:
    build: .
    restart: unless-stopped
    depends_on:
      - qdrant
    ports:
      - "3000:3000"
    environment:
      NODE_ENV: production
      LOG_LEVEL: info
      RESPONSE_OPTIMIZATION_ENABLED: "true"
      # "qdrant" is the service defined below
      QDRANT_URL: http://qdrant:6333
      QDRANT_AUTO_START: "false"
    volumes:
      # configuration is mounted read-only
      - ./config:/app/config:ro
      - ./state:/app/state

  qdrant:
    image: qdrant/qdrant:v1.7.4
    restart: unless-stopped
    ports:
      - "6333:6333"
    environment:
      QDRANT__SERVICE__HTTP_PORT: "6333"
      QDRANT__SERVICE__GRPC_PORT: "6334"
    volumes:
      - qdrant_data:/qdrant/storage

volumes:
  qdrant_data:
    driver: local

Multi-Container Production Setup

# docker-compose.prod.yml
# Production stack: replicated MCP server, Qdrant, and an nginx front end,
# all attached to one private bridge network.
version: '3.8'

services:
  dataproc-mcp:
    # VERSION falls back to "latest" when it is not set in the environment
    image: dataproc-mcp-server:${VERSION:-latest}
    restart: unless-stopped
    networks:
      - dataproc-network
    environment:
      NODE_ENV: production
      LOG_LEVEL: warn
      RESPONSE_OPTIMIZATION_ENABLED: "true"
      QDRANT_URL: http://qdrant:6333
      QDRANT_COLLECTION: dataproc_responses_prod
    volumes:
      - ./config/production.json:/app/config/server.json:ro
    deploy:
      replicas: 3
      resources:
        reservations:
          cpus: '0.25'
          memory: 256M
        limits:
          cpus: '0.5'
          memory: 512M

  qdrant:
    image: qdrant/qdrant:v1.7.4
    restart: unless-stopped
    networks:
      - dataproc-network
    environment:
      QDRANT__SERVICE__HTTP_PORT: "6333"
      QDRANT__SERVICE__GRPC_PORT: "6334"
      QDRANT__STORAGE__PERFORMANCE__MAX_SEARCH_THREADS: "4"
    volumes:
      - qdrant_data:/qdrant/storage
      - ./qdrant/config.yaml:/qdrant/config/production.yaml:ro
    deploy:
      resources:
        reservations:
          cpus: '0.5'
          memory: 1G
        limits:
          cpus: '1.0'
          memory: 2G

  nginx:
    image: nginx:alpine
    restart: unless-stopped
    depends_on:
      - dataproc-mcp
    networks:
      - dataproc-network
    # nginx is the only service that publishes ports on the host
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./ssl:/etc/ssl:ro

networks:
  dataproc-network:
    driver: bridge

volumes:
  qdrant_data:
    driver: local

2. Kubernetes Deployment

Namespace and ConfigMap

# k8s/namespace.yaml
# Dedicated namespace; the "name" label lets selectors match this namespace.
apiVersion: v1
kind: Namespace
metadata:
  name: dataproc-mcp
  labels:
    name: dataproc-mcp

---
# k8s/configmap.yaml
# Server configuration, stored as a single server.json key.
apiVersion: v1
kind: ConfigMap
metadata:
  name: dataproc-mcp-config
  namespace: dataproc-mcp
data:
  # Literal block scalar: the JSON below is kept verbatim, newlines included.
  server.json: |
    {
      "projectId": "my-production-project",
      "region": "us-central1",
      "responseOptimization": {
        "enabled": true,
        "tokenLimit": 500
      },
      "qdrant": {
        "url": "http://qdrant-service:6333",
        "collection": "dataproc_responses_prod"
      }
    }

Qdrant Deployment

# k8s/qdrant.yaml
# Single-replica Qdrant backed by a ReadWriteOnce PersistentVolumeClaim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: qdrant
  namespace: dataproc-mcp
spec:
  replicas: 1
  # The storage volume is ReadWriteOnce, so the old pod must release it before
  # the replacement starts; a rolling update could leave the new pod stuck.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: qdrant
  template:
    metadata:
      labels:
        app: qdrant
    spec:
      containers:
      - name: qdrant
        image: qdrant/qdrant:v1.7.4
        ports:
        - containerPort: 6333
        - containerPort: 6334
        env:
        - name: QDRANT__SERVICE__HTTP_PORT
          value: "6333"
        - name: QDRANT__SERVICE__GRPC_PORT
          value: "6334"
        - name: QDRANT__STORAGE__PERFORMANCE__MAX_SEARCH_THREADS
          value: "4"
        volumeMounts:
        - name: qdrant-storage
          mountPath: /qdrant/storage
        resources:
          requests:
            memory: "1Gi"
            cpu: "500m"
          limits:
            memory: "2Gi"
            cpu: "1000m"
        # Qdrant serves Kubernetes-style health endpoints on its HTTP port.
        readinessProbe:
          httpGet:
            path: /readyz
            port: 6333
          initialDelaySeconds: 5
          periodSeconds: 5
        livenessProbe:
          httpGet:
            path: /livez
            port: 6333
          initialDelaySeconds: 30
          periodSeconds: 10
      volumes:
      - name: qdrant-storage
        persistentVolumeClaim:
          claimName: qdrant-pvc

---
# In-cluster endpoint used by the MCP server (http://qdrant-service:6333).
apiVersion: v1
kind: Service
metadata:
  name: qdrant-service
  namespace: dataproc-mcp
spec:
  selector:
    app: qdrant
  ports:
  - name: http
    port: 6333
    targetPort: 6333
  - name: grpc
    port: 6334
    targetPort: 6334

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: qdrant-pvc
  namespace: dataproc-mcp
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
  # Cluster-specific: replace with a StorageClass that exists in your cluster.
  storageClassName: fast-ssd

MCP Server Deployment

# k8s/dataproc-mcp.yaml
# Deployment: three replicas of the MCP server, configured from the
# dataproc-mcp-config ConfigMap, with a per-pod scratch volume for state.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dataproc-mcp
  namespace: dataproc-mcp
spec:
  replicas: 3
  selector:
    matchLabels:
      app: dataproc-mcp
  template:
    metadata:
      labels:
        app: dataproc-mcp
    spec:
      volumes:
        - name: config
          configMap:
            name: dataproc-mcp-config
        # emptyDir: contents are discarded when the pod is removed
        - name: state
          emptyDir: {}
      containers:
        - name: dataproc-mcp
          image: dataproc-mcp-server:v2.0.2
          ports:
            - containerPort: 3000
          env:
            - name: NODE_ENV
              value: "production"
            - name: LOG_LEVEL
              value: "info"
            - name: RESPONSE_OPTIMIZATION_ENABLED
              value: "true"
            - name: QDRANT_URL
              value: "http://qdrant-service:6333"
            - name: QDRANT_AUTO_START
              value: "false"
          resources:
            requests:
              cpu: "250m"
              memory: "256Mi"
            limits:
              cpu: "500m"
              memory: "512Mi"
          volumeMounts:
            - name: config
              mountPath: /app/config
              readOnly: true
            - name: state
              mountPath: /app/state
          readinessProbe:
            httpGet:
              path: /ready
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            httpGet:
              path: /health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10

---
# Cluster-internal Service in front of the Deployment's pods.
apiVersion: v1
kind: Service
metadata:
  name: dataproc-mcp-service
  namespace: dataproc-mcp
spec:
  type: ClusterIP
  selector:
    app: dataproc-mcp
  ports:
    - port: 3000
      targetPort: 3000

3. Cloud Provider Deployments

Google Cloud Run

# cloudrun.yaml
# Knative Service manifest for Cloud Run.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: dataproc-mcp
  annotations:
    run.googleapis.com/ingress: all
spec:
  template:
    metadata:
      annotations:
        # Keep CPU allocated outside of request handling.
        run.googleapis.com/cpu-throttling: "false"
        # Memory and CPU are set through resources.limits below; there are no
        # run.googleapis.com/memory or run.googleapis.com/cpu annotations.
    spec:
      containers:
      - image: gcr.io/my-project/dataproc-mcp:latest
        ports:
        - containerPort: 3000
        env:
        - name: NODE_ENV
          value: "production"
        - name: QDRANT_URL
          value: "https://my-qdrant-instance.qdrant.cloud"
        - name: RESPONSE_OPTIMIZATION_ENABLED
          value: "true"
        resources:
          limits:
            memory: "1Gi"
            cpu: "1000m"

AWS ECS

{
  "family": "dataproc-mcp",
  "networkMode": "awsvpc",
  "requiresCompatibilities": ["FARGATE"],
  "cpu": "512",
  "memory": "1024",
  "executionRoleArn": "arn:aws:iam::account:role/ecsTaskExecutionRole",
  "containerDefinitions": [
    {
      "name": "dataproc-mcp",
      "image": "dataproc-mcp-server:latest",
      "portMappings": [
        {
          "containerPort": 3000,
          "protocol": "tcp"
        }
      ],
      "environment": [
        {
          "name": "NODE_ENV",
          "value": "production"
        },
        {
          "name": "QDRANT_URL",
          "value": "http://qdrant-cluster.internal:6333"
        },
        {
          "name": "RESPONSE_OPTIMIZATION_ENABLED",
          "value": "true"
        }
      ],
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/ecs/dataproc-mcp",
          "awslogs-region": "us-west-2",
          "awslogs-stream-prefix": "ecs"
        }
      }
    }
  ]
}

Qdrant Production Setup

1. Qdrant Cloud (Recommended)

# Sign up at https://cloud.qdrant.io
# Create cluster and get connection details

# Placeholders: substitute your own cluster URL and API key.
# Treat the API key as a secret and keep it out of version control.
export QDRANT_URL="https://xyz-abc123.eu-central.aws.cloud.qdrant.io:6333"
export QDRANT_API_KEY="your-api-key"

2. Self-Hosted Qdrant

High Availability Setup

# qdrant-cluster.yml
# Three-node Qdrant cluster. Peers are joined with the qdrant CLI arguments
# (--uri / --bootstrap); there is no environment variable for bootstrapping.
version: '3.8'
services:
  # First node: starts the cluster and advertises its own P2P address.
  qdrant-node-1:
    image: qdrant/qdrant:v1.7.4
    command: ./qdrant --uri http://qdrant-node-1:6335
    environment:
      - QDRANT__CLUSTER__ENABLED=true
      - QDRANT__CLUSTER__P2P__PORT=6335
      - QDRANT__CLUSTER__CONSENSUS__TICK_PERIOD_MS=100
    volumes:
      - qdrant_node_1:/qdrant/storage
    ports:
      - "6333:6333"
      - "6335:6335"

  # Remaining nodes join through node 1 and advertise their own address.
  qdrant-node-2:
    image: qdrant/qdrant:v1.7.4
    command: ./qdrant --bootstrap http://qdrant-node-1:6335 --uri http://qdrant-node-2:6335
    environment:
      - QDRANT__CLUSTER__ENABLED=true
      - QDRANT__CLUSTER__P2P__PORT=6335
    volumes:
      - qdrant_node_2:/qdrant/storage
    ports:
      # Host port 6334 maps to this node's HTTP API (container port 6333).
      - "6334:6333"
      - "6336:6335"
    depends_on:
      - qdrant-node-1

  qdrant-node-3:
    image: qdrant/qdrant:v1.7.4
    command: ./qdrant --bootstrap http://qdrant-node-1:6335 --uri http://qdrant-node-3:6335
    environment:
      - QDRANT__CLUSTER__ENABLED=true
      - QDRANT__CLUSTER__P2P__PORT=6335
    volumes:
      - qdrant_node_3:/qdrant/storage
    ports:
      - "6337:6333"
      - "6338:6335"
    depends_on:
      - qdrant-node-1

volumes:
  qdrant_node_1:
  qdrant_node_2:
  qdrant_node_3:

Performance Configuration

# qdrant/config.yaml
# Only keys from Qdrant's configuration reference are used here; keys Qdrant
# does not recognise have no effect.
service:
  http_port: 6333
  grpc_port: 6334
  enable_cors: true

storage:
  # Keep payloads on disk so RAM is left for vectors and indexes
  on_disk_payload: true

  # Storage performance settings
  performance:
    max_search_threads: 4
    max_optimization_threads: 2

  # Disk settings
  optimizers:
    # Segments larger than this (in KB) are stored as memory-mapped files.
    # NOTE(review): tune to your data volume and available RAM.
    memmap_threshold_kb: 1000000

cluster:
  enabled: true
  p2p:
    port: 6335
  consensus:
    # Consensus tick interval and bootstrap timeout
    tick_period_ms: 100
    bootstrap_timeout_sec: 30

Environment Configuration

Production Environment Variables

# Core settings
NODE_ENV=production
LOG_LEVEL=warn
PORT=3000

# Response optimization
RESPONSE_OPTIMIZATION_ENABLED=true
RESPONSE_TOKEN_LIMIT=500
RESPONSE_CACHE_ENABLED=true
RESPONSE_CACHE_TTL=300

# Qdrant settings
QDRANT_URL=http://qdrant-cluster:6333
QDRANT_API_KEY=your-production-api-key
QDRANT_COLLECTION=dataproc_responses_prod
QDRANT_AUTO_START=false
QDRANT_VECTOR_SIZE=384
QDRANT_DISTANCE_METRIC=Cosine

# Performance tuning
QDRANT_MAX_POINTS=100000
QDRANT_CLEANUP_ENABLED=true
# Cleanup interval in milliseconds (3600000 = 1 hour). The comment sits on its
# own line because some env-file loaders treat trailing text as part of the value.
QDRANT_CLEANUP_INTERVAL=3600000

# Security
GOOGLE_APPLICATION_CREDENTIALS=/app/config/service-account.json
DATAPROC_CONFIG_PATH=/app/config/production.json

# Monitoring
METRICS_ENABLED=true
HEALTH_CHECK_ENABLED=true

Configuration Files

Production Server Config

{
  "projectId": "my-production-project",
  "region": "us-central1",
  "authentication": {
    "type": "service_account_impersonation",
    "impersonateServiceAccount": "dataproc-worker@your-project.iam.gserviceaccount.com",
    "fallbackKeyPath": "/app/config/source-key.json"
  },
  "responseOptimization": {
    "enabled": true,
    "tokenLimit": 500,
    "cacheEnabled": true,
    "cacheTtl": 300
  },
  "qdrant": {
    "url": "http://qdrant-cluster:6333",
    "collection": "dataproc_responses_prod",
    "vectorSize": 384,
    "distanceMetric": "Cosine",
    "maxPoints": 100000,
    "cleanupEnabled": true,
    "cleanupInterval": 3600000
  },
  "performance": {
    "maxConcurrentRequests": 100,
    "requestTimeout": 30000,
    "retryAttempts": 3,
    "retryDelay": 1000
  },
  "monitoring": {
    "metricsEnabled": true,
    "healthCheckEnabled": true,
    "logLevel": "warn"
  }
}

Performance Tuning

1. Memory Optimization

# Node.js memory settings
# Keep the V8 old-space cap below the container memory limit (512M in the
# deployment examples) so heap plus native overhead cannot trigger an OOM kill.
NODE_OPTIONS="--max-old-space-size=384 --max-semi-space-size=64"

# Qdrant memory settings
# Payloads stay on disk; large segments are memory-mapped instead of held in RAM.
QDRANT__STORAGE__ON_DISK_PAYLOAD=true
QDRANT__STORAGE__OPTIMIZERS__MEMMAP_THRESHOLD_KB=1000000

2. CPU Optimization

# Docker resource limits
# Fragment: place under a service definition in a Compose file.
deploy:
  resources:
    # Hard ceiling for the container
    limits:
      cpus: '1.0'
      memory: 1G
    # Amount reserved for the container
    reservations:
      cpus: '0.5'
      memory: 512M

3. Network Optimization

# nginx.conf
# Complete main configuration file: upstream and server blocks must live
# inside http {}, and an events {} block is required.
events {
    worker_connections 1024;
}

http {
    # Rate limiting zone. limit_req_zone is only valid in the http context;
    # it is applied inside the server block below.
    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;

    # Backend pool, balanced by least active connections.
    # Every hostname must resolve when nginx starts. With Compose replicas,
    # a single "server dataproc-mcp:3000" entry can be used instead.
    upstream dataproc_mcp {
        least_conn;
        server dataproc-mcp-1:3000 max_fails=3 fail_timeout=30s;
        server dataproc-mcp-2:3000 max_fails=3 fail_timeout=30s;
        server dataproc-mcp-3:3000 max_fails=3 fail_timeout=30s;
    }

    server {
        listen 80;
        server_name dataproc-mcp.example.com;

        # Client keep-alive
        keepalive_timeout 65;
        keepalive_requests 100;

        # Compression
        gzip on;
        gzip_types application/json text/plain;

        # Rate limiting
        limit_req zone=api burst=20 nodelay;

        location / {
            proxy_pass http://dataproc_mcp;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;

            # Timeouts
            proxy_connect_timeout 5s;
            proxy_send_timeout 30s;
            proxy_read_timeout 30s;
        }
    }
}

Monitoring and Alerting

1. Health Checks

// Health check endpoints
GET /health      // Basic health check
GET /ready       // Readiness check
GET /metrics     // Prometheus metrics

2. Prometheus Metrics

# prometheus.yml
# Scrapes the MCP server and Qdrant; the per-job interval overrides the
# global default.
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: dataproc-mcp
    metrics_path: /metrics
    scrape_interval: 30s
    static_configs:
      - targets:
          - "dataproc-mcp:3000"

  - job_name: qdrant
    metrics_path: /metrics
    scrape_interval: 30s
    static_configs:
      - targets:
          - "qdrant:6333"

3. Grafana Dashboard

{
  "dashboard": {
    "title": "Dataproc MCP Server",
    "panels": [
      {
        "title": "Response Optimization Metrics",
        "targets": [
          {
            "expr": "rate(response_optimization_tokens_saved_total[5m])",
            "legendFormat": "Tokens Saved/sec"
          },
          {
            "expr": "response_optimization_reduction_percentage",
            "legendFormat": "Reduction %"
          }
        ]
      },
      {
        "title": "Qdrant Performance",
        "targets": [
          {
            "expr": "rate(qdrant_collections_points_total[5m])",
            "legendFormat": "Points/sec"
          },
          {
            "expr": "qdrant_collections_vectors_count",
            "legendFormat": "Total Vectors"
          }
        ]
      }
    ]
  }
}

4. Alerting Rules

# alerts.yml
groups:
  - name: dataproc-mcp
    rules:
      # Fires when more than 10% of requests return 5xx for 5 minutes.
      # A ratio is used so the alert does not depend on traffic volume.
      - alert: HighErrorRate
        expr: |
          sum(rate(http_requests_total{status=~"5.."}[5m]))
            /
          sum(rate(http_requests_total[5m])) > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate detected"

      # The "qdrant" scrape target has been unreachable for 1 minute.
      - alert: QdrantDown
        expr: up{job="qdrant"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Qdrant is down"

      - alert: OptimizationDisabled
        expr: response_optimization_enabled == 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Response optimization is disabled"

Security

1. Network Security

# Network policies for Kubernetes
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: dataproc-mcp-policy
  namespace: dataproc-mcp
spec:
  podSelector:
    matchLabels:
      app: dataproc-mcp
  policyTypes:
  - Ingress
  - Egress
  ingress:
  # Only namespaces labelled name=ingress-nginx may reach the server port.
  # The ingress controller's namespace must carry that label.
  - from:
    - namespaceSelector:
        matchLabels:
          name: ingress-nginx
    ports:
    - protocol: TCP
      port: 3000
  egress:
  # DNS: without this rule the pods cannot resolve qdrant-service or the
  # Google API hostnames, because all other egress is denied.
  - ports:
    - protocol: UDP
      port: 53
    - protocol: TCP
      port: 53
  - to:
    - podSelector:
        matchLabels:
          app: qdrant
    ports:
    - protocol: TCP
      port: 6333
  - to: []
    ports:
    - protocol: TCP
      port: 443  # HTTPS to Google Cloud APIs

2. Secret Management

# Kubernetes secrets
apiVersion: v1
kind: Secret
metadata:
  name: dataproc-mcp-secrets
  namespace: dataproc-mcp
type: Opaque
# Values under "data" must be base64-encoded; the entries below are placeholders.
data:
  service-account.json: <base64-encoded-service-account>
  qdrant-api-key: <base64-encoded-api-key>

---
# Mount secrets in deployment
# Partial manifest: merge these fields into the dataproc-mcp Deployment spec.
spec:
  template:
    spec:
      containers:
      - name: dataproc-mcp
        volumeMounts:
        # Each key of the Secret appears as a file under /app/secrets
        - name: secrets
          mountPath: /app/secrets
          readOnly: true
      volumes:
      - name: secrets
        secret:
          secretName: dataproc-mcp-secrets

Scaling

1. Horizontal Scaling

# Horizontal Pod Autoscaler
# Keeps the dataproc-mcp Deployment between 3 and 10 replicas based on
# average CPU and memory utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: dataproc-mcp-hpa
  namespace: dataproc-mcp
spec:
  minReplicas: 3
  maxReplicas: 10
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: dataproc-mcp
  metrics:
    # target: 70% average CPU utilization
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # target: 80% average memory utilization
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80

2. Vertical Scaling

# Vertical Pod Autoscaler
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
  name: dataproc-mcp-vpa
  namespace: dataproc-mcp
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: dataproc-mcp
  updatePolicy:
    # Recommendation-only mode. A VPA that rewrites CPU/memory requests must
    # not run alongside an HPA scaling the same Deployment on CPU/memory
    # utilization; switch to "Auto" only if no such HPA is deployed.
    updateMode: "Off"
  resourcePolicy:
    containerPolicies:
    - containerName: dataproc-mcp
      # Bounds applied to the recommendations
      maxAllowed:
        cpu: 2
        memory: 2Gi
      minAllowed:
        cpu: 100m
        memory: 128Mi

Backup and Recovery

1. Qdrant Backup

#!/bin/bash
# backup-qdrant.sh
# Creates a snapshot of one Qdrant collection, downloads it into a
# timestamped directory, and prunes backup directories older than 7 days.
set -euo pipefail

BACKUP_ROOT="/backups/qdrant"
BACKUP_DIR="${BACKUP_ROOT}/$(date +%Y%m%d_%H%M%S)"
QDRANT_URL="http://qdrant:6333"
COLLECTION="dataproc_responses_prod"

# The target directory must exist before curl can write into it
mkdir -p "${BACKUP_DIR}"

# Create snapshot; take the name from the creation response rather than
# guessing it from the ordering of the snapshot list
SNAPSHOT_NAME=$(curl -fsS -X POST "${QDRANT_URL}/collections/${COLLECTION}/snapshots" | jq -r '.result.name')
if [[ -z "${SNAPSHOT_NAME}" || "${SNAPSHOT_NAME}" == "null" ]]; then
  echo "Snapshot creation failed for collection ${COLLECTION}" >&2
  exit 1
fi

# Download snapshot (-f makes curl fail on HTTP errors instead of saving the error body)
curl -fsS -o "${BACKUP_DIR}/${SNAPSHOT_NAME}" "${QDRANT_URL}/collections/${COLLECTION}/snapshots/${SNAPSHOT_NAME}"

# Cleanup old snapshots (keep last 7 days); only direct children of the backup
# root are considered, so the root itself is never removed
find "${BACKUP_ROOT}" -mindepth 1 -maxdepth 1 -type d -mtime +7 -exec rm -rf {} +

2. Configuration Backup

#!/bin/bash
# backup-config.sh
# Copies the server configuration and profiles, and exports the Kubernetes
# objects of the dataproc-mcp namespace, into a timestamped directory.
set -euo pipefail

BACKUP_DIR="/backups/config/$(date +%Y%m%d_%H%M%S)"
mkdir -p "${BACKUP_DIR}"

# Backup configurations
cp -r /app/config "${BACKUP_DIR}/"
cp -r /app/profiles "${BACKUP_DIR}/"

# Backup Kubernetes manifests
# "all" does not cover ConfigMaps, PVCs or NetworkPolicies, so they are listed
# explicitly. Secrets are deliberately left out of the plain-text export.
kubectl get all,configmap,pvc,networkpolicy -n dataproc-mcp -o yaml > "${BACKUP_DIR}/k8s-manifests.yaml"

Troubleshooting

Common Production Issues

  1. High Memory Usage

    • Check Qdrant vector count
    • Enable cleanup policies
    • Reduce vector size

  2. Slow Response Times

    • Monitor Qdrant performance
    • Check network latency
    • Scale horizontally

  3. Optimization Not Working

    • Verify Qdrant connectivity
    • Check configuration
    • Review logs

Debug Commands

# Check service health
kubectl get pods -n dataproc-mcp
kubectl logs -f deployment/dataproc-mcp -n dataproc-mcp

# Check Qdrant status
# Qdrant's health endpoint is /healthz (there is no /health route)
curl http://qdrant:6333/healthz
curl http://qdrant:6333/collections/dataproc_responses_prod

# Performance metrics
curl http://dataproc-mcp:3000/metrics

🏭 Deploy with confidence using production-ready configurations and monitoring!