# Monitoring & Health Checks #11
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Monitoring workflow: runs per-environment health checks, a memory-leak test,
# npm security audits and backup verification, then aggregates the job results
# into a Markdown report that is uploaded as an artifact.
#
# Triggers:
#   - schedule: every 15 minutes (all jobs run)
#   - workflow_dispatch: manual run, optionally narrowed to one environment
#
# NOTE(review): most health/backup/cleanup steps below are placeholders that
# only echo a fixed "healthy" message; they cannot fail until real checks are
# added where the "Add ... here" comments are.
name: Monitoring & Health Checks

on:
  schedule:
    # Run every 15 minutes
    - cron: '*/15 * * * *'
  workflow_dispatch:
    inputs:
      environment:
        description: 'Environment to check'
        required: false
        type: choice
        options:
          - all
          - development
          - staging
          - production
        default: 'all'

# Least privilege: none of the jobs in this file write through the GitHub
# token; checkout only needs read access. Widen this (e.g. `actions: write`)
# if the artifact-cleanup placeholders are ever implemented via the API.
permissions:
  contents: read

env:
  NODE_VERSION: '20.x'

jobs:
  # Job 1: Health Check Development
  health-check-dev:
    name: Health Check - Development
    runs-on: ubuntu-latest
    # Runs on every scheduled tick, or on manual dispatch for
    # 'development' / 'all'.
    if: >-
      github.event.inputs.environment == 'development' ||
      github.event.inputs.environment == 'all' ||
      github.event_name == 'schedule'
    steps:
      - name: Check development endpoints
        run: |
          echo "Checking development environment health..."
          echo "Frontend: https://dev.techverse.com"
          echo "API: https://dev-api.techverse.com"
          # Add actual health check commands here
          # Example: curl -f https://dev-api.techverse.com/health || exit 1
          echo "Development environment is healthy"

      - name: Check database connectivity
        run: |
          echo "Checking database connectivity..."
          # Add database health check here
          echo "Database is accessible"

      - name: Check Redis connectivity
        run: |
          echo "Checking Redis connectivity..."
          # Add Redis health check here
          echo "Redis is accessible"

      - name: Performance metrics
        run: |
          echo "Collecting performance metrics..."
          # Add performance monitoring here
          echo "Response time: <200ms"
          echo "Memory usage: Normal"
          echo "CPU usage: Normal"

  # Job 2: Health Check Staging
  health-check-staging:
    name: Health Check - Staging
    runs-on: ubuntu-latest
    # Runs on every scheduled tick, or on manual dispatch for
    # 'staging' / 'all'.
    if: >-
      github.event.inputs.environment == 'staging' ||
      github.event.inputs.environment == 'all' ||
      github.event_name == 'schedule'
    steps:
      - name: Check staging endpoints
        run: |
          echo "Checking staging environment health..."
          echo "Frontend: https://staging.techverse.com"
          echo "API: https://staging-api.techverse.com"
          # Add actual health check commands here
          echo "Staging environment is healthy"

      - name: Check database connectivity
        run: |
          echo "Checking database connectivity..."
          echo "Database is accessible"

      - name: Check Redis connectivity
        run: |
          echo "Checking Redis connectivity..."
          echo "Redis is accessible"

      - name: Performance metrics
        run: |
          echo "Collecting performance metrics..."
          echo "Response time: <200ms"
          echo "Memory usage: Normal"
          echo "CPU usage: Normal"

  # Job 3: Health Check Production
  health-check-prod:
    name: Health Check - Production
    runs-on: ubuntu-latest
    # Runs on every scheduled tick, or on manual dispatch for
    # 'production' / 'all'.
    if: >-
      github.event.inputs.environment == 'production' ||
      github.event.inputs.environment == 'all' ||
      github.event_name == 'schedule'
    steps:
      - name: Check production endpoints
        run: |
          echo "Checking production environment health..."
          echo "Frontend: https://techverse.com"
          echo "API: https://api.techverse.com"
          # Add actual health check commands here
          echo "Production environment is healthy"

      - name: Check database connectivity
        run: |
          echo "Checking database connectivity..."
          echo "Database is accessible"

      - name: Check Redis connectivity
        run: |
          echo "Checking Redis connectivity..."
          echo "Redis is accessible"

      - name: Performance metrics
        run: |
          echo "Collecting performance metrics..."
          echo "Response time: <200ms"
          echo "Memory usage: Normal"
          echo "CPU usage: Normal"

      - name: SSL certificate check
        run: |
          echo "Checking SSL certificates..."
          # Add SSL certificate expiry check
          echo "SSL certificates are valid"

  # Job 4: Memory Leak Detection
  memory-leak-detection:
    name: Memory Leak Detection
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event.inputs.environment == 'all'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: 'npm'
          # Dependencies are installed from ./server, so key the cache on that
          # lock file instead of relying on one existing in the repo root.
          # NOTE(review): assumes server/package-lock.json (not a shrinkwrap).
          cache-dependency-path: server/package-lock.json

      - name: Install dependencies
        working-directory: ./server
        run: npm ci

      - name: Run memory leak detection tests
        id: leak-test
        working-directory: ./server
        run: |
          echo "Running memory leak detection..."
          node --expose-gc --max-old-space-size=512 \
            node_modules/.bin/jest tests/critical/cleanup.test.js \
            --detectOpenHandles --forceExit
        # Keep going so the report step below always runs; that step turns a
        # failed outcome into a job failure.
        continue-on-error: true
        env:
          NODE_ENV: test
          JWT_SECRET: test-jwt-secret-for-monitoring

      - name: Memory usage report
        env:
          # `outcome` is the step result before continue-on-error is applied.
          LEAK_TEST_OUTCOME: ${{ steps.leak-test.outcome }}
        run: |
          echo "Memory Usage Report"
          echo "Max heap size: 512MB"
          if [[ "$LEAK_TEST_OUTCOME" == "success" ]]; then
            echo "Test completed without memory leaks"
            echo "Open handles: None detected"
          else
            # Fail the job so monitoring-summary sees result == 'failure'
            # and raises the alert.
            echo "Memory leak detection tests did not pass (outcome: $LEAK_TEST_OUTCOME)"
            exit 1
          fi

  # Job 5: Security Monitoring
  security-monitoring:
    name: Security Monitoring
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event.inputs.environment == 'all'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: 'npm'
          # Both packages are installed below, so key the cache on both lock
          # files. NOTE(review): assumes package-lock.json in each directory.
          cache-dependency-path: |
            server/package-lock.json
            client/package-lock.json

      - name: Install dependencies (Server)
        working-directory: ./server
        run: npm ci

      - name: Install dependencies (Client)
        working-directory: ./client
        run: npm ci

      # The audits are advisory: findings are visible in the step log but do
      # not fail the job (continue-on-error).
      - name: Security audit (Server)
        working-directory: ./server
        run: |
          echo "Running security audit for server..."
          npm audit --audit-level=moderate
        continue-on-error: true

      - name: Security audit (Client)
        working-directory: ./client
        run: |
          echo "Running security audit for client..."
          npm audit --audit-level=moderate
        continue-on-error: true

      - name: Check for known vulnerabilities
        run: |
          echo "Checking for known vulnerabilities..."
          # Add vulnerability scanning here
          echo "No critical vulnerabilities found"

  # Job 6: Backup Verification
  backup-verification:
    name: Backup Verification
    runs-on: ubuntu-latest
    # Scheduled runs only; skipped on manual dispatch.
    if: github.event_name == 'schedule'
    steps:
      - name: Verify database backups
        run: |
          echo "Verifying database backups..."
          # Add backup verification logic here
          echo "Database backups are current and valid"

      - name: Verify file backups
        run: |
          echo "Verifying file backups..."
          # Add file backup verification here
          echo "File backups are current and valid"

      - name: Test backup restoration
        run: |
          echo "Testing backup restoration process..."
          # Add backup restoration test here
          echo "Backup restoration process is working"

  # Job 7: Monitoring Summary
  monitoring-summary:
    name: Monitoring Summary
    runs-on: ubuntu-latest
    needs:
      - health-check-dev
      - health-check-staging
      - health-check-prod
      - memory-leak-detection
      - security-monitoring
      - backup-verification
    # Always run so that a report is produced even when upstream jobs fail
    # or are skipped.
    if: always()
    steps:
      - name: Generate monitoring report
        env:
          # 'true' when any job listed in `needs` finished with 'failure'.
          ANY_FAILURE: ${{ contains(needs.*.result, 'failure') }}
        run: |
          {
            echo "# Monitoring Report"
            echo ""
            echo "Generated on: $(date)"
            echo ""
            echo "## Health Check Results"
            echo "- Development: ${{ needs.health-check-dev.result || 'skipped' }}"
            echo "- Staging: ${{ needs.health-check-staging.result || 'skipped' }}"
            echo "- Production: ${{ needs.health-check-prod.result || 'skipped' }}"
            echo ""
            echo "## System Monitoring"
            echo "- Memory Leak Detection: ${{ needs.memory-leak-detection.result || 'skipped' }}"
            echo "- Security Monitoring: ${{ needs.security-monitoring.result || 'skipped' }}"
            echo "- Backup Verification: ${{ needs.backup-verification.result || 'skipped' }}"
            echo ""
            # Determine overall status
            if [[ "$ANY_FAILURE" == "true" ]]; then
              echo "## Overall Status: ISSUES DETECTED"
            else
              echo "## Overall Status: ALL SYSTEMS HEALTHY"
            fi
          } > monitoring-report.md

          # Export values for later steps. The timestamp is computed here
          # because `with:` inputs are not evaluated by a shell.
          {
            echo "ALERT_NEEDED=$ANY_FAILURE"
            echo "REPORT_TIMESTAMP=$(date +%Y%m%d_%H%M%S)"
          } >> "$GITHUB_ENV"

          cat monitoring-report.md

      - name: Upload monitoring report
        uses: actions/upload-artifact@v4
        with:
          name: monitoring-report-${{ env.REPORT_TIMESTAMP }}
          path: monitoring-report.md
          retention-days: 30

      - name: Send alert if issues detected
        if: env.ALERT_NEEDED == 'true'
        run: |
          echo "ALERT: Issues detected in monitoring!"
          echo "Sending alert notification..."
          # Add alert notification webhook here
          # This should notify the team immediately about any issues

      - name: Send routine status update
        if: env.ALERT_NEEDED == 'false' && github.event_name == 'schedule'
        run: |
          echo "Routine monitoring completed - all systems healthy"
          # Add routine status update webhook here (less urgent)

  # Job 8: Cleanup Old Artifacts
  cleanup-artifacts:
    name: Cleanup Old Artifacts
    runs-on: ubuntu-latest
    # Scheduled runs only; skipped on manual dispatch.
    if: github.event_name == 'schedule'
    steps:
      - name: Cleanup old monitoring reports
        run: |
          echo "Cleaning up old monitoring artifacts..."
          # Add cleanup logic for old artifacts
          echo "Cleanup completed"

      - name: Cleanup old test results
        run: |
          echo "Cleaning up old test results..."
          # Add cleanup logic for old test results
          echo "Test results cleanup completed"