@@ -360,53 +360,190 @@ jobs:
360360 set -e # Re-enable exit on error
361361 echo "First run exit code: $FIRST_RUN_EXIT_CODE"
362362
363- # Check for failed tests (including panics) and rerun them once
364- if [ "$FIRST_RUN_EXIT_CODE" -ne 0 ]; then
365- echo "Some tests failed or panicked, checking failure type..."
363+ # ============================================================
364+ # BULLETPROOF RETRY LOGIC - Handles ALL failure scenarios
365+ # ============================================================
366+ # Retries: Up to 2 retries (3 total attempts)
367+ # Detects: FAIL, panic, API errors, auth failures, timeouts
368+ # ============================================================
369+
#######################################
# Print the name of the test that was running at a given line of a
# `go test -v` log, i.e. the test named on the last "=== RUN" line at or
# before that line. Prints nothing when no such line exists.
# Arguments:
#   $1 - path to the log file
#   $2 - 1-based line number to look back from
# Outputs:
#   The test name (top-level part only) on stdout, or nothing.
#######################################
_last_test_before_line() {
  local LOG_FILE="$1"
  local LINE_NO="$2"

  # "|| true": finding nothing is a normal outcome, not an error.
  head -n "$LINE_NO" "$LOG_FILE" \
    | grep -E "^=== RUN" \
    | tail -1 \
    | grep -oE "Test[A-Za-z0-9_]+" \
    | head -1 || true
}

#######################################
# Work out which tests should be re-run from a `go test -v` log.
#
# Detection methods, applied in order and accumulated:
#   1. "--- FAIL:" lines at the start of a line.
#   2. "panic:" lines, attributed to the most recent "=== RUN" test.
#   3. Known API/auth error messages (first 5 hits), attributed the same way.
#   4. Fallback, only when nothing else matched: the last "=== RUN" test.
#
# Arguments:
#   $1 - path to the log file to analyse
# Outputs:
#   stdout: exactly one line - the test names joined with "|" (ready to be
#           wrapped in "^( ... )$" for `go test -run`), or an empty line
#           when no test could be identified.
#   stderr: human-readable progress messages. They must NOT go to stdout,
#           because callers capture stdout with $(...) and use it verbatim
#           as the -run pattern.
#######################################
extract_failed_tests() {
  local LOG_FILE="$1"
  local FAILED_TESTS=""

  echo "=== Analyzing log file for failures ===" >&2

  # Method 1: Standard "--- FAIL: TestName" pattern
  local STANDARD_FAILS
  STANDARD_FAILS=$(grep -E "^--- FAIL:" "$LOG_FILE" 2>/dev/null \
    | grep -oE "Test[A-Za-z0-9_]+" | sort -u | tr '\n' ' ' || true)
  if [ -n "$STANDARD_FAILS" ]; then
    echo " [Method 1] Found via --- FAIL: $STANDARD_FAILS" >&2
    FAILED_TESTS="$STANDARD_FAILS"
  fi

  # Method 2: Find tests that panicked (look for === RUN before each panic)
  if grep -q "^panic:" "$LOG_FILE" 2>/dev/null; then
    echo " [Method 2] Panic detected, finding affected tests..." >&2
    local PANIC_LINES PANIC_LINE PANIC_TEST
    PANIC_LINES=$(grep -n "^panic:" "$LOG_FILE" | cut -d: -f1 || true)
    # Intentionally unquoted: one line number per word.
    for PANIC_LINE in $PANIC_LINES; do
      PANIC_TEST=$(_last_test_before_line "$LOG_FILE" "$PANIC_LINE")
      if [ -n "$PANIC_TEST" ]; then
        echo " Panic in: $PANIC_TEST" >&2
        FAILED_TESTS="$FAILED_TESTS $PANIC_TEST"
      fi
    done
  fi

  # Method 3: Find tests with error messages (API errors, auth failures, etc.)
  local ERROR_PATTERNS="Authorization failed|Failed showing|Failed creating|Failed getting|error getting|API error|status code: 5[0-9][0-9]"
  if grep -qE "$ERROR_PATTERNS" "$LOG_FILE" 2>/dev/null; then
    echo " [Method 3] API/Auth errors detected, finding affected tests..." >&2
    local ERROR_LINES ERROR_LINE ERROR_TEST
    # Only the first 5 matching lines are inspected.
    ERROR_LINES=$(grep -nE "$ERROR_PATTERNS" "$LOG_FILE" | cut -d: -f1 | head -5 || true)
    for ERROR_LINE in $ERROR_LINES; do
      ERROR_TEST=$(_last_test_before_line "$LOG_FILE" "$ERROR_LINE")
      if [ -n "$ERROR_TEST" ]; then
        echo " Error in: $ERROR_TEST" >&2
        FAILED_TESTS="$FAILED_TESTS $ERROR_TEST"
      fi
    done
  fi

  # Method 4: Last resort - get the last running test before FAIL
  if [ -z "$FAILED_TESTS" ]; then
    echo " [Method 4] Using last running test as fallback..." >&2
    local LAST_TEST
    LAST_TEST=$(grep -E "^=== RUN" "$LOG_FILE" | tail -1 \
      | grep -oE "Test[A-Za-z0-9_]+" | head -1 || true)
    if [ -n "$LAST_TEST" ]; then
      echo " Last running: $LAST_TEST" >&2
      FAILED_TESTS="$LAST_TEST"
    fi
  fi

  # Clean up: deduplicate and format as pipe-separated for -run flag
  if [ -n "$FAILED_TESTS" ]; then
    # For every name containing "_", also add the name with its last
    # "_suffix" removed (TestFoo_Bar -> TestFoo).
    # NOTE(review): "/"-separated subtest names are already reduced to the
    # top-level test by the name regex above, so this only broadens the
    # selection for underscore-named tests - confirm it is still wanted.
    local ALL_TESTS="" TEST PARENT
    for TEST in $FAILED_TESTS; do
      ALL_TESTS="$ALL_TESTS $TEST"
      PARENT="${TEST%_*}"
      if [[ "$PARENT" == Test* && "$PARENT" != "$TEST" ]]; then
        ALL_TESTS="$ALL_TESTS $PARENT"
      fi
    done
    FAILED_TESTS=$(echo "$ALL_TESTS" | tr ' ' '\n' | grep -E "^Test" \
      | sort -u | tr '\n' '|' | sed 's/|$//')
  fi

  # The only thing written to stdout: the result.
  printf '%s\n' "$FAILED_TESTS"
}
439+
#######################################
# Re-run the integration tests selected by a -run pattern.
# Arguments:
#   $1 - PATTERN:      regular expression passed to `go test -run`
#   $2 - ATTEMPT:      retry number (the caller starts at 1); shown in the
#                      banner and used to scale the pre-run delay
#   $3 - MAX_ATTEMPTS: total number of retries, shown in the banner
#   $4 - LOG_SUFFIX:   suffix for the coverage profile and log file names
# Outputs:
#   Banner and `go test` output on stdout; the test output is also written
#   to test_output_<group>_<LOG_SUFFIX>.log via tee.
# Returns:
#   The exit status of `go test` (not of tee).
# Side effects:
#   Leaves errexit (set -e) enabled on return, so callers must capture the
#   status with `|| rc=$?` rather than reading $? on the next line.
#######################################
run_tests_with_retry() {
  local PATTERN="$1"
  local ATTEMPT="$2"
  local MAX_ATTEMPTS="$3"
  local LOG_SUFFIX="$4"
  # Declared up front so nothing runs between the pipeline and PIPESTATUS.
  local EXIT_CODE

  echo ""
  echo "=========================================="
  echo " RETRY ATTEMPT $ATTEMPT of $MAX_ATTEMPTS"
  echo " Pattern: $PATTERN"
  echo "=========================================="
  echo ""

  # Wait before every retry to allow cleanup and server recovery. ATTEMPT is
  # a retry number starting at 1, so the first retry must wait too; the delay
  # grows with each retry (15s, 30s, ...).
  if [ "$ATTEMPT" -ge 1 ]; then
    local WAIT_TIME=$((ATTEMPT * 15))
    echo "Waiting ${WAIT_TIME}s before retry..."
    sleep "$WAIT_TIME"
  fi

  # errexit is switched off so a failing test run reaches the status capture.
  set +e
  go test \
    -tags integration \
    -v \
    -p 1 \
    -timeout 60m \
    -run "$PATTERN" \
    -coverpkg github.com/checkmarx/ast-cli/internal/commands,github.com/checkmarx/ast-cli/internal/services,github.com/checkmarx/ast-cli/internal/wrappers \
    -coverprofile cover-${{ matrix.group }}-${LOG_SUFFIX}.out \
    github.com/checkmarx/ast-cli/test/integration 2>&1 | tee test_output_${{ matrix.group }}_${LOG_SUFFIX}.log
  # PIPESTATUS[0] is go test's status; $? would be tee's.
  EXIT_CODE=${PIPESTATUS[0]}
  set -e

  return "$EXIT_CODE"
}
475+
# Retry driver: when the first run failed, identify the failing tests and
# re-run only those, up to MAX_RETRIES times. Exits 1 on infrastructure
# failures, when no failing test can be identified, or when every retry fails.
if [ "$FIRST_RUN_EXIT_CODE" -ne 0 ]; then
  echo ""
  echo "============================================"
  echo " FIRST RUN FAILED - Starting retry logic"
  echo "============================================"

  # Check for hard infrastructure failures that shouldn't be retried
  if grep -qE "Could not reach provided Checkmarx server|connection refused|no such host" test_output_${{ matrix.group }}.log; then
    echo "::error::Infrastructure failure detected - Checkmarx server unreachable"
    echo "This is a server connectivity issue, not a test failure."
    exit 1
  fi

  # Extract failed tests
  FAILED_TESTS=$(extract_failed_tests "test_output_${{ matrix.group }}.log")

  if [ -z "$FAILED_TESTS" ]; then
    echo "::error::Could not identify which tests failed"
    echo "Check the log file for details"
    exit 1
  fi

  echo ""
  echo "Tests to retry: $FAILED_TESTS"

  # Retry loop - up to 2 more attempts
  MAX_RETRIES=2
  CURRENT_RETRY=1
  RETRY_SUCCESS=false

  while [ "$CURRENT_RETRY" -le "$MAX_RETRIES" ]; do
    # errexit is active here (run_tests_with_retry re-enables it before
    # returning). A bare call followed by RETRY_EXIT_CODE=$? would abort the
    # whole script on the first failing retry, so the status is captured in
    # an || list, where errexit does not apply.
    RETRY_EXIT_CODE=0
    run_tests_with_retry "^($FAILED_TESTS)$" "$CURRENT_RETRY" "$MAX_RETRIES" "retry${CURRENT_RETRY}" \
      || RETRY_EXIT_CODE=$?

    if [ "$RETRY_EXIT_CODE" -eq 0 ]; then
      echo ""
      echo "=========================================="
      echo " ✅ TESTS PASSED ON RETRY $CURRENT_RETRY"
      echo "=========================================="
      RETRY_SUCCESS=true
      break
    else
      echo ""
      echo "Retry $CURRENT_RETRY failed with exit code: $RETRY_EXIT_CODE"

      # Check if we should continue retrying
      if [ "$CURRENT_RETRY" -lt "$MAX_RETRIES" ]; then
        # Narrow the next retry to whatever failed in this one; keep the
        # previous selection if nothing could be extracted.
        NEW_FAILURES=$(extract_failed_tests "test_output_${{ matrix.group }}_retry${CURRENT_RETRY}.log")
        if [ -n "$NEW_FAILURES" ]; then
          FAILED_TESTS="$NEW_FAILURES"
          echo "Updated failed tests for next retry: $FAILED_TESTS"
        fi
      fi
    fi

    CURRENT_RETRY=$((CURRENT_RETRY + 1))
  done

  if [ "$RETRY_SUCCESS" = false ]; then
    echo ""
    echo "=========================================="
    echo " ❌ TESTS FAILED AFTER $MAX_RETRIES RETRIES"
    echo "=========================================="
    exit 1
  fi
else
  echo ""
  echo "=========================================="
  echo " ✅ ALL TESTS PASSED ON FIRST RUN"
  echo "=========================================="
fi
411548
412549 - name : Skip notification (no uncovered tests)
0 commit comments