Skip to content

Commit 68e262c

Browse files
committed
main-loop: gate screenshot mission completion on artifact evidence
1 parent 8f05d27 commit 68e262c

File tree

3 files changed

+107
-1
lines changed

3 files changed

+107
-1
lines changed

src/lib/server/main-agent-loop.ts

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,19 @@ const MEMORY_NOTE_MIN_INTERVAL_MS = 90 * 60 * 1000
1818
const DEFAULT_FOLLOWUP_DELAY_SEC = 45
1919
const MAX_FOLLOWUP_CHAIN = 6
2020
const META_LINE_RE = /\[MAIN_LOOP_META\]\s*(\{[^\n]*\})/i
21+
// Heuristic patterns used to gate mission completion on screenshot artifact evidence.
// All of them are case-insensitive and deliberately carry no `g` flag, so repeated
// `.test()` calls never depend on a stale `lastIndex`.

/** Mission text mentions producing a screenshot-like capture (singular or plural). */
const SCREENSHOT_GOAL_HINT = /\b(screenshots?|screen shots?|snapshots?|captures?)\b/i
/** Mission text asks for something to be delivered to someone. */
const DELIVERY_GOAL_HINT = /\b(send|deliver|return|share|upload|post|message)\b/i
/**
 * Mission text describes scheduled or repeating work.
 * `recurr\w*` is a stem match: a bare `recurr` followed by `\b` could never match
 * "recurring" or "recurrence", because no word boundary exists right after the stem.
 */
const SCHEDULE_GOAL_HINT = /\b(schedules?|scheduled|every\s+\w+|intervals?|cron|recurr\w*)\b/i
/** Text contains an upload path (`/api/uploads/...`, optionally `sandbox:`-prefixed) or an http(s) URL ending in an image/PDF extension. */
const UPLOAD_ARTIFACT_HINT = /(?:sandbox:)?\/api\/uploads\/[^\s)\]]+|https?:\/\/[^\s)\]]+\.(?:png|jpe?g|webp|gif|pdf)\b/i
/** Text states, within one sentence (no `.` in between), that a screenshot/snapshot/image/file was sent, shared, uploaded, or returned. */
const SENT_ARTIFACT_HINT = /\b(sent|shared|uploaded|returned)\b[^.]*\b(screenshots?|snapshots?|images?|files?)\b/i
26+
27+
/**
 * Minimal structural view of a session message, limited to the one field the
 * artifact-evidence scan reads.
 */
interface MainLoopSessionMessageLike {
  /** Message body; optional, and ignored by the scan unless it is a non-blank string. */
  text?: string
}
30+
31+
/**
 * Minimal structural view of a session as consumed by the completion gate:
 * only the message list is inspected.
 */
interface MainLoopSessionEvidenceLike {
  /** Session messages; the evidence scan walks this list from the last entry backwards. */
  messages?: Array<MainLoopSessionMessageLike>
}
2134

2235
export interface MainLoopEvent {
2336
id: string
@@ -425,6 +438,39 @@ function buildGoalContractLines(state: MainLoopState): string[] {
425438
return lines
426439
}
427440

441+
/**
 * Decides whether a mission must show screenshot artifact evidence before it
 * may be treated as complete.
 *
 * The goal, goal-contract objective and success metric, next action, plan steps
 * and current plan step are joined into one string. The mission qualifies when
 * that string mentions a screenshot-like capture AND mentions either delivery
 * or scheduling.
 *
 * @param state Main-loop state whose textual fields are inspected.
 * @returns `true` when completion should be gated on artifact evidence.
 */
function missionNeedsScreenshotArtifactEvidence(state: MainLoopState): boolean {
  const missionText = [
    state.goal || '',
    state.goalContract?.objective || '',
    state.goalContract?.successMetric || '',
    state.nextAction || '',
    ...(state.planSteps || []),
    state.currentPlanStep || '',
  ].join(' ')

  const mentionsScreenshot = SCREENSHOT_GOAL_HINT.test(missionText)
  if (!mentionsScreenshot) {
    return false
  }
  if (DELIVERY_GOAL_HINT.test(missionText)) {
    return true
  }
  return SCHEDULE_GOAL_HINT.test(missionText)
}
453+
454+
/**
 * Looks for proof that a screenshot artifact was actually produced or delivered.
 *
 * Candidate texts are the state summary, the caller-supplied `additionalText`,
 * and the most recent non-blank session message texts (walked newest-first).
 * Collection stops once 16 candidates exist, a cap that counts the two seed
 * entries. Evidence is an upload link or a "sent/shared/uploaded/returned ...
 * screenshot" style phrase in any candidate.
 *
 * @param session Session-like object providing `messages`; may be null/undefined.
 * @param state Main-loop state; only `summary` is read here.
 * @param additionalText Extra text to scan (e.g. the latest run result).
 * @returns `true` when at least one candidate contains artifact evidence.
 */
function missionHasScreenshotArtifactEvidence(session: MainLoopSessionEvidenceLike | null | undefined, state: MainLoopState, additionalText = ''): boolean {
  const texts: string[] = [state.summary || '', additionalText || '']

  const messages = session?.messages
  if (Array.isArray(messages)) {
    let index = messages.length - 1
    while (index >= 0 && texts.length < 16) {
      const raw = messages[index]?.text
      const body = typeof raw === 'string' ? raw : ''
      // Whitespace-only messages carry no evidence and must not consume the cap.
      if (body.trim()) texts.push(body)
      index -= 1
    }
  }

  for (const text of texts) {
    if (UPLOAD_ARTIFACT_HINT.test(text)) return true
    if (SENT_ARTIFACT_HINT.test(text)) return true
  }
  return false
}
467+
468+
/**
 * Returns the reason completion must be withheld, or `null` when nothing blocks it.
 *
 * Completion is blocked only when the mission needs screenshot artifact
 * evidence and none is found in the session, state, or `additionalText`.
 *
 * @param session Session-like object scanned for evidence; may be null/undefined.
 * @param state Main-loop state describing the mission.
 * @param additionalText Extra text to scan alongside the session messages.
 * @returns A human-readable gate reason, or `null` if completion is allowed.
 */
function getMissionCompletionGateReason(session: MainLoopSessionEvidenceLike | null | undefined, state: MainLoopState, additionalText = ''): string | null {
  // Short-circuit keeps the evidence scan from running for missions that are not gated.
  const evidenceMissing = missionNeedsScreenshotArtifactEvidence(state)
    && !missionHasScreenshotArtifactEvidence(session, state, additionalText)

  return evidenceMissing
    ? 'Mission requires screenshot artifact evidence (upload link or explicit sent screenshot confirmation) before completion.'
    : null
}
473+
428474
function upsertMissionTask(session: any, state: MainLoopState, now: number): string | null {
429475
if (!state.goal) return state.missionTaskId || null
430476

@@ -445,7 +491,11 @@ function upsertMissionTask(session: any, state: MainLoopState, now: number): str
445491
blocked: 'failed',
446492
ok: 'completed',
447493
} as const
448-
const mappedStatus = statusMap[state.status]
494+
let mappedStatus = statusMap[state.status]
495+
const completionGateReason = mappedStatus === 'completed'
496+
? getMissionCompletionGateReason(session, state)
497+
: null
498+
if (completionGateReason) mappedStatus = 'running'
449499

450500
let changed = false
451501
const contractLines = buildGoalContractLines(state)
@@ -459,6 +509,7 @@ function upsertMissionTask(session: any, state: MainLoopState, now: number): str
459509
'Autonomous mission goal tracked from main loop.',
460510
`Goal: ${state.goal}`,
461511
state.nextAction ? `Next action: ${state.nextAction}` : '',
512+
completionGateReason ? `Completion gate: ${completionGateReason}` : '',
462513
...contractLines,
463514
...planLines,
464515
].filter(Boolean).join('\n')
@@ -604,6 +655,7 @@ function buildFollowupPrompt(state: MainLoopState, opts?: { hasMemoryTool?: bool
604655
? 'Use memory_tool actively: recall relevant prior notes before acting, and store a concise note after each meaningful step.'
605656
: 'memory_tool is unavailable in this session. Keep concise progress summaries in your status/meta output.',
606657
'If you are blocked by missing credentials, permissions, or policy limits, say exactly what is blocked and the smallest unblock needed.',
658+
'For screenshot/image delivery goals (including scheduled captures), do not report status "ok" until a real artifact exists (upload link or explicit sent-file confirmation).',
607659
'If no meaningful action remains right now, reply exactly HEARTBEAT_OK.',
608660
'Otherwise include a concise human update, then append exactly one [MAIN_LOOP_META] JSON line.',
609661
'Optionally append one [MAIN_LOOP_PLAN] JSON line when you create/revise a plan.',
@@ -657,6 +709,7 @@ export function buildMainLoopHeartbeatPrompt(session: any, fallbackPrompt: strin
657709
? 'Use memory_tool actively: recall relevant prior notes before acting, and store concise notes about progress, constraints, and next step after each meaningful action.'
658710
: 'If memory_tool is unavailable, keep concise state in summary/next_action and continue execution.',
659711
'Use a planner-executor-review loop: keep a concrete step plan, execute one meaningful step, then self-review and either continue or re-plan.',
712+
'For screenshot/image delivery goals (including scheduled captures), do not report status "ok" until a real artifact exists (upload link or explicit sent-file confirmation).',
660713
'If nothing important changed and no action is needed now, reply exactly HEARTBEAT_OK.',
661714
'Otherwise: provide a concise human-readable update, then append exactly one [MAIN_LOOP_META] JSON line.',
662715
'Optionally append one [MAIN_LOOP_PLAN] JSON line when creating/updating plan steps.',
@@ -937,6 +990,19 @@ export function handleMainLoopRunResult(input: HandleMainLoopRunResultInput): Ma
937990
}
938991
}
939992

993+
if (input.internal && state.status === 'ok') {
994+
const completionGateReason = getMissionCompletionGateReason(session, state, input.resultText || '')
995+
if (completionGateReason) {
996+
state.status = 'progress'
997+
if (!state.nextAction || /^no queued action/i.test(state.nextAction)) {
998+
state.nextAction = 'Wait for the next schedule run and verify a screenshot artifact link is delivered.'
999+
}
1000+
appendEvent(state, 'completion_gate', completionGateReason, now)
1001+
appendTimeline(state, 'completion_gate', 'Holding completion until screenshot artifact evidence is observed.', now, state.status)
1002+
appendWorkingMemoryNote(state, `gate:${toOneLine(completionGateReason, 180)}`)
1003+
}
1004+
}
1005+
9401006
state.missionTaskId = upsertMissionTask(session, state, now)
9411007
const shouldWritePeriodicMemory = !!state.summary && state.status === 'progress'
9421008
maybeStoreMissionMemoryNote(session, state, now, input.source, forceMemoryNote || shouldWritePeriodicMemory)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import assert from 'node:assert/strict'
2+
import { test } from 'node:test'
3+
import { validateTaskCompletion } from './task-validation.ts'
4+
import type { BoardTask } from '@/types'
5+
6+
// A screenshot-delivery task whose result only reports a verified schedule
// (no upload link, no "sent screenshot" phrase) must be rejected with the
// artifact-evidence reason.
test('validateTaskCompletion fails screenshot delivery tasks without artifact evidence', () => {
  const task = {
    title: 'Take screenshot and send it every minute',
    description: 'Schedule a screenshot capture and deliver it to the user.',
    result: 'Existing schedule verified for taking screenshots every minute. Waiting for next run.',
    error: null,
  } as Partial<BoardTask>

  const outcome = validateTaskCompletion(task)

  assert.equal(outcome.ok, false)
  const expectedFragment = 'Screenshot delivery task is missing artifact evidence'
  const reported = outcome.reasons.some((entry) => entry.includes(expectedFragment))
  assert.ok(reported)
})
17+
18+
// A screenshot-delivery task whose result includes a sandbox upload link
// counts as having artifact evidence and must validate successfully.
test('validateTaskCompletion accepts screenshot delivery tasks with upload artifact evidence', () => {
  const task = {
    title: 'Take screenshot and send it',
    description: 'Capture Wikipedia and return the file to the user.',
    result: 'Captured and sent screenshot successfully: sandbox:/api/uploads/1234-wikipedia.png',
    error: null,
  } as Partial<BoardTask>

  const outcome = validateTaskCompletion(task)

  assert.equal(outcome.ok, true)
})

src/lib/server/task-validation.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ const WEAK_RESULT_PATTERNS: RegExp[] = [
2525

2626
const IMPLEMENTATION_HINT = /\b(add|build|create|fix|implement|integrat|refactor|update|write)\b/i
2727
const EXECUTION_EVIDENCE = /\b(changed|updated|added|modified|files?|commands?|tests?|build|lint|typecheck|verified|report)\b/i
28+
// Screenshot-delivery validation heuristics. All are case-insensitive and carry
// no `g` flag, so `.test()` is stateless across calls.

/** Task text mentions a screenshot-like capture (singular or plural). */
const SCREENSHOT_HINT = /\b(screenshots?|screen shots?|snapshots?|captures?)\b/i
/** Task text asks for the output to be delivered to someone. */
const DELIVERY_HINT = /\b(send|deliver|return|share|upload|post|message)\b/i
/** Result contains an upload path (`/api/uploads/...`, optionally `sandbox:`-prefixed) or an http(s) URL ending in an image/PDF extension. */
const SCREENSHOT_ARTIFACT_HINT = /(?:sandbox:)?\/api\/uploads\/[^\s)\]]+|https?:\/\/[^\s)\]]+\.(?:png|jpe?g|webp|gif|pdf)\b/i
/**
 * Result states, within one sentence (no `.` in between), that a
 * screenshot/snapshot/image/file was sent, shared, uploaded, or returned.
 * Plurals are accepted, and `file` is included so this check accepts the same
 * confirmations as the main loop's SENT_ARTIFACT_HINT.
 */
const SENT_SCREENSHOT_HINT = /\b(sent|shared|uploaded|returned)\b[^.]*\b(screenshots?|snapshots?|images?|files?)\b/i
2832

2933
function normalizeText(value: unknown): string {
3034
if (typeof value !== 'string') return ''
@@ -65,6 +69,15 @@ export function validateTaskCompletion(
6569
}
6670
}
6771

72+
const screenshotTask = SCREENSHOT_HINT.test(title) || SCREENSHOT_HINT.test(description)
73+
const screenshotDeliveryTask = screenshotTask && (DELIVERY_HINT.test(title) || DELIVERY_HINT.test(description))
74+
if (screenshotDeliveryTask) {
75+
const hasScreenshotArtifact = SCREENSHOT_ARTIFACT_HINT.test(result) || SENT_SCREENSHOT_HINT.test(result)
76+
if (!hasScreenshotArtifact) {
77+
reasons.push('Screenshot delivery task is missing artifact evidence (upload link or explicit sent screenshot confirmation).')
78+
}
79+
}
80+
6881
return {
6982
ok: reasons.length === 0,
7083
reasons,

0 commit comments

Comments
 (0)