Add replay test in CI

SplotyCode · SplotyCode · commit ab87add631db · 2026-01-04T16:59:18.000+01:00
diff --git a/engine/build.gradle.kts b/engine/build.gradle.kts
@@ -9,4 +9,7 @@ dependencies {
     implementation(libs.ktor.client.java)
     implementation(libs.ktor.client.content.negotiation)
     implementation(libs.ktor.serialization.json)
+    testImplementation(kotlin("test"))
+    testImplementation(libs.kotlinx.serialization.json)
+    testImplementation(libs.kotlinx.coroutines.core)
 }
diff --git a/engine/src/test/kotlin/de/tuda/stg/securecoder/engine/workflow/EngineLlmReplayTests.kt b/engine/src/test/kotlin/de/tuda/stg/securecoder/engine/workflow/EngineLlmReplayTests.kt
@@ -6,18 +6,25 @@ import de.tuda.stg.securecoder.engine.stream.StreamEvent
 import de.tuda.stg.securecoder.enricher.PromptEnricher
 import de.tuda.stg.securecoder.filesystem.InMemoryFileSystem
 import kotlinx.coroutines.runBlocking
+import kotlinx.serialization.ExperimentalSerializationApi
 import kotlinx.serialization.encodeToString
 import kotlinx.serialization.json.Json
+import kotlinx.serialization.json.decodeFromStream
 import kotlin.test.Test
 import java.nio.file.Files
 import java.nio.file.Path
+import kotlin.test.Ignore
+import kotlin.test.assertIs
+import kotlin.test.assertTrue
 
 class EngineLlmReplayTests {
     private val json = Json { prettyPrint = true; ignoreUnknownKeys = true; encodeDefaults = true }
-    private val logsPath: Path = Path.of("build", "llm_logs", "log.json")
+    private val resourceName = "llm_output.json"
 
     @Test
+    @Ignore
     fun generator_collects_real_llm_responses() = runBlocking {
+        val logsPath: Path = Path.of("src", "test", "resources", resourceName)
         Files.createDirectories(logsPath.parent)
 
         val prompts = listOf(
@@ -27,11 +34,11 @@ class EngineLlmReplayTests {
 
         val models = buildList {
             val apiKey = System.getenv("API_KEy") ?: "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"
-            add("OR:llama3.2:latest" to OpenRouterClient(apiKey, "meta-llama/llama-3.2-3b-instruct"))
-            add("OR:gpt-oss:20b" to OpenRouterClient(apiKey, "openai/gpt-oss-20b"))
-            //val olBase = System.getenv("OLLAMA_URL") ?: "http://127.0.0.1:11434"
-            //add("ollama:llama3.2:latest" to OllamaClient("llama3.2:latest", baseUrl = olBase))
-            //add("ollama:gpt-oss:20b" to OllamaClient("gpt-oss:20b", baseUrl = olBase))
+            //add("OR:llama3.2:latest" to OpenRouterClient(apiKey, "meta-llama/llama-3.2-3b-instruct"))
+            //add("OR:gpt-oss:20b" to OpenRouterClient(apiKey, "openai/gpt-oss-20b"))
+            val olBase = System.getenv("OLLAMA_URL") ?: "http://127.0.0.1:11434"
+            add("ollama:llama3.2:latest" to OllamaClient("llama3.2:latest", baseUrl = olBase))
+            add("ollama:gpt-oss:20b" to OllamaClient("gpt-oss:20b", baseUrl = olBase))
         }
 
         val runs = mutableListOf<LoggedRun>()
@@ -81,12 +88,11 @@ class EngineLlmReplayTests {
     }
 
     @Test
-    fun replay_test_uses_recorded_responses_and_counts_success() = runBlocking {
-        if (!Files.exists(logsPath)) {
-            println("No log file at $logsPath; nothing to replay. Test will be a no-op.")
-            return@runBlocking
-        }
-        val suite = json.decodeFromString<LoggedSuite>(Files.readString(logsPath))
+    @OptIn(ExperimentalSerializationApi::class)
+    fun test_replay() = runBlocking {
+        val resourceStream = this@EngineLlmReplayTests::class.java.classLoader.getResourceAsStream(resourceName)
+            ?: throw IllegalStateException("No $resourceName on classpath")
+        val suite: LoggedSuite = resourceStream.use { json.decodeFromStream(it) }
 
         data class Group(
             val modelName: String,
@@ -95,7 +101,7 @@ class EngineLlmReplayTests {
             var successes: Int  = 0,
             var parseFails: Int  = 0
         )
-
+        assertTrue(suite.runs.isNotEmpty())
         suite.runs
             .groupBy { it.modelName to it.promptKind }
             .map { (key, runs) ->
@@ -112,28 +118,22 @@ class EngineLlmReplayTests {
                         guardians = emptyList(),
                     )
                     group.total++
-                    var l = 0
                     val result = engine.run(run.enginePrompt, fs, onEvent = {
                         if (it !is StreamEvent.InvalidLlmOutputWarning) return@run
                         group.parseFails++
-                        if (l++ >= 2) {
-                            println("=======ERROR=======")
-                            println("=======ERROR=======")
-                            println("=======ERROR=======")
-                            println(it.parseErrors.joinToString("\n"))
-                            println()
-                            println()
-                            println(it.chatExchange.output)
-                        }
+                        //println(it.parseErrors.joinToString("\n"))
+                        //println(it.chatExchange.output)
                     })
                     if (result is EngineResult.Success) {
                         group.successes++
                     }
+                    assertIs<EngineResult.Success>(result)
                 }
                 group
             }
             .forEach {
-                println("Group ${it.modelName} / ${it.promptKind}: replayed runs: ${it.total}, successes: ${it.successes} (${it.parseFails} parse failures)")
+                assertTrue(it.parseFails <= (1.5 * suite.runs.size))
+                //println("Group ${it.modelName} / ${it.promptKind}: replayed runs: ${it.total}, successes: ${it.successes} (${it.parseFails} parse failures)")
             }
     }
 }
diff --git a/engine/src/test/resources/llm_output.json b/engine/src/test/resources/llm_output.json

Original file line number	Diff line number	Diff line change
`@@ -9,4 +9,7 @@ dependencies {`
`9`	`9`	`implementation(libs.ktor.client.java)`
`10`	`10`	`implementation(libs.ktor.client.content.negotiation)`
`11`	`11`	`implementation(libs.ktor.serialization.json)`
	`12`	`+ testImplementation(kotlin("test"))`
	`13`	`+ testImplementation(libs.kotlinx.serialization.json)`
	`14`	`+ testImplementation(libs.kotlinx.coroutines.core)`
`12`	`15`	`}`