Skip to content
This repository was archived by the owner on Jan 9, 2026. It is now read-only.

Commit 023f7e3

Browse files
fix(processor): make log loading and code searching more robust (#23)
* fix(processor.ts): support ISO8601 timestamps

1. Added new regex patterns to match:
   - ISO8601 timestamp with log level and module path: 2025-04-19T17:49:01.282995Z INFO boomerang_builder::env::build: ...
   - Rust/Go-style logs with field labels: 2025-04-19T15:04:32.431Z [INFO] server=web ...
2. Enhanced the parseLogLine method with:
   - Heuristic fallback parsing when no pattern matches
   - Better timestamp and severity detection
   - Module/service name extraction
3. Improved the ExtensibleLogParser class with:
   - Better content validation
   - Explicit fallback to PlainTextLogParser when no dedicated parser works
   - More informative logging for diagnostic purposes

These changes make the parser more flexible and able to handle a wider variety of log formats without tight coupling to any specific format.

* Make code location finding more robust with the following changes:
   - Use Claude Sonnet for code analysis as well
   - Don't send all logs while doing stack analysis, to avoid overflowing the context window
   - Added a fallback strategy for matching LLM-generated static strings. In some cases, the LLM returns static strings that still include dynamic components (e.g., variables, IDs, or object wrappers). These are often found at the beginning or end of the predicted string. To improve robustness, we’ve introduced a recursive fallback strategy: if an exact match for the static string fails, we progressively strip words from the start and end of the string until a match is found.

---------

Co-authored-by: Priyank Chodisetti <priyank.ch@gmail.com>
1 parent a69e97e commit 023f7e3

File tree

3 files changed

+361
-82
lines changed

3 files changed

+361
-82
lines changed

src/claudeService.ts

Lines changed: 82 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,8 @@ export class ClaudeService {
3030
private apiKey: string | undefined;
3131
private apiEndpoint: string = 'https://api.anthropic.com/v1/messages';
3232

33-
// Use different models for different tasks
34-
private analysisModel: string = 'claude-3-haiku-20240307'; // Fast model for simple analysis
35-
private callerModel: string = 'claude-3-7-sonnet-20250219'; // Use Sonnet for better balance of speed and quality
33+
// Use a single model for all tasks since we're using the same one
34+
private model: string = 'claude-3-7-sonnet-20250219';
3635

3736
private constructor() {
3837
// Load API key from workspace state if available
@@ -52,13 +51,13 @@ export class ClaudeService {
5251
await vscode.workspace.getConfiguration('traceback').update('claudeApiKey', key, true);
5352
}
5453

55-
public async analyzeLog(logMessage: string): Promise<LLMLogAnalysis> {
54+
public async analyzeLog(logMessage: string, language: string): Promise<LLMLogAnalysis> {
5655
if (!this.apiKey) {
5756
throw new Error('Claude API key not set. Please set your API key first.');
5857
}
5958

6059
try {
61-
const response = await this.callClaude(logMessage);
60+
const response = await this.callClaude(logMessage, language);
6261
return response;
6362
} catch (error) {
6463
console.error('Error calling Claude API:', error);
@@ -90,7 +89,7 @@ export class ClaudeService {
9089
}
9190
}
9291

93-
private async callClaude(logMessage: string): Promise<LLMLogAnalysis> {
92+
private async callClaude(logMessage: string, language: string): Promise<LLMLogAnalysis> {
9493
const tools = [{
9594
name: "analyze_log",
9695
description: "Analyze a log message to extract static search string and variables",
@@ -115,44 +114,29 @@ export class ClaudeService {
115114
messages: [{
116115
role: 'user',
117116
content: `Analyze this log message and extract:
118-
1. The static prefix or template part that would be in the source code
117+
1. Think and infer a possibly longest static substring that can be searched in the code base.
119118
2. Key-value pairs of any variables or dynamic values in the log
120119
121120
Log message: "${logMessage}"
122121
123122
Rules for static search string:
124-
- Only include text that is guaranteed to be constant in the source code
125-
- Do NOT include any key-value pair formatting or variable values
126-
- Do NOT include log level indicators like [INFO], [DEBUG], [ERROR], etc. - they often aren't in the source code
127-
- Do NOT include timestamps or date formats as they are typically generated at runtime
128-
- When in doubt, be conservative and include less rather than more
129-
- Focus on the actual message content that would appear in a logging statement
130-
- You MUST return a non-empty static string
123+
- Predicted staticSearchString should be exact substring of logMessage.
124+
- logMessage.substring(staticSearchString) should be true.
125+
- No regular expressions allowed.
131126
132127
Rules for variables:
133128
- Extract all key-value pairs and dynamic values
134129
- Preserve variable names as they appear in the log
135-
- Keep the original data types where clear
136130
137131
Examples:
138132
Input: "[PlaceOrder] user_id=\"3790d414-165b-11f0-8ee4-96dac6adf53a\" user_currency=\"USD\""
139133
Static: "PlaceOrder" (Note: brackets and log level removed)
140134
Variables: {
141135
"user_id": "3790d414-165b-11f0-8ee4-96dac6adf53a",
142136
"user_currency": "USD"
143-
}
144-
145-
Input: "2023-04-17 12:36:39 [INFO] Tracking ID Created: 448ba545-9a1d-464d-83bc-d0c9e1ece0f9"
146-
Static: "Tracking ID Created:" (Note: timestamp, [INFO] removed)
147-
Variables: {
148-
"Tracking ID": "448ba545-9a1d-464d-83bc-d0c9e1ece0f9"
149-
}
150-
151-
Remember: Both staticSearchString and variables fields are required in your response.
152-
If you can't find any static text, return an empty string.
153-
If you can't find any variables, return an empty object.`
137+
}`
154138
}],
155-
model: this.analysisModel,
139+
model: this.model,
156140
max_tokens: 500,
157141
tools: tools,
158142
tool_choice: {
@@ -183,6 +167,9 @@ If you can't find any variables, return an empty object.`
183167
throw new Error(`Claude API error: ${response.statusText}\nDetails: ${JSON.stringify(responseData)}`);
184168
}
185169

170+
console.log("Log Message: ", logMessage);
171+
console.log("Static Search String: ", responseData.content[0].input.staticSearchString);
172+
186173
// Validate response structure
187174
if (!responseData.content ||
188175
!Array.isArray(responseData.content) ||
@@ -215,7 +202,7 @@ If you can't find any variables, return an empty object.`
215202
console.warn('Claude response variables is not an object, using empty object');
216203
toolOutput.variables = {};
217204
}
218-
205+
219206
// Clean up the static search string by removing log level indicators
220207
// This helps with matching source code that doesn't include these markers
221208
if (toolOutput.staticSearchString) {
@@ -224,7 +211,7 @@ If you can't find any variables, return an empty object.`
224211
const originalSearchString = toolOutput.staticSearchString;
225212
// Remove log level indicators
226213
toolOutput.staticSearchString = toolOutput.staticSearchString.replace(logLevelPattern, '');
227-
214+
228215
// Log the change if we modified the search string
229216
if (originalSearchString !== toolOutput.staticSearchString) {
230217
console.log(`Cleaned log level indicators from search string: "${originalSearchString}" → "${toolOutput.staticSearchString}"`);
@@ -244,6 +231,10 @@ If you can't find any variables, return an empty object.`
244231
allLogLines: string[],
245232
potentialCallers: Array<{ filePath: string; lineNumber: number; code: string; functionName: string; }>
246233
): Promise<CallerAnalysis> {
234+
// Filter and limit log lines to prevent prompt too long errors
235+
const MAX_LOG_LINES = 20; // Reasonable limit to prevent context overflow
236+
const filteredLogs = this.filterRelevantLogs(currentLogLine, allLogLines, MAX_LOG_LINES);
237+
247238
const tools = [{
248239
name: "analyze_callers",
249240
description: "Analyze potential callers and rank them based on likelihood",
@@ -296,8 +287,8 @@ If you can't find any variables, return an empty object.`
296287
Current log line: "${currentLogLine}"
297288
Static search string used: "${staticSearchString}"
298289
299-
All log lines in current session:
300-
${allLogLines.map(log => `- ${log}`).join('\n')}
290+
Most relevant log lines from current session:
291+
${filteredLogs.map(log => `- ${log}`).join('\n')}
301292
302293
Potential callers found in codebase:
303294
${potentialCallers.map(caller => `
@@ -307,7 +298,7 @@ Line ${caller.lineNumber}: ${caller.code}
307298
`).join('\n')}
308299
309300
Rules for ranking:
310-
1. Consider the context from all log lines
301+
1. Consider the context from the provided log lines
311302
2. Look for patterns in function names and variable usage
312303
3. Consider the proximity of the code to related functionality
313304
4. Consider common logging patterns and practices
@@ -323,7 +314,7 @@ Return a ranked list of callers, each with:
323314
324315
Use the analyze_callers function to return the results in the exact format required.`
325316
}],
326-
model: this.callerModel, // Use full model for complex analysis
317+
model: this.model, // Use same model for all tasks
327318
max_tokens: 4000, // Keep full token limit for detailed analysis
328319
tools: tools,
329320
tool_choice: {
@@ -364,4 +355,62 @@ Use the analyze_callers function to return the results in the exact format requi
364355
throw error;
365356
}
366357
}
358+
359+
/**
360+
* Pick a bounded set of log lines for analysis: a window around the current log line, topped up with word-overlap matches when the window is short
361+
* @param currentLogLine The log line being analyzed
362+
* @param allLogLines All available log lines
363+
* @param maxLines Maximum number of log lines to return
364+
* @returns The window of lines around the current line, plus similar lines (duplicates removed) when the window is shorter than maxLines; just [currentLogLine] if it is not found in allLogLines
365+
*/
366+
private filterRelevantLogs(currentLogLine: string, allLogLines: string[], maxLines: number): string[] {
367+
// Locate the first exact match of the current log line (indexOf compares with strict equality)
368+
const currentIndex = allLogLines.indexOf(currentLogLine);
369+
if (currentIndex === -1) {
370+
return [currentLogLine];
371+
}
372+
373+
// Split the window unevenly: fewer lines before the current line, more from it onward
374+
const beforeCount = Math.floor(maxLines * 0.3); // 30% before
375+
const afterCount = Math.floor(maxLines * 0.7); // 70% starting at the current line (the slice end below is exclusive)
376+
377+
const start = Math.max(0, currentIndex - beforeCount);
378+
const end = Math.min(allLogLines.length, currentIndex + afterCount);
379+
380+
// Get the logs within our window
381+
const contextLogs = allLogLines.slice(start, end);
382+
383+
// Window shorter than maxLines (e.g. clipped at an array boundary or shortened by rounding): top up via findSimilarLogs; the Set drops duplicate strings
384+
if (contextLogs.length < maxLines) {
385+
const remainingSlots = maxLines - contextLogs.length;
386+
const patternLogs = this.findSimilarLogs(currentLogLine, allLogLines, contextLogs, remainingSlots);
387+
return [...new Set([...contextLogs, ...patternLogs])];
388+
}
389+
390+
return contextLogs;
391+
}
392+
393+
/**
394+
* Rank log lines by case-insensitive, whitespace-delimited word overlap with the current log line and return the best matches
395+
* @param currentLogLine The log line being analyzed
396+
* @param allLogLines All available log lines
397+
* @param excludeLogs Logs to exclude from the search
398+
* @param maxCount Maximum number of similar logs to return
399+
* @returns Up to maxCount log lines in descending similarity order; lines with zero overlap are not filtered out, so they can appear when few lines match
400+
*/
401+
private findSimilarLogs(currentLogLine: string, allLogLines: string[], excludeLogs: string[], maxCount: number): string[] {
402+
// Simple similarity check based on word overlap (lowercased, split on whitespace)
403+
const currentWords = new Set(currentLogLine.toLowerCase().split(/\s+/));
404+
405+
return allLogLines
406+
.filter(log => !excludeLogs.includes(log)) // Exclude logs we already have
407+
.map(log => {
408+
const words = log.toLowerCase().split(/\s+/);
409+
const overlap = words.filter(word => currentWords.has(word)).length;
410+
return { log, similarity: overlap / Math.max(words.length, currentWords.size) }; // divide by the larger of: this line's word count, the current line's distinct-word count
411+
})
412+
.sort((a, b) => b.similarity - a.similarity) // Sort by similarity, highest first
413+
.slice(0, maxCount) // Take top N
414+
.map(item => item.log);
415+
}
367416
}

src/logExplorer.ts

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,35 @@ export class LogExplorerProvider implements vscode.TreeDataProvider<vscode.TreeI
435435
let analysis = log.claudeAnalysis;
436436
if (!analysis) {
437437
progress.report({ message: 'Analyzing log with Claude...' });
438-
analysis = await this.claudeService.analyzeLog(logMessage);
438+
439+
// Get repository root path
440+
const repoPath = this.context.globalState.get<string>('repoPath');
441+
if (!repoPath) {
442+
vscode.window.showErrorMessage('Repository root path is not set.');
443+
return;
444+
}
445+
446+
// Detect language based on repository files
447+
let language = 'unknown';
448+
try {
449+
if (fs.existsSync(path.join(repoPath, 'package.json'))) {
450+
language = 'TypeScript/JavaScript';
451+
} else if (fs.existsSync(path.join(repoPath, 'requirements.txt')) || fs.existsSync(path.join(repoPath, 'setup.py'))) {
452+
language = 'Python';
453+
} else if (fs.existsSync(path.join(repoPath, 'pom.xml')) || fs.existsSync(path.join(repoPath, 'build.gradle'))) {
454+
language = 'Java';
455+
} else if (fs.existsSync(path.join(repoPath, 'Cargo.toml'))) {
456+
language = 'Rust';
457+
} else if (fs.existsSync(path.join(repoPath, 'go.mod'))) {
458+
language = 'Go';
459+
}
460+
// Add more language detection as needed
461+
} catch (error) {
462+
console.error('Error detecting language:', error);
463+
language = 'unknown';
464+
}
465+
466+
analysis = await this.claudeService.analyzeLog(logMessage, language);
439467
log.claudeAnalysis = analysis;
440468
// Refresh variable explorer after analysis is complete
441469
if (this.variableExplorerProvider) {

0 commit comments

Comments
 (0)