Skip to content

Commit 81b4bef

Browse files
committed
EX-343: refactored code and removed unnecessary components / EX-344: fixed Jira comment posting error and created new logic for git diff
1 parent f7d4450 commit 81b4bef

File tree

5 files changed

+46
-207
lines changed

5 files changed

+46
-207
lines changed

main.py

Lines changed: 0 additions & 79 deletions
This file was deleted.

src/code_review_agent/cli.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,6 @@ def run_review_logic(
336336

337337
logging.info("🧠 Performing smart dependency analysis to pre-populate context...")
338338

339-
340339
final_context_content = dict(changed_files_content)
341340

342341
full_project_structure = git_utils.get_file_structure(
@@ -451,8 +450,9 @@ def assess(
451450

452451
logging.info("🔍 Gathering data for Jira assessment...")
453452
commit_messages = git_utils.get_commit_messages(repo_path, base_ref, head_ref)
454-
diff_text = git_utils.get_diff(repo_path, base_ref, head_ref)
455453

454+
structured_diff_summary = git_utils.get_structured_diff_summary(repo_path, base_ref, head_ref)
455+
logging.info(f"📊 Generated structured diff summary: {structured_diff_summary}")
456456
# Diagnostics
457457
logging.info(f"[Diag] BITBUCKET_BRANCH={os.environ.get('BITBUCKET_BRANCH')}")
458458
logging.info(f"[Diag] BITBUCKET_COMMIT_MESSAGE={os.environ.get('BITBUCKET_COMMIT_MESSAGE')!r}")
@@ -502,15 +502,14 @@ def assess(
502502
summary = summarizer.summarize_changes_for_jira(
503503
jira_details=jira_details_text,
504504
commit_messages=commit_messages,
505-
diff_text=diff_text,
505+
diff_summary=structured_diff_summary,
506506
llm_config=load_config(repo_path).get('llm', {})
507507
)
508508

509509
if not summary:
510510
logging.warning("Summarizer agent failed to produce a result. Skipping Jira comment.")
511511
return
512512

513-
514513
jira_client.add_assessment_comment(task_id, summary)
515514

516515

src/code_review_agent/git_utils.py

Lines changed: 31 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -25,55 +25,6 @@ def _query_tree(language, tree, query_string):
2525
return list(set(node.text.decode('utf8') for node, _ in captures))
2626

2727

28-
def extract_dependencies_from_content(file_path: str, file_content: str) -> List[str]:
29-
"""
30-
Extracts dependencies using Tree-sitter for universal language analysis.
31-
"""
32-
file_extension = os.path.splitext(file_path)
33-
34-
if file_extension not in LANGUAGES:
35-
return []
36-
37-
language = LANGUAGES[file_extension]
38-
parser = Parser()
39-
parser.set_language(language)
40-
41-
tree = parser.parse(bytes(file_content, "utf8"))
42-
43-
queries = {
44-
'.cs': """
45-
(using_directive (name_colon_qualified_name) @import) ; for C# usings
46-
(class_declaration base_list: (base_list (simple_base_type) @base)) ; for inheritance
47-
""",
48-
'.py': """
49-
(import_statement name: (dotted_name) @import)
50-
(from_import_statement module_name: (dotted_name) @import)
51-
""",
52-
'.ts': "(import_statement source: (string) @import)",
53-
'.tsx': "(import_statement source: (string) @import)",
54-
'.js': "(import_statement source: (string) @import)",
55-
}
56-
57-
query_string = queries.get(file_extension, "")
58-
if not query_string:
59-
return []
60-
61-
raw_imports = _query_tree(language, tree, query_string)
62-
63-
64-
dependencies = set()
65-
for imp in raw_imports:
66-
clean_dep = imp.strip("'\"").strip()
67-
68-
final_part = os.path.basename(clean_dep)
69-
70-
final_part = final_part.split('.')[-1]
71-
72-
final_part = final_part.split(',')[0].strip()
73-
74-
dependencies.add(final_part)
75-
76-
return list(dependencies)
7728

7829
def get_diff(repo_path: str, base_ref: str, head_ref: str) -> str:
7930
"""Calculates the diff between two refs using the merge base strategy."""
@@ -233,39 +184,39 @@ def create_annotated_file(full_content: str, diff_content: str) -> str:
233184
return f"--- FULL FILE CONTENT ---\n{full_content}\n\n--- GIT DIFF ---\n{diff_content}"
234185

235186

236-
def extract_dependencies_from_content(file_path: str, file_content: str) -> List[str]:
187+
def get_structured_diff_summary(repo_path: str, base_ref: str, head_ref: str) -> dict:
    """
    Analyzes the diff and returns a structured summary of changes.

    The two refs are compared directly (``base_ref`` against ``head_ref``).

    Args:
        repo_path: Path inside the repository; parent directories are
            searched for the repository root.
        base_ref: Ref (branch, tag or SHA) used as the "before" side.
        head_ref: Ref (branch, tag or SHA) used as the "after" side.

    Returns:
        A JSON-serializable dict with the keys ``files_added``,
        ``files_deleted``, ``files_renamed`` (as ``"old -> new"`` strings),
        ``files_modified``, ``total_insertions`` and ``total_deletions``
        (both counted in lines). If anything fails, the error is logged and
        ``{"error": "Could not generate summary."}`` is returned instead.
    """
    summary = {
        "files_added": [],
        "files_deleted": [],
        "files_renamed": [],
        "files_modified": [],
        "total_insertions": 0,
        "total_deletions": 0,
    }
    try:
        repo = git.Repo(repo_path, search_parent_directories=True)
        base_commit = repo.commit(base_ref)
        head_commit = repo.commit(head_ref)

        # Classify every changed path by the kind of change.
        for diff in base_commit.diff(head_commit):
            if diff.new_file:
                summary["files_added"].append(diff.b_path)
            elif diff.deleted_file:
                summary["files_deleted"].append(diff.a_path)
            elif diff.renamed_file:
                summary["files_renamed"].append(f"{diff.a_path} -> {diff.b_path}")
            else:  # Modified
                summary["files_modified"].append(diff.a_path)

        # Line counts come from `git diff --numstat` rather than from the
        # Diff objects: an added file has no a_blob (None), blob sizes are
        # bytes rather than lines, and without create_patch=True the Diff
        # objects carry no patch text to count from.
        # "-M" keeps rename detection on so renamed files are not counted
        # as a full delete plus a full add.
        numstat = repo.git.diff(
            "--numstat", "-M", base_commit.hexsha, head_commit.hexsha
        )
        for line in numstat.splitlines():
            # Each row is "<insertions>\t<deletions>\t<path>".
            fields = line.split("\t", 2)
            if len(fields) < 3:
                continue
            insertions, deletions = fields[0], fields[1]
            # Binary files are reported as "-" and contribute no lines.
            if insertions.isdigit():
                summary["total_insertions"] += int(insertions)
            if deletions.isdigit():
                summary["total_deletions"] += int(deletions)

        return summary
    except Exception as e:
        # Best-effort boundary: the caller still proceeds with whatever
        # dict it receives, so report the failure instead of raising.
        logger.error(f"Could not get structured diff summary: {e}", exc_info=True)
        return {"error": "Could not generate summary."}

src/code_review_agent/summarizer.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,22 @@
99
def summarize_changes_for_jira(
1010
jira_details: str,
1111
commit_messages: str,
12-
diff_text: str,
12+
diff_summary: dict,
1313
llm_config: dict,
1414
) -> MergeSummary | None:
1515

1616
client = get_client(llm_config)
1717
model = llm_config.get('models', {}).get('summarizer', 'google/gemini-pro-1.5')
1818

1919
system_prompt = """
20-
You are an expert AI software analyst. Your task is to analyze a git diff and commit messages to produce a structured summary for a Jira ticket.
21-
22-
**Analysis Instructions:**
23-
1. **Relevance:** Read the Jira task details and the code changes. Rate from 0-100% how relevant the changes are to the task.
24-
2. **Database Changes:** Look for changes in database migration files, entity configurations, or DbContext files. Identify created or modified table names.
25-
3. **API Changes:** Look for changes in Controller files or route definitions. Identify added or modified API endpoints (e.g., "GET /api/users", "POST /api/products/{id}").
26-
4. **Commit Summary:** Read all commit messages and produce a single, concise, high-level summary of the work done.
27-
28-
**CRITICAL OUTPUT FORMATTING RULE:**
29-
Your entire response MUST be a single, valid JSON object that adheres to the `MergeSummary` schema. Do not add any other text.
20+
You are an expert AI software analyst. Your task is to analyze a structured summary of code changes and produce a high-level summary for a Jira ticket.
21+
DO NOT analyze the code itself, only the provided metadata.
22+
23+
CRITICAL OUTPUT FORMATTING RULE:
24+
Your entire response MUST be a single, valid JSON object that adheres to the `MergeSummary` schema.
3025
"""
26+
27+
diff_summary_text = json.dumps(diff_summary, indent=2)
3128

3229
user_prompt = f"""
3330
Please analyze the following data and provide a structured summary.
@@ -39,11 +36,12 @@ def summarize_changes_for_jira(
3936
{commit_messages}
4037
```
4138
42-
**Full Git Diff:**
43-
```diff
44-
{diff_text}
39+
**Structured Summary of Code Changes:**
40+
```json
41+
{diff_summary_text}
4542
```
4643
44+
Based on this metadata, provide your assessment.
4745
Return your findings as a raw JSON object string.
4846
"""
4947

src/tests/test_git_utils.py

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -43,36 +43,6 @@ def test_get_commit_messages(test_repo):
4343
assert "Second commit" in messages
4444

4545

46-
def test_extract_dependencies_from_content_csharp():
47-
"""Tests Tree-sitter dependency extraction for C#."""
48-
file_path = "MyService.cs"
49-
file_content = """
50-
using System.Text;
51-
using Company.Core.Models;
52-
53-
namespace MyNamespace
54-
{
55-
public class MyService : IMyService, IDisposable
56-
{
57-
// ...
58-
}
59-
}
60-
"""
61-
dependencies = git_utils.extract_dependencies_from_content(file_path, file_content)
62-
63-
assert set(dependencies) == {"Text", "Models", "IMyService", "IDisposable"}
64-
65-
def test_extract_dependencies_from_content_python():
66-
"""Tests Tree-sitter dependency extraction for Python."""
67-
file_path = "main.py"
68-
file_content = """
69-
import os
70-
from my_project.utils import helper_function
71-
"""
72-
dependencies = git_utils.extract_dependencies_from_content(file_path, file_content)
73-
assert set(dependencies) == {"os", "utils"}
74-
75-
7646

7747
def test_find_files_by_names(test_repo):
7848
repo_path, _, _ = test_repo

0 commit comments

Comments
 (0)