Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions app/modules/code_provider/github/github_service.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import asyncio
import os
import random
import secrets
import re
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, List, Optional, Tuple
Expand Down Expand Up @@ -346,7 +346,7 @@ async def get_repos_for_user(self, user_id: str):
if token_list_str:
tokens = [t.strip() for t in token_list_str.split(",") if t.strip()]
if tokens:
github_oauth_token = random.choice(tokens)
github_oauth_token = secrets.choice(tokens)
logger.info("Using token from GH_TOKEN_LIST as fallback")

# Fall back to CODE_PROVIDER_TOKEN if GH_TOKEN_LIST not available
Expand Down Expand Up @@ -386,6 +386,7 @@ async def get_repos_for_user(self, user_id: str):
}

ssl_context = ssl.create_default_context(cafile=certifi.where())
ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
connector = aiohttp.TCPConnector(
ssl=ssl_context,
ttl_dns_cache=300,
Expand Down Expand Up @@ -698,7 +699,7 @@ def get_public_github_instance(cls):

# Use factory to create provider with PAT

token = random.choice(cls.gh_token_list)
token = secrets.choice(cls.gh_token_list)
provider = GitHubProvider()
provider.authenticate({"token": token}, AuthMethod.PERSONAL_ACCESS_TOKEN)
return provider.client
Expand Down
9 changes: 3 additions & 6 deletions app/modules/code_provider/provider_factory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import secrets
from enum import Enum
from typing import Any, Dict, Optional

Expand Down Expand Up @@ -159,13 +160,11 @@ def create_provider(
# Fallback to legacy GH_TOKEN_LIST
token_list_str = os.getenv("GH_TOKEN_LIST", "")
if token_list_str:
import random

tokens = [
t.strip() for t in token_list_str.split(",") if t.strip()
]
if tokens:
token = random.choice(tokens)
token = secrets.choice(tokens)
logger.info(
"Authenticating with GH_TOKEN_LIST (legacy PAT pool)"
)
Expand Down Expand Up @@ -347,8 +346,6 @@ def create_provider_with_fallback(repo_name: str) -> ICodeProvider:
logger.debug(f" - Repr: {token_repr[:50]}...")

if token_list_str:
import random

tokens = [t.strip() for t in token_list_str.split(",") if t.strip()]
logger.debug(f"Parsed {len(tokens)} token(s) from GH_TOKEN_LIST")
if tokens:
Expand All @@ -359,7 +356,7 @@ def create_provider_with_fallback(repo_name: str) -> ICodeProvider:
# Always use GitHub's API endpoint when using GH_TOKEN_LIST
base_url = "https://api.github.com"
provider = GitHubProvider(base_url=base_url)
token = random.choice(tokens)
token = secrets.choice(tokens)

provider.authenticate(
{"token": token}, AuthMethod.PERSONAL_ACCESS_TOKEN
Expand Down
16 changes: 14 additions & 2 deletions app/modules/conversations/conversation/conversation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,20 @@ async def _fetch_structure_with_timeout():
exc_info=True,
)

# Create background task - fire and forget
asyncio.create_task(_fetch_structure_with_timeout())
# Create background task with proper exception handling
fetch_task = asyncio.create_task(_fetch_structure_with_timeout())

def _on_fetch_done(t: asyncio.Task) -> None:
if t.cancelled():
return
try:
exc = t.exception()
except asyncio.CancelledError:
return
if exc is not None:
logger.exception("Failed to fetch project structure", exc_info=exc)

fetch_task.add_done_callback(_on_fetch_done)

await self._add_system_message(conversation_id, project_name, user_id)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import secrets
from app.modules.utils.logger import setup_logger

import random
from typing import Dict, Any, Optional, Type, List
from pydantic import BaseModel, Field
from github import Github
Expand Down Expand Up @@ -92,7 +92,7 @@ def __init__(self, sql_db: Session, user_id: str):
def get_public_github_instance(cls):
if not cls.gh_token_list:
cls.initialize_tokens()
token = random.choice(cls.gh_token_list)
token = secrets.choice(cls.gh_token_list)
return Github(token)

def _get_github_client(self, repo_name: str) -> Github:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os
import secrets
from app.modules.utils.logger import setup_logger

logger = setup_logger(__name__)
import random
from typing import Dict, Any, List, Optional, Type
from pydantic import BaseModel, Field
from github import Github
Expand Down Expand Up @@ -59,7 +59,7 @@ def initialize_tokens(cls):
def get_public_github_instance(cls):
if not cls.gh_token_list:
cls.initialize_tokens()
token = random.choice(cls.gh_token_list)
token = secrets.choice(cls.gh_token_list)
return Github(token)

def _get_github_client(self, repo_name: str) -> Github:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os
import secrets
from app.modules.utils.logger import setup_logger

logger = setup_logger(__name__)
import random
from typing import Dict, Any, Optional, Type, List
from pydantic import BaseModel, Field
from github import Github
Expand Down Expand Up @@ -71,7 +71,7 @@ def __init__(self, sql_db: Session, user_id: str):
def get_public_github_instance(cls):
if not cls.gh_token_list:
cls.initialize_tokens()
token = random.choice(cls.gh_token_list)
token = secrets.choice(cls.gh_token_list)
return Github(token)

def _get_github_client(self, repo_name: str) -> Github:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import asyncio
import os
import secrets
from app.modules.utils.logger import setup_logger

logger = setup_logger(__name__)
import random
from typing import Any, Dict, List, Optional

from github import Github
Expand Down Expand Up @@ -105,7 +105,7 @@ def run(
def get_public_github_instance(cls):
if not cls.gh_token_list:
cls.initialize_tokens()
token = random.choice(cls.gh_token_list)
token = secrets.choice(cls.gh_token_list)
return Github(token)

def _get_github_client(self, repo_name: str) -> Github:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import os
import secrets
from app.modules.utils.logger import setup_logger

logger = setup_logger(__name__)
import random
from typing import Dict, Any, List, Optional, Type
from pydantic import BaseModel, Field
from github import Github
Expand Down Expand Up @@ -62,7 +62,7 @@ def __init__(self, sql_db: Session, user_id: str):
def get_public_github_instance(cls):
if not cls.gh_token_list:
cls.initialize_tokens()
token = random.choice(cls.gh_token_list)
token = secrets.choice(cls.gh_token_list)
return Github(token)

def _get_github_client(self, repo_name: str) -> Github:
Expand Down
85 changes: 56 additions & 29 deletions app/modules/parsing/graph_construction/parsing_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,18 @@

from dotenv import load_dotenv
from fastapi import HTTPException
from sqlalchemy import or_, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, or_
from sqlalchemy.orm import Session
from uuid6 import uuid7

from app.celery.tasks.parsing_tasks import process_parsing
from app.core.config_provider import config_provider
from app.modules.code_provider.code_provider_service import CodeProviderService
from app.modules.conversations.conversation.conversation_model import (
Conversation,
Visibility,
)
from app.modules.parsing.graph_construction.parsing_helper import ParseHelper
from app.modules.parsing.graph_construction.parsing_schema import (
ParsingRequest,
Expand All @@ -22,14 +27,12 @@
validate_parsing_input,
)
from app.modules.parsing.utils.repo_name_normalizer import normalize_repo_name
from app.modules.projects.projects_model import Project
from app.modules.projects.projects_schema import ProjectStatusEnum
from app.modules.projects.projects_service import ProjectService
from app.modules.utils.email_helper import EmailHelper
from app.modules.utils.posthog_helper import PostHogClient
from app.modules.conversations.conversation.conversation_model import Conversation
from app.modules.conversations.conversation.conversation_model import Visibility
from app.modules.projects.projects_model import Project
from app.modules.utils.logger import setup_logger
from app.modules.utils.posthog_helper import PostHogClient

logger = setup_logger(__name__)

Expand All @@ -40,7 +43,7 @@ class ParsingController:
@staticmethod
@validate_parsing_input
async def parse_directory(
repo_details: ParsingRequest, db: AsyncSession, user: Dict[str, Any]
repo_details: ParsingRequest, db: Session, user: Dict[str, Any]
):
if "email" not in user:
user_email = None
Expand Down Expand Up @@ -98,21 +101,10 @@ async def parse_directory(
project_manager,
db,
)

demo_repos = [
"Portkey-AI/gateway",
"crewAIInc/crewAI",
"AgentOps-AI/agentops",
"calcom/cal.com",
"langchain-ai/langchain",
"AgentOps-AI/AgentStack",
"formbricks/formbricks",
]

try:
# Normalize repository name for consistent database lookups
normalized_repo_name = normalize_repo_name(repo_name)
logger.info(
logger.debug(
f"Original repo_name: {repo_name}, Normalized: {normalized_repo_name}"
)

Expand All @@ -123,8 +115,15 @@ async def parse_directory(
repo_path=repo_details.repo_path,
commit_id=repo_details.commit_id,
)

# First check if this is a demo project that hasn't been accessed by this user yet
demo_repos = [
"Portkey-AI/gateway",
"crewAIInc/crewAI",
"AgentOps-AI/agentops",
"calcom/cal.com",
"langchain-ai/langchain",
"AgentOps-AI/AgentStack",
"formbricks/formbricks",
]
if not project and repo_details.repo_name in demo_repos:
existing_project = await project_manager.get_global_project_from_db(
normalized_repo_name,
Expand All @@ -151,11 +150,25 @@ async def parse_directory(
repo_name
)

asyncio.create_task(
task = asyncio.create_task(
CodeProviderService(db).get_project_structure_async(
new_project_id
)
)

def _on_structure_done(t: asyncio.Task) -> None:
if t.cancelled():
return
try:
exc = t.exception()
except asyncio.CancelledError:
return
if exc is not None:
logger.exception(
"Failed to get project structure", exc_info=exc
)

task.add_done_callback(_on_structure_done)
# Duplicate the graph under the new repo ID
await parsing_service.duplicate_graph(
old_project_id, new_project_id
Expand All @@ -165,12 +178,24 @@ async def parse_directory(
await project_manager.update_project_status(
new_project_id, ProjectStatusEnum.READY
)
create_task(
email_task = create_task(
EmailHelper().send_email(
user_email, repo_name, repo_details.branch_name
)
)

def _on_email_done(t: asyncio.Task) -> None:
if t.cancelled():
return
try:
exc = t.exception()
except asyncio.CancelledError:
return
if exc is not None:
logger.exception("Failed to send email", exc_info=exc)

email_task.add_done_callback(_on_email_done)
Comment on lines +181 to +197
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Skip email task when user_email is missing.

user_email is now optional; creating a send task with None will just log background errors. Guard it.

💡 Suggested fix
-                    email_task = create_task(
-                        EmailHelper().send_email(
-                            user_email, repo_name, repo_details.branch_name
-                        )
-                    )
+                    if user_email:
+                        email_task = create_task(
+                            EmailHelper().send_email(
+                                user_email, repo_name, repo_details.branch_name
+                            )
+                        )
 
-                    def _on_email_done(t: asyncio.Task) -> None:
-                        if t.cancelled():
-                            return
-                        try:
-                            exc = t.exception()
-                        except asyncio.CancelledError:
-                            return
-                        if exc is not None:
-                            logger.exception("Failed to send email", exc_info=exc)
-
-                    email_task.add_done_callback(_on_email_done)
+                        def _on_email_done(t: asyncio.Task) -> None:
+                            if t.cancelled():
+                                return
+                            try:
+                                exc = t.exception()
+                            except asyncio.CancelledError:
+                                return
+                            if exc is not None:
+                                logger.exception("Failed to send email", exc_info=exc)
+
+                        email_task.add_done_callback(_on_email_done)
🤖 Prompt for AI Agents
In `@app/modules/parsing/graph_construction/parsing_controller.py` around lines
181 - 197, The code creates an async email task regardless of whether user_email
is present, causing background errors when user_email is None; update the block
around create_task/EmailHelper().send_email so it only constructs email_task and
registers _on_email_done if user_email (or a truthy email) is available — i.e.,
wrap the create_task call, the definition/registration of _on_email_done, and
email_task.add_done_callback in a conditional (if user_email:) so no task or
callback is created when user_email is missing.


return {
"project_id": new_project_id,
"status": ProjectStatusEnum.READY.value,
Expand Down Expand Up @@ -210,8 +235,10 @@ async def parse_directory(
# If project exists but commit doesn't match or status is not READY, reparse
cleanup_graph = True
logger.info(
f"Submitting parsing task for existing project {project_id} "
f"(is_latest={is_latest}, status={project.status})"
"Submitting parsing task for existing project.",
project_id=project_id,
is_latest=is_latest,
status=project.status,
)
process_parsing.delay(
repo_details.model_dump(),
Expand Down Expand Up @@ -258,10 +285,10 @@ async def parse_directory(
async def handle_new_project(
repo_details: ParsingRequest,
user_id: str,
user_email: str,
user_email: str | None,
new_project_id: str,
project_manager: ProjectService,
db: AsyncSession,
db: Session,
):
Comment on lines 286 to 292
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Unused db parameter.

If this is intentionally unused, consider renaming to _db to avoid lint noise.

🧹 Suggested fix
-        db: Session,
+        _db: Session,
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
repo_details: ParsingRequest,
user_id: str,
user_email: str,
user_email: str | None,
new_project_id: str,
project_manager: ProjectService,
db: AsyncSession,
db: Session,
):
repo_details: ParsingRequest,
user_id: str,
user_email: str | None,
new_project_id: str,
project_manager: ProjectService,
_db: Session,
):
🧰 Tools
🪛 Ruff (0.14.14)

[warning] 291-291: Unused static method argument: db

(ARG004)

🤖 Prompt for AI Agents
In `@app/modules/parsing/graph_construction/parsing_controller.py` around lines
286 - 292, The parameter named db: Session is unused in the function that takes
repo_details: ParsingRequest, user_id, user_email, new_project_id,
project_manager: ProjectService, db: Session; rename db to _db to silence lint
warnings (or if callers can be updated, remove the parameter entirely) and
update any call sites or references accordingly so the signature and callers
remain consistent.

response = {
"project_id": new_project_id,
Expand All @@ -278,9 +305,9 @@ async def handle_new_project(
repo_details.commit_id,
repo_details.repo_path,
)
asyncio.create_task(
CodeProviderService(db).get_project_structure_async(new_project_id)
)
# asyncio.create_task(
# CodeProviderService(db).get_project_structure_async(new_project_id)
# )
if not user_email:
user_email = None

Expand Down
Loading