Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 55 additions & 118 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,37 +11,29 @@

import aiohttp
import gradio as gr
from dotenv import load_dotenv
from fastapi import FastAPI
from fastmcp import FastMCP
from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request
from starlette.responses import PlainTextResponse

# Load environment variables from .env file
load_dotenv()
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
SERVER_PORT = int(os.getenv("SERVER_PORT", "8003"))
SERVER_HOST = os.getenv("MCP_SERVER_HOST", "0.0.0.0")
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "random")
GITHUB_API_BASE = os.getenv("GITHUB_API_BASE_URL", "https://api.github.com")

# Configure logging
logging.basicConfig(
level=os.getenv("LOG_LEVEL", "INFO"),
format='%(asctime)s - %(levelname)s - %(message)s'
level=getattr(logging, LOG_LEVEL.upper()),
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Initialize MCP server with a descriptive name
mcp = FastMCP("GitHub Knowledge Vault MCP Server")

# Configuration
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
GITHUB_API_BASE = os.getenv("GITHUB_API_BASE_URL", "https://api.github.com")
MCP_SERVER_PORT = int(os.getenv("MCP_SERVER_PORT", "8003"))

# API Constants
RESULTS_PER_PAGE = 100
SEARCH_RESULTS_LIMIT = 50

# Validate configuration
if not GITHUB_TOKEN:
logger.warning("GITHUB_TOKEN not set. API rate limits will be restricted.")


# ============================================================================
# Helper Functions
Expand Down Expand Up @@ -125,32 +117,6 @@ def determine_content_type(filename: str) -> str:
# ============================================================================

async def get_org_repos(org: str) -> List[Dict[str, Any]]:
"""
Fetch all repositories from a GitHub organization

This tool uses the GitHub Search API to efficiently find repositories
that have a /doc folder, falling back to checking each repo individually
if the search API is unavailable.

Args:
org: GitHub organization name (e.g., "microsoft", "google")

Returns:
List of repository dictionaries with structure:
[
{
"id": "123456",
"name": "repo-name",
"description": "Repository description",
"url": "https://github.com/org/repo",
"hasDocFolder": true
},
...
]

Example:
repos = await get_org_repos("anthropics")
"""
async with aiohttp.ClientSession() as session:
headers = create_headers()

Expand Down Expand Up @@ -256,10 +222,8 @@ async def get_repo_docs(org: str, repo: str) -> List[Dict[str, Any]]:
"name": "README.md",
"path": "doc/README.md",
"type": "markdown",
"size": 1234,
"url": "https://github.com/org/repo/blob/main/doc/README.md",
"download_url": "https://raw.githubusercontent.com/.../README.md",
"sha": "abc123..."
},
...
]
Expand Down Expand Up @@ -312,10 +276,8 @@ async def get_repo_docs(org: str, repo: str) -> List[Dict[str, Any]]:
"name": name,
"path": item["path"],
"type": content_type,
"size": item["size"],
"url": item["html_url"],
"download_url": item.get("download_url", ""),
"sha": item["sha"]
})
else:
skipped += 1
Expand All @@ -341,8 +303,6 @@ async def get_file_content(org: str, repo: str, path: str) -> Dict[str, Any]:
"name": "README.md",
"path": "doc/README.md",
"content": "# Documentation\\n\\nThis is...",
"size": 1234,
"sha": "abc123...",
"encoding": "base64"
}

Expand Down Expand Up @@ -381,38 +341,11 @@ async def get_file_content(org: str, repo: str, path: str) -> Dict[str, Any]:
"name": data["name"],
"path": data["path"],
"content": content,
"size": data["size"],
"sha": data["sha"],
"encoding": data.get("encoding", "base64")
}


async def search_documentation(org: str, query: str) -> List[Dict[str, Any]]:
"""
Search for documentation files across all repositories in an organization

Uses GitHub Code Search API to find matching files in /doc folders

Args:
org: GitHub organization name
query: Search query string (e.g., "authentication", "API", "tutorial")

Returns:
List of search result dictionaries:
[
{
"name": "authentication.md",
"path": "doc/authentication.md",
"repository": "repo-name",
"url": "https://github.com/org/repo/blob/main/doc/auth.md",
"sha": "abc123..."
},
...
]

Example:
results = await search_documentation("anthropics", "streaming")
"""
async with aiohttp.ClientSession() as session:
headers = create_headers()
search_url = f"{GITHUB_API_BASE}/search/code"
Expand Down Expand Up @@ -441,7 +374,6 @@ async def search_documentation(org: str, query: str) -> List[Dict[str, Any]]:
"path": item["path"],
"repository": repo_info.get("name", ""),
"url": item["html_url"],
"sha": item["sha"]
})

logger.info(f"Found {len(results)} matching files")
Expand All @@ -455,10 +387,30 @@ async def search_documentation(org: str, query: str) -> List[Dict[str, Any]]:
@mcp.tool()
async def get_org_repos_tool(org: str) -> List[Dict[str, Any]]:
"""
Fetch all repositories from a GitHub organization, detecting /doc folders
Fetch all repositories from a GitHub organization

This tool uses the GitHub Search API to efficiently find repositories
that have a /doc folder, falling back to checking each repo individually
if the search API is unavailable.

Args:
org: GitHub organization name (e.g., "microsoft", "google")

Returns:
List of repository dictionaries with structure:
[
{
"id": "123456",
"name": "repo-name",
"description": "Repository description",
"url": "https://github.com/org/repo",
"hasDocFolder": true
},
...
]

Example:
repos = await get_org_repos("anthropics")
"""
return await get_org_repos(org)

Expand Down Expand Up @@ -493,9 +445,26 @@ async def search_documentation_tool(org: str, query: str) -> List[Dict[str, Any]
"""
Search for documentation files across all repositories in an organization

Uses GitHub Code Search API to find matching files in /doc folders

Args:
org: GitHub organization name
query: Search query string (e.g., "authentication", "API", "tutorial")

Returns:
List of search result dictionaries:
[
{
"name": "authentication.md",
"path": "doc/authentication.md",
"repository": "repo-name",
"url": "https://github.com/org/repo/blob/main/doc/auth.md",
},
...
]

Example:
results = await search_documentation("anthropics", "streaming")
"""
return await search_documentation(org, query)

Expand Down Expand Up @@ -568,17 +537,6 @@ async def content_resource(org: str, repo: str, path: str) -> str:
file_data = await get_file_content(org, repo, path)
return file_data["content"]


# ============================================================================
# ASGI Application (for HTTP/remote deployment)
# ============================================================================

# Create MCP ASGI application
mcp_app = mcp.http_app()

# Create main FastAPI app
main_app = FastAPI(title="GitHub MCP Server")

# Create Gradio Interface
def create_gradio_interface():
"""Create Gradio interface to display MCP server information"""
Expand Down Expand Up @@ -690,7 +648,7 @@ async def check_mcp_status():
### Server Information

**GitHub Token:** {'✅ Configured' if GITHUB_TOKEN else '❌ Not configured'}
**Port:** {MCP_SERVER_PORT}
**Port:** {SERVER_PORT}
**API Base:** {GITHUB_API_BASE}

### Links
Expand All @@ -703,40 +661,19 @@ async def check_mcp_status():

return demo

main_app.mount("/mcp", mcp_app)

@main_app.get("/health")
def health():
return {"status": "ok"}
# Add health check endpoint
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request: Request) -> PlainTextResponse:
return PlainTextResponse("OK")

# Create Gradio blocks
gradio_blocks = create_gradio_interface()
app = gr.mount_gradio_app(main_app, gradio_blocks, path="/")

# Add CORS middleware for remote access
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # Allow all origins; restrict in production if needed
allow_credentials=True,
allow_methods=["GET", "POST", "OPTIONS"],
allow_headers=["*"],
expose_headers=["mcp-session-id", "mcp-protocol-version"],
max_age=86400,
)


# ============================================================================
# Main Entry Point
# ============================================================================

if __name__ == "__main__":
import uvicorn

logger.info("=" * 60)
logger.info("Starting GitHub MCP Server...")
logger.info("=" * 60)
logger.info(f" Token configured: {'Yes' if GITHUB_TOKEN else 'No'}")
logger.info(f" Port: {MCP_SERVER_PORT}")
logger.info(f" MCP endpoint: /mcp/")
logger.info("=" * 60)

uvicorn.run(app, host="0.0.0.0", port=MCP_SERVER_PORT)
#gradio_blocks.launch(share=True)
mcp.run()