claude issues addressed

dimavrem22 · dimavrem22 · commit cfefe46b1492 · 2026-02-11T18:37:38.000-05:00
diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
@@ -11,6 +11,7 @@
 from __future__ import annotations
 
 import json
+import threading
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 from pathlib import Path
@@ -30,6 +31,7 @@
 from bluebox.data_models.llms.vendors import LLMModel, OpenAIModel
 from bluebox.data_models.routine.routine import RoutineExecutionRequest, RoutineInfo
 from bluebox.utils.code_execution_sandbox import execute_python_sandboxed
+from bluebox.utils.infra_utils import read_file_lines
 from bluebox.utils.llm_utils import token_optimized
 from bluebox.utils.logger import get_logger
 
@@ -71,7 +73,7 @@ class BlueBoxAgent(AbstractAgent):
 
         ## Inspecting the Workspace
         - Use `list_workspace_files` to see all files in the workspace (raw/, outputs/, etc.).
-        - Use `read_workspace_file` to read any file by relative path (e.g. "raw/routine_results_2024-01-15_143052_abc.json" or "outputs/results.csv"). Use optional start_line/end_line to read specific line ranges for large files.
+        - Use `read_workspace_file` to read any file by relative path (e.g. "raw/25-01-15-143052-routine_result_1.json" or "outputs/results.csv"). Use optional start_line/end_line to read specific line ranges for large files.
 
         ## Important Rules
         - You ONLY have routine tools, code execution, and file inspection tools. Do not tell the user you can browse, click, type, or interact with web pages directly.
@@ -117,6 +119,8 @@ def __init__(
         self._raw_dir = self._workspace_dir / "raw"
         self._outputs_dir = self._workspace_dir / "outputs"
         self._routine_cache: dict[str, RoutineInfo] = {}
+        self._execution_counter: int = 0
+        self._counter_lock = threading.Lock()
 
         super().__init__(
             emit_message_callable=emit_message_callable,
@@ -245,9 +249,11 @@ def save_result(result: dict[str, Any]) -> dict[str, Any]:
             """Save a single routine result to a JSON file in raw/."""
             try:
                 self._raw_dir.mkdir(parents=True, exist_ok=True)
-                rid = result.get("routine_id", "unknown")
-                timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
-                output_path = self._raw_dir / f"routine_results_{timestamp}_{rid}.json"
+                with self._counter_lock:
+                    self._execution_counter += 1
+                    idx = self._execution_counter
+                timestamp = datetime.now().strftime("%y-%m-%d-%H%M%S")
+                output_path = self._raw_dir / f"{timestamp}-routine_result_{idx}.json"
                 output_path.write_text(json.dumps(result, indent=2, default=str))
                 result["output_file"] = str(output_path)
                 logger.info("Routine result saved to %s", output_path)
@@ -445,37 +451,11 @@ def _read_workspace_file(
         # Resolve and validate path stays within workspace
         resolved = (self._workspace_dir / path).resolve()
         workspace_resolved = self._workspace_dir.resolve()
-        if not str(resolved).startswith(str(workspace_resolved) + "/") and resolved != workspace_resolved:
-            return {"error": f"Access denied: '{path}' is outside the workspace directory"}
-
-        if not resolved.exists():
-            return {"error": f"File not found: {path}"}
-        if not resolved.is_file():
-            return {"error": f"Not a file: {path}"}
-
         try:
-            lines = resolved.read_text().splitlines()
-        except OSError as e:
-            return {"error": f"Failed to read file: {e}"}
-
-        total_lines = len(lines)
-
-        # Apply line range
-        if start_line is not None or end_line is not None:
-            s = (start_line or 1) - 1  # Convert to 0-based
-            e = end_line or total_lines
-            lines = lines[s:e]
-            line_range = f"lines {s + 1}-{min(e, total_lines)} of {total_lines}"
-        else:
-            # Cap output at 200 lines to avoid blowing up context
-            if total_lines > 200:
-                lines = lines[:200]
-                line_range = f"lines 1-200 of {total_lines} (truncated, use start_line/end_line for more)"
-            else:
-                line_range = f"all {total_lines} lines"
+            resolved.relative_to(workspace_resolved)
+        except ValueError:
+            return {"error": f"Access denied: '{path}' is outside the workspace directory"}
 
-        return {
-            "path": path,
-            "line_range": line_range,
-            "content": "\n".join(lines),
-        }
+        result = read_file_lines(resolved, start_line=start_line, end_line=end_line)
+        result["path"] = path
+        return result
diff --git a/bluebox/scripts/agent_http_adapter.py b/bluebox/scripts/agent_http_adapter.py
@@ -78,7 +78,7 @@
 
 def discover_agent_classes() -> dict[str, type]:
     """Build registry of all available AbstractAgent subclasses by class name."""
-    from bluebox.agents.super_discovery_agent import RoutineDiscoveryAgentBeta
+    from bluebox.agents.routine_discovery_agent_beta import RoutineDiscoveryAgentBeta
     from bluebox.agents.bluebox_agent import BlueBoxAgent
 
     # Import all specialist modules to trigger __init_subclass__ registration
diff --git a/bluebox/utils/code_execution_sandbox.py b/bluebox/utils/code_execution_sandbox.py
@@ -26,6 +26,7 @@
 import subprocess
 import sys
 import time
+import uuid
 from typing import Any
 
 from bluebox.config import Config
@@ -110,6 +111,12 @@ def register_lambda_executor(fn: Any) -> None:
     "delattr", "breakpoint", "input", "memoryview",
 )
 
+# Sensitive system paths that must never be used as work_dir
+SENSITIVE_PATH_PREFIXES: tuple[str, ...] = (
+    "/etc", "/var", "/usr", "/bin", "/sbin",
+    "/boot", "/proc", "/sys", "/dev",
+)
+
 
 def _is_docker_available() -> bool:
     """
@@ -200,7 +207,7 @@ def _execute_in_docker(
 """
 
     # Generate unique container name for cleanup on timeout
-    container_name = f"bluebox-sandbox-{os.getpid()}-{int(time.time() * 1000)}"
+    container_name = f"bluebox-sandbox-{os.getpid()}-{uuid.uuid4().hex[:8]}"
 
     docker_cmd = [
         "docker", "run",
@@ -430,6 +437,12 @@ def execute_python_sandboxed(
     if not code:
         return {"error": "No code provided"}
 
+    # Validate work_dir before passing to any backend
+    if work_dir:
+        work_dir = os.path.abspath(work_dir)
+        if any(work_dir == p or work_dir.startswith(p + os.sep) for p in SENSITIVE_PATH_PREFIXES):
+            return {"error": f"work_dir points to a sensitive system path: {work_dir}"}
+
     # Check for blocked patterns (allow open() when work_dir is set)
     safety_error = check_code_safety(code, allow_file_io=bool(work_dir))
     if safety_error:
diff --git a/bluebox/utils/infra_utils.py b/bluebox/utils/infra_utils.py
@@ -7,6 +7,7 @@
 import shutil
 import zipfile
 from pathlib import Path
+from typing import Any
 
 import requests
 
@@ -85,6 +86,69 @@ def extract_zip(zip_path: Path, extract_to: Path) -> bool:
         return False
 
 
+def read_file_lines(
+    file_path: Path,
+    start_line: int | None = None,
+    end_line: int | None = None,
+    max_lines: int = 200,
+) -> dict[str, Any]:
+    """
+    Read a text file with optional line range, streaming to avoid loading
+    the entire file into memory.
+
+    Args:
+        file_path: Resolved path to the file.
+        start_line: Optional 1-based start line (inclusive); values below 1
+            are treated as line 1.
+        end_line: Optional 1-based end line (inclusive); values below 1 mean
+            "through the last line".
+        max_lines: Maximum lines to return when no range is specified.
+
+    Returns:
+        Dict with "content" and "line_range" on success, or a dict with only
+        "error" if the path is missing, is not a file, or cannot be read
+        or decoded as text.
+    """
+    if not file_path.exists():
+        return {"error": f"File not found: {file_path}"}
+    if not file_path.is_file():
+        return {"error": f"Not a file: {file_path}"}
+
+    has_range = start_line is not None or end_line is not None
+    # Lines to skip before collecting; clamped so a negative start_line cannot
+    # leak into the reported range.
+    skip = max((start_line or 1) - 1, 0)
+    # Last 1-based line to collect (None = through EOF). The max_lines cap only
+    # applies when the caller gave no explicit range.
+    if not has_range:
+        last = max_lines
+    else:
+        last = end_line if end_line is not None and end_line > 0 else None
+
+    lines: list[str] = []
+    total_lines = 0
+    try:
+        with file_path.open("r") as f:
+            # Always iterate to EOF so total_lines is the file's full line count.
+            for total_lines, raw in enumerate(f, start=1):
+                if total_lines > skip and (last is None or total_lines <= last):
+                    lines.append(raw.rstrip("\n"))
+    except (OSError, UnicodeDecodeError) as exc:
+        # UnicodeDecodeError is a ValueError, not an OSError; binary files raise it.
+        return {"error": f"Failed to read file: {exc}"}
+
+    if has_range:
+        shown_end = total_lines if last is None else min(last, total_lines)
+        line_range = f"lines {skip + 1}-{shown_end} of {total_lines}"
+    elif total_lines > max_lines:
+        line_range = (
+            f"lines 1-{max_lines} of {total_lines} "
+            "(truncated, use start_line/end_line for more)"
+        )
+    else:
+        line_range = f"all {total_lines} lines"
+
+    return {
+        "line_range": line_range,
+        "content": "\n".join(lines),
+    }
+
+
 def resolve_glob_patterns(
     patterns: list[str],
     extensions: set[str] | None = None,
diff --git a/tests/unit/test_code_execution_sandbox.py b/tests/unit/test_code_execution_sandbox.py
@@ -6,6 +6,7 @@
 
 import os
 import subprocess
+from pathlib import Path
 from unittest.mock import patch, MagicMock
 
 import pytest
@@ -14,6 +15,7 @@
     BLOCKED_MODULES,
     BLOCKED_PATTERNS,
     BLOCKED_BUILTINS,
+    SENSITIVE_PATH_PREFIXES,
     check_code_safety,
     create_safe_builtins,
     execute_python_sandboxed,
@@ -1259,6 +1261,42 @@ def test_docker_mode_passes_work_dir(self) -> None:
             sandbox_module._docker_available = None
 
 
+class TestWorkDirValidation:
+    """Checks on how execute_python_sandboxed validates its work_dir argument."""
+
+    @staticmethod
+    def _assert_rejected(work_dir: str) -> None:
+        """Run a trivial snippet with work_dir and expect the sensitive-path error."""
+        outcome = execute_python_sandboxed("print('hi')", work_dir=work_dir)
+        assert "error" in outcome
+        assert "sensitive system path" in outcome["error"]
+
+    @pytest.mark.parametrize("prefix", SENSITIVE_PATH_PREFIXES)
+    def test_rejects_sensitive_prefix_exact(self, prefix: str) -> None:
+        """Each SENSITIVE_PATH_PREFIXES entry, used verbatim as work_dir, is refused."""
+        self._assert_rejected(prefix)
+
+    @pytest.mark.parametrize("prefix", SENSITIVE_PATH_PREFIXES)
+    def test_rejects_sensitive_prefix_subdir(self, prefix: str) -> None:
+        """A directory nested under a sensitive prefix is refused as well."""
+        self._assert_rejected(f"{prefix}/subdir")
+
+    def test_normalizes_path_with_dotdot(self) -> None:
+        """A '..' path that normalizes into a sensitive prefix is refused."""
+        self._assert_rejected("/etc/../etc/nginx")
+
+    def test_allows_tmp(self, tmp_path: Path) -> None:
+        """An ordinary temporary directory is accepted and the code runs."""
+        import bluebox.utils.code_execution_sandbox as sandbox_module
+        saved_mode = sandbox_module.SANDBOX_MODE
+        # Force the blocklist backend for this call; the finally restores the mode.
+        sandbox_module.SANDBOX_MODE = "blocklist"
+        try:
+            outcome = execute_python_sandboxed("print('ok')", work_dir=str(tmp_path))
+            assert "error" not in outcome
+            assert "ok" in outcome["output"]
+        finally:
+            sandbox_module.SANDBOX_MODE = saved_mode
+
+
 class TestDockerExecutionWorkDir:
     """Tests for Docker execution with work_dir."""
 
diff --git a/tests/unit/test_read_workspace_file.py b/tests/unit/test_read_workspace_file.py
diff --git a/tests/unit/utils/test_infra_utils.py b/tests/unit/utils/test_infra_utils.py