feat: add binary streaming support for large file downloads #4310

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

lucasgomide wants to merge 4 commits into main from lg-support-binary-stream-integration

+342 −14

lib/crewai-tools/src/crewai_tools/tools/crewai_platform_tools/__init__.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -13,10 +13,16 @@ @@
     from crewai_tools.tools.crewai_platform_tools.crewai_platform_tools import (
         CrewaiPlatformTools,
     )
+    from crewai_tools.tools.crewai_platform_tools.file_hook import (
+        process_file_markers,
+        register_file_processing_hook,
+    )
     __all__ = [
         "CrewAIPlatformActionTool",
         "CrewaiPlatformToolBuilder",
         "CrewaiPlatformTools",
+        "process_file_markers",
+        "register_file_processing_hook",
     ]

lib/crewai-tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_action_tool.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,8 @@ @@
     import json
     import os
+    import re
+    import tempfile
     from typing import Any
     from crewai.tools import BaseTool
@@ Expand All / @@ -14,6 +16,26 @@ @@
         get_platform_integration_token,
     )
+    # Prefix of the colon-delimited marker string that _handle_binary_response
+    # returns after saving a binary response body to a temporary file.
+    _FILE_MARKER_PREFIX = "__CREWAI_FILE__"
+    # Fallback mapping from a response's base MIME type to a file suffix.
+    # _get_file_extension consults it only when the filename itself carries
+    # no extension; unknown MIME types yield an empty suffix.
+    _MIME_TO_EXTENSION = {
+        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
+        "application/vnd.ms-excel": ".xls",
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
+        "application/msword": ".doc",
+        "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
+        "application/vnd.ms-powerpoint": ".ppt",
+        "application/pdf": ".pdf",
+        "image/png": ".png",
+        "image/jpeg": ".jpg",
+        "image/gif": ".gif",
+        "image/webp": ".webp",
+        "text/plain": ".txt",
+        "text/csv": ".csv",
+        "application/json": ".json",
+        "application/zip": ".zip",
+    }
     class CrewAIPlatformActionTool(BaseTool):
         action_name: str = Field(default="", description="The name of the action")
@@ Expand Down Expand Up / @@ -71,10 +93,18 @@ def _run(self, **kwargs: Any) -> str: @@
                     url=api_url,
                     headers=headers,
                     json=payload,
-                    timeout=60,
+                    timeout=300,
+                    stream=True,
                     verify=os.environ.get("CREWAI_FACTORY", "false").lower() != "true",
                 )
+                content_type = response.headers.get("Content-Type", "")
+                # Check if response is binary (non-JSON)
+                if "application/json" not in content_type:
+                    return self._handle_binary_response(response)
+                # Normal JSON response
                 data = response.json()
                 if not response.ok:
                     if isinstance(data, dict):
@@ Expand All / @@ -91,3 +121,49 @@ def _run(self, **kwargs: Any) -> str: @@
             except Exception as e:
                 return f"Error executing action {self.action_name}: {e!s}"
+        def _handle_binary_response(self, response: requests.Response) -> str:
+            """Stream a non-JSON response body to a temporary file.
+
+            On success the body is written chunk by chunk to a temporary file
+            (which is intentionally not deleted here) and a marker describing
+            the file is returned for the file hook to pick up.
+
+            Args:
+                response: The streaming HTTP response with binary content.
+
+            Returns:
+                A file marker string in the format
+                ``__CREWAI_FILE__:filename:content_type:file_path``, or an
+                error message when the response status is not successful.
+
+            Raises:
+                Exception: Any error raised while streaming or writing the
+                    body is re-raised after the partial file is removed.
+            """
+            try:
+                # An error status with a non-JSON body (e.g. an HTML error page)
+                # must be reported as a failure, not saved and announced as a
+                # downloaded file.
+                if not response.ok:
+                    return (
+                        f"Error executing action {self.action_name}: "
+                        f"HTTP {response.status_code}"
+                    )
+                content_type = response.headers.get(
+                    "Content-Type", "application/octet-stream"
+                )
+                # The marker is colon-delimited, so a ':' inside the filename
+                # would shift every following field when the marker is split.
+                filename = self._extract_filename_from_headers(
+                    response.headers
+                ).replace(":", "_")
+                extension = self._get_file_extension(content_type, filename)
+                tmp_file = tempfile.NamedTemporaryFile(
+                    delete=False, suffix=extension, prefix="crewai_"
+                )
+                try:
+                    with tmp_file:
+                        for chunk in response.iter_content(chunk_size=8192):
+                            # Skip empty keep-alive chunks.
+                            if chunk:
+                                tmp_file.write(chunk)
+                except Exception:
+                    # Do not leave a truncated download behind; the file is
+                    # already closed at this point, so it can be removed.
+                    try:
+                        os.unlink(tmp_file.name)
+                    except OSError:
+                        pass
+                    raise
+                return f"{_FILE_MARKER_PREFIX}:{filename}:{content_type}:{tmp_file.name}"
+            finally:
+                # The request was made with stream=True, so the connection is
+                # only released once the response is explicitly closed.
+                response.close()
+        def _extract_filename_from_headers(
+            self, headers: requests.structures.CaseInsensitiveDict
+        ) -> str:
+            """Return the bare filename advertised in ``Content-Disposition``.
+
+            Only the plain ``filename=`` parameter is read (not ``filename*=``).
+            A quoted value may contain spaces; an unquoted value ends at the
+            first ``;`` or whitespace. Any directory components are dropped so
+            the result is always a single path segment.
+
+            Args:
+                headers: Response headers to inspect.
+
+            Returns:
+                The filename, or ``"downloaded_file"`` when the header is
+                missing, has no usable ``filename`` parameter, or names only
+                a directory.
+            """
+            fallback = "downloaded_file"
+            content_disposition = headers.get("Content-Disposition", "")
+            if not content_disposition:
+                return fallback
+            match = re.search(
+                r'(?:^|;)\s*filename\s*=\s*(?:"([^"]*)"|([^";\s]+))',
+                content_disposition,
+                flags=re.IGNORECASE,
+            )
+            if match is None:
+                return fallback
+            quoted, bare = match.group(1), match.group(2)
+            raw_name = quoted if quoted is not None else bare
+            # The header is server-controlled input: keep only the final path
+            # component, treating both '/' and '\\' as separators.
+            name = raw_name.replace("\\", "/").rsplit("/", 1)[-1].strip()
+            if name in ("", ".", ".."):
+                return fallback
+            return name
+        def _get_file_extension(self, content_type: str, filename: str) -> str:
+            """Choose a temp-file suffix from the filename or the MIME type.
+
+            The filename's extension wins when it is a short ASCII
+            alphanumeric token; otherwise the base MIME type (parameters such
+            as ``; charset=...`` removed, compared case-insensitively) is
+            looked up in ``_MIME_TO_EXTENSION``.
+
+            Args:
+                content_type: Value of the ``Content-Type`` header.
+                filename: Filename reported for the download.
+
+            Returns:
+                A suffix including the leading dot, or ``""`` if none applies.
+            """
+            # Only the final path component may contribute an extension, and it
+            # must be a plain token: the result is used as a temp-file suffix,
+            # so separators or other punctuation must never leak into it.
+            base_name = filename.replace("\\", "/").rsplit("/", 1)[-1]
+            _, dot, candidate = base_name.rpartition(".")
+            if (
+                dot
+                and candidate.isascii()
+                and candidate.isalnum()
+                and len(candidate) <= 16
+            ):
+                return "." + candidate
+            base_content_type = content_type.split(";")[0].strip().lower()
+            return _MIME_TO_EXTENSION.get(base_content_type, "")

lib/crewai-tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tools.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -6,6 +6,9 @@ @@
     from crewai_tools.tools.crewai_platform_tools.crewai_platform_tool_builder import (
         CrewaiPlatformToolBuilder,
     )
+    from crewai_tools.tools.crewai_platform_tools.file_hook import (
+        register_file_processing_hook,
+    )
     logger = logging.getLogger(__name__)
@@ Expand All / @@ -22,6 +25,8 @@ def CrewaiPlatformTools( # noqa: N802 @@
         Returns:
             A list of BaseTool instances for platform actions
         """
+        register_file_processing_hook()
         builder = CrewaiPlatformToolBuilder(apps=apps)
         return builder.tools()  # type: ignore

lib/crewai-tools/src/crewai_tools/tools/crewai_platform_tools/file_hook.py

-Original file line number
+Diff line change
@@ -0,0 +1,132 @@
+    """File processing hook for CrewAI Platform Tools.
+    This module provides a hook that processes file markers returned by platform tools
+    and injects the files into the LLM context for native file handling.
+    """
+    from __future__ import annotations
+    import logging
+    import os
+    from typing import TYPE_CHECKING
+    if TYPE_CHECKING:
+        from crewai.hooks.tool_hooks import ToolCallHookContext
+    logger = logging.getLogger(__name__)
+    _FILE_MARKER_PREFIX = "__CREWAI_FILE__"
+    _hook_registered = False
+    def process_file_markers(context: ToolCallHookContext) -> str | None:
+        """Process file markers in tool results and inject files into context.
+
+        A tool result that starts with the file marker prefix is parsed, the
+        referenced file is wrapped in a ``crewai_files.File`` and stored on
+        ``context.files`` under its filename. Results that are not strings, or
+        that do not start with the prefix, are left untouched.
+
+        The marker format is:
+            __CREWAI_FILE__:filename:content_type:file_path
+
+        Args:
+            context: The tool call hook context containing the tool result.
+
+        Returns:
+            A human-readable message if a marker was handled (including error
+            messages), None if the result is not a well-formed file marker.
+        """
+        result = context.tool_result
+        # Tool results are not guaranteed to be strings here; anything else
+        # cannot be a marker and must not make the hook raise.
+        if not isinstance(result, str) or not result.startswith(_FILE_MARKER_PREFIX):
+            return None
+        try:
+            # maxsplit=3 keeps any ':' inside the trailing file path intact.
+            parts = result.split(":", 3)
+            if len(parts) < 4:
+                logger.warning("Invalid file marker format: %s", result[:100])
+                return None
+            _, filename, content_type, file_path = parts
+            if not os.path.isfile(file_path):
+                logger.error("File not found: %s", file_path)
+                return f"Error: Downloaded file not found at {file_path}"
+            try:
+                # Optional dependency: without it the file is only reported,
+                # not attached.
+                from crewai_files import File
+            except ImportError:
+                logger.warning(
+                    "crewai_files not installed. File will not be attached to LLM context."
+                )
+                return (
+                    f"Downloaded file: {filename} ({content_type}). "
+                    f"File saved at: {file_path}. "
+                    "Note: Install crewai_files for native LLM file handling."
+                )
+            file = File(source=file_path, content_type=content_type, filename=filename)
+            context.files = {filename: file}
+            file_size = os.path.getsize(file_path)
+            size_str = _format_file_size(file_size)
+            return f"Downloaded file: {filename} ({content_type}, {size_str}). File is attached for LLM analysis."
+        except Exception as e:
+            # Hook boundary: report the failure as the tool result instead of
+            # propagating it.
+            logger.exception("Error processing file marker: %s", e)
+            return f"Error processing downloaded file: {e}"
+    def _format_file_size(size_bytes: int) -> str:
+        """Render a byte count as a short human-readable string.
+
+        Values below 1024 are shown as whole bytes; larger values are scaled
+        to KB, MB or GB (powers of 1024) with one decimal place. GB is the
+        largest unit used.
+
+        Args:
+            size_bytes: Size in bytes.
+
+        Returns:
+            Human-readable size string.
+        """
+        if size_bytes < 1024:
+            return f"{size_bytes} bytes"
+        # Each unit applies while the size is below the next power of 1024.
+        for power, unit in ((1, "KB"), (2, "MB")):
+            if size_bytes < 1024 ** (power + 1):
+                return f"{size_bytes / 1024 ** power:.1f} {unit}"
+        return f"{size_bytes / 1024 ** 3:.1f} GB"
+    def register_file_processing_hook() -> bool:
+        """Install ``process_file_markers`` as a global after-tool-call hook.
+
+        Registration happens at most once per process: a module-level flag
+        records success and later calls return immediately.
+
+        Returns:
+            True if this call registered the hook; False if it was already
+            registered, ``crewai.hooks`` could not be imported, or
+            registration raised.
+        """
+        global _hook_registered
+        if not _hook_registered:
+            try:
+                from crewai.hooks import (
+                    register_after_tool_call_hook as _register_after_tool_call,
+                )
+                _register_after_tool_call(process_file_markers)
+                _hook_registered = True
+                logger.info("File processing hook registered successfully")
+                return True
+            except ImportError:
+                logger.warning(
+                    "crewai.hooks not available. File processing hook not registered."
+                )
+                return False
+            except Exception as exc:
+                logger.exception(f"Failed to register file processing hook: {exc}")
+                return False
+        logger.debug("File processing hook already registered")
+        return False

lib/crewai-tools/tests/tools/crewai_platform_tools/test_crewai_platform_tools.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,7 @@ @@
     from unittest.mock import Mock, patch
     from crewai_tools.tools.crewai_platform_tools import CrewaiPlatformTools
+    from crewai_tools.tools.crewai_platform_tools import file_hook
     class TestCrewaiPlatformTools(unittest.TestCase):
@@ Expand Down Expand Up / @@ -113,3 +114,64 @@ def test_crewai_platform_tools_no_token(self): @@
                 with self.assertRaises(ValueError) as context:
                     CrewaiPlatformTools(apps=["github"])
                 assert "No platform integration token found" in str(context.exception)
+        # Patch decorators are applied bottom-up, so the innermost patch
+        # (register_file_processing_hook) supplies the first mock argument and
+        # requests.get the second; patch.dict injects no argument.
+        @patch.dict("os.environ", {"CREWAI_PLATFORM_INTEGRATION_TOKEN": "test_token"})
+        @patch(
+            "crewai_tools.tools.crewai_platform_tools.crewai_platform_tool_builder.requests.get"
+        )
+        @patch(
+            "crewai_tools.tools.crewai_platform_tools.crewai_platform_tools.register_file_processing_hook"
+        )
+        def test_crewai_platform_tools_registers_file_hook(
+            self, mock_register_hook, mock_get
+        ):
+            """Calling CrewaiPlatformTools invokes the hook registration once."""
+            # Stub the builder's HTTP call with an empty action list for "github".
+            mock_response = Mock()
+            mock_response.raise_for_status.return_value = None
+            mock_response.json.return_value = {"actions": {"github": []}}
+            mock_get.return_value = mock_response
+            CrewaiPlatformTools(apps=["github"])
+            mock_register_hook.assert_called_once()
+    class TestFileHook(unittest.TestCase):
+        """Tests for the file_hook module's registration, parsing and formatting."""
+
+        def setUp(self):
+            # The registration flag is module-level state; reset it so every
+            # test starts from "not registered".
+            file_hook._hook_registered = False
+
+        def tearDown(self):
+            file_hook._hook_registered = False
+
+        @patch("crewai.hooks.register_after_tool_call_hook")
+        def test_register_hook_is_idempotent(self, mock_register):
+            """Test hook registration succeeds once and is idempotent."""
+            assert file_hook.register_file_processing_hook() is True
+            assert file_hook._hook_registered is True
+            mock_register.assert_called_once_with(file_hook.process_file_markers)
+            # Second call should return False and not register again
+            assert file_hook.register_file_processing_hook() is False
+            mock_register.assert_called_once()
+
+        def test_process_file_markers_ignores_non_file_results(self):
+            """Test that non-file-marker results return None."""
+            test_cases = [
+                None,  # Empty result
+                "Regular tool output",  # Non-marker
+                "__CREWAI_FILE__:incomplete",  # Invalid format (missing parts)
+            ]
+            for tool_result in test_cases:
+                # subTest reports each failing input separately instead of
+                # stopping at the first one.
+                with self.subTest(tool_result=tool_result):
+                    mock_context = Mock()
+                    mock_context.tool_result = tool_result
+                    assert file_hook.process_file_markers(mock_context) is None
+
+        def test_format_file_size(self):
+            """Test file size formatting across units."""
+            cases = [
+                (500, "500 bytes"),
+                (1024, "1.0 KB"),
+                (1536, "1.5 KB"),
+                (1024 * 1024, "1.0 MB"),
+                (1024 * 1024 * 1024, "1.0 GB"),
+            ]
+            for size_bytes, expected in cases:
+                with self.subTest(size_bytes=size_bytes):
+                    assert file_hook._format_file_size(size_bytes) == expected

lib/crewai/src/crewai/agents/agent_adapters/openai_agents/openai_agent_tool_adapter.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -16,6 +16,7 @@ @@
     )
     from crewai.tools import BaseTool
     from crewai.utilities.import_utils import require
+    from crewai.utilities.pydantic_schema_utils import force_additional_properties_false
     from crewai.utilities.string_utils import sanitize_tool_name
@@ Expand Down Expand Up @@
             for tool in tools:
                 schema: dict[str, Any] = tool.args_schema.model_json_schema()
-                schema.update({"additionalProperties": False, "type": "object"})
+                schema = force_additional_properties_false(schema)
+                schema.update({"type": "object"})
                 openai_tool: OpenAIFunctionTool = cast(
                     OpenAIFunctionTool,
@@ Expand Down @@

lib/crewai/src/crewai/agents/crew_agent_executor.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -930,6 +930,10 @@ def _handle_native_tool_calls( @@
                 "name": func_name,
                 "content": result,
             }
+            if after_hook_context.files:
+                tool_message["files"] = after_hook_context.files
             self.messages.append(tool_message)
             # Log the tool execution
@@ Expand Down @@

lib/crewai/src/crewai/experimental/agent_executor.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -814,6 +814,10 @@ def execute_native_tool( @@
                     "name": func_name,
                     "content": result,
                 }
+                if after_hook_context.files:
+                    tool_message["files"] = after_hook_context.files
                 self.state.messages.append(tool_message)
                 # Log the tool execution
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat: add binary streaming support for large file downloads #4310

Diff view

Diff view

There are no files selected for viewing

lucasgomide Jan 30, 2026

Uh oh!

Uh oh!

Uh oh!

Uh oh!

feat: add binary streaming support for large file downloads #4310

Are you sure you want to change the base?

feat: add binary streaming support for large file downloads #4310

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

lucasgomide Jan 30, 2026

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!