Skip to content

Commit a329831

Browse files
Otto-AGPT and ntindle authored
feat(backend): Add ClamAV scanning for local file paths (#11988)
## Context

From PR #11796 review discussion. Files processed by the video blocks (downloads, uploads, generated videos) should be scanned through ClamAV for malware detection.

## Problem

`store_media_file()` in `backend/util/file.py` already scans:

- `workspace://` references
- Cloud storage paths
- Data URIs (`data:...`)
- HTTP/HTTPS URLs

**But local file paths were NOT scanned.** The `else` branch only verified the file exists.

This gap affected video processing blocks (e.g., `LoopVideoBlock`, `AddAudioToVideoBlock`) that:

1. Download/receive input media
2. Process it locally (loop, add audio, etc.)
3. Write output to temp directory
4. Call `store_media_file(output_filename, ...)` with a local path → **skipped virus scanning**

## Solution

Added virus scanning to the local file path branch:

```python
# Virus scan the local file before any further processing
local_content = target_path.read_bytes()
if len(local_content) > MAX_FILE_SIZE_BYTES:
    raise ValueError(...)
await scan_content_safe(local_content, filename=sanitized_file)
```

## Changes

- `backend/util/file.py` - Added ~7 lines to scan local files (consistent with other input types)
- `backend/util/file_test.py` - Added 2 test cases for local file scanning

## Risk Assessment

- **Low risk:** Single point of change, follows existing pattern
- **Backwards compatible:** No API changes
- **Fail-safe:** If scanning fails, file is rejected (existing behavior)

Closes SECRT-1904

Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co>
1 parent 98dd1a9 commit a329831

File tree

2 files changed

+105
-0
lines changed

2 files changed

+105
-0
lines changed

autogpt_platform/backend/backend/util/file.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,14 @@ def _strip_base_prefix(absolute_path: Path, base: Path) -> str:
342342
if not target_path.is_file():
343343
raise ValueError(f"Local file does not exist: {target_path}")
344344

345+
# Virus scan the local file before any further processing
346+
local_content = target_path.read_bytes()
347+
if len(local_content) > MAX_FILE_SIZE_BYTES:
348+
raise ValueError(
349+
f"File too large: {len(local_content)} bytes > {MAX_FILE_SIZE_BYTES} bytes"
350+
)
351+
await scan_content_safe(local_content, filename=sanitized_file)
352+
345353
# Return based on requested format
346354
if return_format == "for_local_processing":
347355
# Use when processing files locally with tools like ffmpeg, MoviePy, PIL

autogpt_platform/backend/backend/util/file_test.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,3 +247,100 @@ async def test_store_media_file_cloud_retrieval_error(self):
247247
execution_context=make_test_context(graph_exec_id=graph_exec_id),
248248
return_format="for_local_processing",
249249
)
250+
251+
@pytest.mark.asyncio
async def test_store_media_file_local_path_scanned(self):
    """Test that local file paths are scanned for viruses.

    The cloud storage handler, the virus scanner and ``Path`` (as seen by
    ``backend.util.file``) are all replaced with mocks, so no real file
    is read and no real scanner is contacted. The test passes when the
    scanner stand-in has been *awaited* exactly once with the bytes
    returned by the mocked ``read_bytes()`` and the requested file name,
    and the call returns the relative path of the file.
    """
    graph_exec_id = "test-exec-123"
    local_file = "test_video.mp4"
    file_content = b"fake video content"

    with patch(
        "backend.util.file.get_cloud_storage_handler"
    ) as mock_handler_getter, patch(
        "backend.util.file.scan_content_safe"
    ) as mock_scan, patch(
        "backend.util.file.Path"
    ) as mock_path_class:

        # Mock cloud storage handler - not a cloud path
        mock_handler = MagicMock()
        mock_handler.is_cloud_path.return_value = False
        mock_handler_getter.return_value = mock_handler

        # Mock virus scanner - a clean scan returns nothing
        mock_scan.return_value = None

        # Mock file system operations: Path(...) -> base, base / x -> target,
        # target.resolve() -> resolved; the resolved path is the one whose
        # is_relative_to / is_file / read_bytes answers are scripted here.
        mock_base_path = MagicMock()
        mock_target_path = MagicMock()
        mock_resolved_path = MagicMock()

        mock_path_class.return_value = mock_base_path
        mock_base_path.mkdir = MagicMock()
        mock_base_path.__truediv__ = MagicMock(return_value=mock_target_path)
        mock_target_path.resolve.return_value = mock_resolved_path
        mock_resolved_path.is_relative_to.return_value = True
        mock_resolved_path.is_file.return_value = True
        mock_resolved_path.read_bytes.return_value = file_content
        # `Path` here is the test module's own (unpatched) import.
        mock_resolved_path.relative_to.return_value = Path(local_file)
        mock_resolved_path.name = local_file

        result = await store_media_file(
            file=MediaFileType(local_file),
            execution_context=make_test_context(graph_exec_id=graph_exec_id),
            return_format="for_local_processing",
        )

        # Verify the virus scan was actually awaited for the local file.
        # assert_called_once_with would also pass if the production code
        # created the coroutine but forgot to await it, i.e. if the scan
        # never ran; asserting on the await closes that gap.
        mock_scan.assert_awaited_once_with(file_content, filename=local_file)

        # Result should be the relative path
        assert str(result) == local_file
300+
301+
@pytest.mark.asyncio
async def test_store_media_file_local_path_virus_detected(self):
    """An infected local file must make store_media_file raise VirusDetectedError.

    The scanner stand-in is scripted to raise ``VirusDetectedError``; the
    test passes only if that error propagates out of ``store_media_file``.
    """
    from backend.api.features.store.exceptions import VirusDetectedError

    exec_id = "test-exec-123"
    infected_name = "infected.exe"
    payload = b"malicious content"

    # Storage handler stand-in that reports the input is not a cloud path.
    storage = MagicMock()
    storage.is_cloud_path.return_value = False

    # Fake path chain: Path(...) -> base_dir, base_dir / x -> joined,
    # joined.resolve() -> resolved, which answers True to
    # is_relative_to() / is_file() and yields `payload` from read_bytes().
    resolved = MagicMock()
    resolved.is_relative_to.return_value = True
    resolved.is_file.return_value = True
    resolved.read_bytes.return_value = payload

    joined = MagicMock()
    joined.resolve.return_value = resolved

    base_dir = MagicMock()
    base_dir.mkdir = MagicMock()
    base_dir.__truediv__ = MagicMock(return_value=joined)

    with patch(
        "backend.util.file.get_cloud_storage_handler"
    ) as get_handler, patch(
        "backend.util.file.scan_content_safe"
    ) as scanner, patch(
        "backend.util.file.Path"
    ) as path_cls:
        get_handler.return_value = storage
        path_cls.return_value = base_dir

        # Scanner reports an infection.
        scanner.side_effect = VirusDetectedError(
            "EICAR-Test-File", "File rejected due to virus detection"
        )

        with pytest.raises(VirusDetectedError):
            await store_media_file(
                file=MediaFileType(infected_name),
                execution_context=make_test_context(graph_exec_id=exec_id),
                return_format="for_local_processing",
            )

0 commit comments

Comments
 (0)