80 changes: 79 additions & 1 deletion src/writer/app_runner.py
@@ -1,5 +1,6 @@
import asyncio
import concurrent.futures
import hashlib
import importlib.util
import io
import logging
@@ -8,6 +9,7 @@
import multiprocessing.connection
import multiprocessing.synchronize
import os
import pathlib
import shutil
import signal
import subprocess
@@ -16,7 +18,7 @@
import threading
import zipfile
from types import ModuleType
from typing import Any, Callable, Dict, List, Optional, Union, cast
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast

import watchdog.events
from pydantic import ValidationError
@@ -691,6 +693,76 @@ def on_any_event(self, event) -> None:
return
self.update_callback()

class ProjectHashLogHandler(watchdog.events.PatternMatchingEventHandler):
def __init__(self, app_path: str, wf_project_context: WfProjectContext, patterns: List[str]):
super().__init__(patterns=patterns)
self.wf_project_context = wf_project_context

for file in pathlib.Path(app_path).rglob("*"):
if file.is_dir():
continue
file_hash = hashlib.md5()
try:
with open(file, 'rb') as f:
while chunk := f.read(8192):
file_hash.update(chunk)
self.wf_project_context.file_hashes[str(file.absolute())] = file_hash.hexdigest()
except Exception as e:
logging.warning(f"Failed to hash {file}: {e}")

self.project_hash = ""
self._log_hash()

def _calculate_project_hash(self) -> str:
project_hash = hashlib.md5()
for filename in sorted(self.wf_project_context.file_hashes.keys()):
file_hash = self.wf_project_context.file_hashes[filename]
project_hash.update(bytes.fromhex(file_hash))
return project_hash.hexdigest()

def _process_file(self, file_path) -> None:
try:
file_hash = hashlib.md5()
with open(file_path, 'rb') as f:
while chunk := f.read(8192):
file_hash.update(chunk)
self.wf_project_context.file_hashes[file_path] = file_hash.hexdigest()
self._log_hash()
except FileNotFoundError:
return
except Exception as e:
logging.warning(f"Failed to hash {file_path}: {e}")

def _log_hash(self) -> None:
previous_project_hash = self.project_hash
self.project_hash = self._calculate_project_hash()
if previous_project_hash != self.project_hash:
logging.debug(f"Project hash: {self.project_hash}")

def on_modified(self, event: Union[watchdog.events.DirModifiedEvent, watchdog.events.FileModifiedEvent]):
if not event.is_directory:
self._process_file(event.src_path)

def on_created(self, event: Union[watchdog.events.DirCreatedEvent, watchdog.events.FileCreatedEvent]):
if not event.is_directory:
self._process_file(event.src_path)
else:
for sub_event in watchdog.events.generate_sub_created_events(event.src_path):
self.on_created(sub_event)

def on_moved(self, event: Union[watchdog.events.DirMovedEvent, watchdog.events.FileMovedEvent]):
if not event.is_directory:
self.wf_project_context.file_hashes.pop(event.src_path, None)
self._process_file(event.dest_path)
else:
for sub_event in watchdog.events.generate_sub_moved_events(event.src_path, event.dest_path):
self.on_moved(sub_event)

def on_deleted(self, event: Union[watchdog.events.DirDeletedEvent, watchdog.events.FileDeletedEvent]):
if not event.is_directory:
self.wf_project_context.file_hashes.pop(event.src_path, None)
self._log_hash()
Comment on lines +761 to +764
@UladzislauK-Writer (Collaborator, Author) commented on Dec 23, 2025:
I checked: deleting a directory (at least from the UI) causes everything inside it to fire an event, so we don't need to handle directory events recursively.
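A quick standalone way to check this observation (an illustrative sketch, not part of the PR; the handler class, temp paths, and sleeps are assumptions, and whether per-file events fire on a recursive delete can vary by watchdog backend and platform):

import logging
import shutil
import tempfile
import time
from pathlib import Path

import watchdog.events
import watchdog.observers

class DeletionLogger(watchdog.events.FileSystemEventHandler):
    # Log every deletion event so the per-file events produced by
    # removing a directory tree can be observed directly.
    def on_deleted(self, event):
        logging.info("deleted: %s (is_directory=%s)", event.src_path, event.is_directory)

logging.basicConfig(level=logging.INFO)
root = Path(tempfile.mkdtemp())
(root / "sub").mkdir()
(root / "sub" / "b.txt").write_text("world")

observer = watchdog.observers.Observer()
observer.schedule(DeletionLogger(), path=str(root), recursive=True)
observer.start()
time.sleep(0.5)  # give the observer time to start watching
shutil.rmtree(root / "sub")  # expect an event for b.txt as well as for sub/
time.sleep(1)  # let the event queue drain before stopping
observer.stop()
observer.join()

On Linux (inotify) this typically logs a FileDeletedEvent for b.txt before the DirDeletedEvent for sub/, which matches the reasoning above for leaving the directory branch of on_deleted empty.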



class ThreadSafeAsyncEvent(asyncio.Event):
"""Asyncio event adapted to be thread-safe."""
@@ -824,6 +896,12 @@ def _start_fs_observer(self):
path=self.app_path,
recursive=True,
)
if logging.getLogger().isEnabledFor(logging.DEBUG):
self.observer.schedule(
ProjectHashLogHandler(self.app_path, self.wf_project_context, patterns=["*"]),
path=self.app_path,
recursive=True,
)
# See _install_requirements docstring for info
# self.observer.schedule(
# FileEventHandler(self._install_requirements, patterns=["requirements.txt"]),
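One operational note on the hunk above: the hash logger is only scheduled when the root logger is enabled for DEBUG, so the "Project hash: ..." lines never appear at the default log level. A minimal sketch of enabling it (assuming logging can be configured in the process before the observer starts):

import logging

# _start_fs_observer checks logging.getLogger().isEnabledFor(logging.DEBUG),
# so the root logger must be set to DEBUG before the AppRunner starts watching.
logging.basicConfig(level=logging.DEBUG)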
156 changes: 155 additions & 1 deletion tests/backend/test_app_runner.py
@@ -1,8 +1,11 @@
import asyncio
import hashlib
import threading
from types import SimpleNamespace

import pytest
from writer.app_runner import AppRunner
import watchdog.events
from writer.app_runner import AppRunner, ProjectHashLogHandler
from writer.ss_types import (
EventRequest,
InitSessionRequest,
@@ -345,3 +348,154 @@ async def test_handle_event_should_return_result_of_event_handler_execution(

# Then
assert res.payload.result["result"] is not None



@pytest.fixture
def wf_project_context():
return SimpleNamespace(file_hashes={})


@pytest.fixture
def sample_app(tmp_path):
"""
Creates a directory structure:
app/
a.txt
sub/
b.txt
"""
app = tmp_path / "app"
app.mkdir()

(app / "a.txt").write_text("hello")
sub = app / "sub"
sub.mkdir()
(sub / "b.txt").write_text("world")

return app


class TestProjectHashLogHandler:
@staticmethod
def md5_of_bytes(data: bytes) -> str:
h = hashlib.md5()
h.update(data)
return h.hexdigest()

def test_initial_hashing(self, sample_app, wf_project_context):
handler = ProjectHashLogHandler(
app_path=str(sample_app),
wf_project_context=wf_project_context,
patterns=["*"],
)

expected = {
str((sample_app / "a.txt").absolute()): self.md5_of_bytes(b"hello"),
str((sample_app / "sub" / "b.txt").absolute()): self.md5_of_bytes(b"world"),
}

assert wf_project_context.file_hashes == expected
assert handler.project_hash == handler._calculate_project_hash()


def test_project_hash_is_order_independent(self, sample_app, wf_project_context):
handler = ProjectHashLogHandler(
app_path=str(sample_app),
wf_project_context=wf_project_context,
patterns=["*"],
)

original_hash = handler.project_hash

# Reinsert hashes in reverse order
items = list(wf_project_context.file_hashes.items())
wf_project_context.file_hashes.clear()
for k, v in reversed(items):
wf_project_context.file_hashes[k] = v

assert handler._calculate_project_hash() == original_hash


def test_on_modified_updates_file_hash(self, sample_app, wf_project_context):
handler = ProjectHashLogHandler(
app_path=str(sample_app),
wf_project_context=wf_project_context,
patterns=["*"],
)

file_path = sample_app / "a.txt"
file_path.write_text("changed")

event = watchdog.events.FileModifiedEvent(str(file_path))
handler.on_modified(event)

assert wf_project_context.file_hashes[str(file_path)] == self.md5_of_bytes(b"changed")


def test_on_created_adds_file(self, sample_app, wf_project_context):
handler = ProjectHashLogHandler(
app_path=str(sample_app),
wf_project_context=wf_project_context,
patterns=["*"],
)

new_file = sample_app / "new.txt"
new_file.write_text("new")

event = watchdog.events.FileCreatedEvent(str(new_file))
handler.on_created(event)

assert str(new_file) in wf_project_context.file_hashes
assert wf_project_context.file_hashes[str(new_file)] == self.md5_of_bytes(b"new")


def test_on_deleted_removes_file(self, sample_app, wf_project_context):
handler = ProjectHashLogHandler(
app_path=str(sample_app),
wf_project_context=wf_project_context,
patterns=["*"],
)

file_path = sample_app / "a.txt"
event = watchdog.events.FileDeletedEvent(str(file_path))

handler.on_deleted(event)

assert str(file_path) not in wf_project_context.file_hashes


def test_on_moved_updates_hashes(self, sample_app, wf_project_context):
handler = ProjectHashLogHandler(
app_path=str(sample_app),
wf_project_context=wf_project_context,
patterns=["*"],
)

src = sample_app / "a.txt"
dest = sample_app / "a_renamed.txt"
src.rename(dest)

event = watchdog.events.FileMovedEvent(
src_path=str(src),
dest_path=str(dest),
)

handler.on_moved(event)

assert str(src) not in wf_project_context.file_hashes
assert str(dest) in wf_project_context.file_hashes
assert wf_project_context.file_hashes[str(dest)] == self.md5_of_bytes(b"hello")


def test_process_file_missing_is_ignored(self, sample_app, wf_project_context):
handler = ProjectHashLogHandler(
app_path=str(sample_app),
wf_project_context=wf_project_context,
patterns=["*"],
)

missing_file = sample_app / "missing.txt"

handler._process_file(str(missing_file))
assert str(missing_file) not in wf_project_context.file_hashes
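A side note on test_project_hash_is_order_independent: the order independence comes from _calculate_project_hash sorting file names before folding the per-file digests into the aggregate. The aggregation can be reproduced standalone (a sketch mirroring the implementation; the paths and contents are made up):

import hashlib

file_hashes = {
    "/app/sub/b.txt": hashlib.md5(b"world").hexdigest(),
    "/app/a.txt": hashlib.md5(b"hello").hexdigest(),
}

project_hash = hashlib.md5()
for filename in sorted(file_hashes):  # sorted keys make dict insertion order irrelevant
    project_hash.update(bytes.fromhex(file_hashes[filename]))
print(project_hash.hexdigest())  # same digest regardless of insertion order

MD5 is presumably acceptable here since the hash serves only as a change fingerprint in debug logs, not as a security mechanism.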