Skip to content

Commit 76987d2

Browse files
committed
feat(recon): implement keyword normalization for recon contributions
1 parent b3792d6 commit 76987d2

File tree

2 files changed

+89
-1
lines changed

2 files changed

+89
-1
lines changed

core/recon.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,44 @@ async def _build_recon_history_texts_async(
269269
return local_text, global_text
270270

271271

272+
async def _normalize_keywords_list(raw_keywords: List[str] | None) -> List[str]:
273+
"""Normalize Recon keywords into single-word tokens.
274+
275+
Rules:
276+
- split on underscores, hyphens and non-alphanumeric chars
277+
- split camelCase boundaries (e.g. behaviorChange -> behavior, change)
278+
- lowercase, strip and dedupe while preserving order
279+
- return an empty list if input is None or no valid tokens
280+
"""
281+
if not raw_keywords:
282+
return []
283+
284+
import re
285+
286+
def _split_camel(s: str) -> List[str]:
287+
# Insert space between lower->upper transitions then split
288+
parts = re.sub('([a-z0-9])([A-Z])', r"\1 \2", s).split()
289+
return parts
290+
291+
seen = set()
292+
out: List[str] = []
293+
for k in raw_keywords:
294+
if not k:
295+
continue
296+
# replace non-alnum with space, then split camelCase
297+
k = str(k).strip()
298+
k = re.sub(r"[^0-9A-Za-z]+", " ", k)
299+
for part in k.split():
300+
for sub in _split_camel(part):
301+
tok = sub.strip().lower()
302+
if not tok:
303+
continue
304+
if tok not in seen:
305+
seen.add(tok)
306+
out.append(tok)
307+
return out
308+
309+
272310
async def gather_recon_contributions(
273311
message=None,
274312
context_memory=None,
@@ -407,6 +445,9 @@ async def gather_recon_contributions(
407445
return []
408446

409447
# Dispatch responses to plugins
448+
# normalize keywords into single-word tokens before dispatching to plugins
449+
norm_keywords = await _normalize_keywords_list(keywords)
450+
410451
for plugin in recon_plugins:
411452
key = plugin.get_recon_key()
412453
plugin_name = plugin.__class__.__name__
@@ -418,7 +459,7 @@ async def gather_recon_contributions(
418459
context_memory=context_memory,
419460
text=text,
420461
tags=tags,
421-
keywords=keywords,
462+
keywords=norm_keywords,
422463
max_results=max_results,
423464
)
424465
except Exception as e:

tests/test_prompt_recon_injection.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,50 @@ async def fake_gather_recon_contributions(**kwargs):
7676

7777
# Clean up registry
7878
PLUGIN_REGISTRY.pop("fake_recon_injection", None)
79+
80+
81+
@pytest.mark.asyncio
async def test_recon_keyword_normalization(monkeypatch):
    """Keywords passed into gather_recon_contributions must be normalized into single-word tokens."""
    recorded = {}

    class KWPlugin:
        # Minimal recon-plugin stub: records the keywords the core hands it.
        def get_recon_key(self):
            return "KW"

        def get_recon_instruction(self):
            return "Return keywords"

        async def parse_recon_response(self, data, **kwargs):
            # record what keywords the core passed to us
            recorded['keywords'] = kwargs.get('keywords')
            return []

    from core.core_initializer import PLUGIN_REGISTRY
    PLUGIN_REGISTRY['kw_plugin_test'] = KWPlugin()

    import core.recon as recon_mod

    try:
        # Call gather_recon_contributions with compound keywords
        await recon_mod.gather_recon_contributions(
            message=None,
            context_memory=None,
            text="test",
            tags=None,
            keywords=["narrative_part", "behavior_change", "locale_update"],
            max_results=3,
        )

        # Plugin should have received normalized single-word tokens (split on '_' and lowercased)
        assert 'keywords' in recorded, "plugin did not receive keywords"
        assert recorded['keywords'] == [
            "narrative",
            "part",
            "behavior",
            "change",
            "locale",
            "update",
        ]
    finally:
        # Always remove the stub plugin, even when an assertion above fails,
        # so a failing run doesn't leak state into other tests.
        PLUGIN_REGISTRY.pop('kw_plugin_test', None)

0 commit comments

Comments
 (0)