Skip to content

Commit 76987d2

Browse files
committed
feat(recon): implement keyword normalization for recon contributions
1 parent b3792d6 commit 76987d2

File tree

2 files changed

+89
-1
lines changed

2 files changed

+89
-1
lines changed

core/recon.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,44 @@ async def _build_recon_history_texts_async(
269269
return local_text, global_text
270270

271271

272+
async def _normalize_keywords_list(raw_keywords: List[str] | None) -> List[str]:
273+
"""Normalize Recon keywords into single-word tokens.
274+
275+
Rules:
276+
- split on underscores, hyphens and non-alphanumeric chars
277+
- split camelCase boundaries (e.g. behaviorChange -> behavior, change)
278+
- lowercase, strip and dedupe while preserving order
279+
- return an empty list if input is None or no valid tokens
280+
"""
281+
if not raw_keywords:
282+
return []
283+
284+
import re
285+
286+
def _split_camel(s: str) -> List[str]:
287+
# Insert space between lower->upper transitions then split
288+
parts = re.sub('([a-z0-9])([A-Z])', r"\1 \2", s).split()
289+
return parts
290+
291+
seen = set()
292+
out: List[str] = []
293+
for k in raw_keywords:
294+
if not k:
295+
continue
296+
# replace non-alnum with space, then split camelCase
297+
k = str(k).strip()
298+
k = re.sub(r"[^0-9A-Za-z]+", " ", k)
299+
for part in k.split():
300+
for sub in _split_camel(part):
301+
tok = sub.strip().lower()
302+
if not tok:
303+
continue
304+
if tok not in seen:
305+
seen.add(tok)
306+
out.append(tok)
307+
return out
308+
309+
272310
async def gather_recon_contributions(
273311
message=None,
274312
context_memory=None,
@@ -407,6 +445,9 @@ async def gather_recon_contributions(
407445
return []
408446

409447
# Dispatch responses to plugins
448+
# normalize keywords into single-word tokens before dispatching to plugins
449+
norm_keywords = await _normalize_keywords_list(keywords)
450+
410451
for plugin in recon_plugins:
411452
key = plugin.get_recon_key()
412453
plugin_name = plugin.__class__.__name__
@@ -418,7 +459,7 @@ async def gather_recon_contributions(
418459
context_memory=context_memory,
419460
text=text,
420461
tags=tags,
421-
keywords=keywords,
462+
keywords=norm_keywords,
422463
max_results=max_results,
423464
)
424465
except Exception as e:

tests/test_prompt_recon_injection.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,50 @@ async def fake_gather_recon_contributions(**kwargs):
7676

7777
# Clean up registry
7878
PLUGIN_REGISTRY.pop("fake_recon_injection", None)
79+
80+
81+
@pytest.mark.asyncio
async def test_recon_keyword_normalization(monkeypatch):
    """Keywords passed into gather_recon_contributions must be normalized into single-word tokens."""
    recorded = {}

    class KWPlugin:
        # Minimal recon-plugin stub: records the keywords the core hands it.
        def get_recon_key(self):
            return "KW"

        def get_recon_instruction(self):
            return "Return keywords"

        async def parse_recon_response(self, data, **kwargs):
            # record what keywords the core passed to us
            recorded['keywords'] = kwargs.get('keywords')
            return []

    from core.core_initializer import PLUGIN_REGISTRY
    PLUGIN_REGISTRY['kw_plugin_test'] = KWPlugin()

    import core.recon as recon_mod

    try:
        # Call gather_recon_contributions with compound keywords
        await recon_mod.gather_recon_contributions(
            message=None,
            context_memory=None,
            text="test",
            tags=None,
            keywords=["narrative_part", "behavior_change", "locale_update"],
            max_results=3,
        )

        # Plugin should have received normalized single-word tokens (split on '_' and lowercased)
        assert 'keywords' in recorded, "plugin did not receive keywords"
        assert recorded['keywords'] == [
            "narrative",
            "part",
            "behavior",
            "change",
            "locale",
            "update",
        ]
    finally:
        # Always remove the stub plugin, even when an assertion above fails,
        # so a failing run doesn't leak state into other tests.
        PLUGIN_REGISTRY.pop('kw_plugin_test', None)

0 commit comments

Comments
 (0)