diff --git a/neuro_san/internals/run_context/langchain/llms/default_llm_info.hocon b/neuro_san/internals/run_context/langchain/llms/default_llm_info.hocon index 5e4500019..1a7eccf2e 100644 --- a/neuro_san/internals/run_context/langchain/llms/default_llm_info.hocon +++ b/neuro_san/internals/run_context/langchain/llms/default_llm_info.hocon @@ -746,6 +746,7 @@ "num_gpu": null, "num_thread": null, "num_predict": null, # Will use max_tokens + "reasoning": null, # Controls the reasoning/thinking mode for supported models. If None (default), the model will use its default reasoning behavior. "repeat_last_n": null, "repeat_penalty": null, "temperature": null, diff --git a/neuro_san/internals/run_context/langchain/llms/standard_langchain_llm_factory.py b/neuro_san/internals/run_context/langchain/llms/standard_langchain_llm_factory.py index e5885ec87..f494a1c1d 100644 --- a/neuro_san/internals/run_context/langchain/llms/standard_langchain_llm_factory.py +++ b/neuro_san/internals/run_context/langchain/llms/standard_langchain_llm_factory.py @@ -185,6 +185,7 @@ def create_base_chat_model(self, config: Dict[str, Any], num_gpu=config.get("num_gpu"), num_thread=config.get("num_thread"), num_predict=config.get("num_predict", config.get("max_tokens")), + reasoning=config.get("reasoning"), repeat_last_n=config.get("repeat_last_n"), repeat_penalty=config.get("repeat_penalty"), temperature=config.get("temperature"),