From 153471d8a76df61e6b5b996525b865f9a1b309b0 Mon Sep 17 00:00:00 2001 From: Mohamed ASSOUKTI Date: Tue, 19 Aug 2025 10:56:44 +0200 Subject: [PATCH 1/7] [DERCBOT-1609] Structuring the LLM response --- .../model/genai/BotRAGConfigurationDTO.kt | 6 - .../src/test/kotlin/service/RAGServiceTest.kt | 2 - .../service/RAGValidationServiceTest.kt | 1 - .../dialogs-list-filters.component.ts | 20 +- .../dialogs-list/dialogs-list.component.ts | 4 + .../models/engines-configurations.ts | 201 +++++++++++++- .../rag/rag-settings/models/rag-settings.ts | 3 - .../rag-settings/rag-settings.component.html | 74 ------ .../rag-settings.component.spec.ts | 34 +-- .../rag-settings/rag-settings.component.ts | 100 +------ .../web/src/app/shared/bot-shared.service.ts | 1 + .../chat-ui-message-debug.component.html | 21 +- .../chat-ui-message-debug.component.scss | 21 +- .../admin/bot/rag/BotRAGConfiguration.kt | 2 - .../kotlin/engine/config/RAGAnswerHandler.kt | 159 +++++------- .../kotlin/BotRAGConfigurationMongoDAOTest.kt | 8 +- .../orchestratorclient/responses/Models.kt | 20 +- .../responses/RAGResponse.kt | 3 +- .../models/llm/OllamaLLMSetting.kt | 2 - .../utils/VectorStoreUtils.kt | 2 +- .../models/rag/rag_models.py | 96 ++++++- .../gen_ai_orchestrator/routers/rag_router.py | 28 +- .../routers/responses/responses.py | 7 +- .../callback_handlers_factory.py | 2 - .../langfuse_callback_handler_factory.py | 9 +- .../services/langchain/rag_chain.py | 245 ++++++++++++------ .../observability/observabilty_service.py | 4 +- .../server/tests/services/test_rag_chain.py | 181 +++++++++---- 28 files changed, 733 insertions(+), 523 deletions(-) diff --git a/bot/admin/server/src/main/kotlin/model/genai/BotRAGConfigurationDTO.kt b/bot/admin/server/src/main/kotlin/model/genai/BotRAGConfigurationDTO.kt index 1214b137ad..5afb90aba9 100644 --- a/bot/admin/server/src/main/kotlin/model/genai/BotRAGConfigurationDTO.kt +++ b/bot/admin/server/src/main/kotlin/model/genai/BotRAGConfigurationDTO.kt @@ -42,8 +42,6 @@ data class BotRAGConfigurationDTO( val emSetting: EMSettingDTO, val indexSessionId: String? = null, val indexName: String? = null, - val noAnswerSentence: String, - val noAnswerStoryId: String? 
= null, val documentsRequired: Boolean = true, val debugEnabled: Boolean, val maxDocumentsRetrieved: Int, @@ -63,8 +61,6 @@ data class BotRAGConfigurationDTO( emSetting = configuration.emSetting.toDTO(), indexSessionId = configuration.indexSessionId, indexName = configuration.generateIndexName(), - noAnswerSentence = configuration.noAnswerSentence, - noAnswerStoryId = configuration.noAnswerStoryId, documentsRequired = configuration.documentsRequired, debugEnabled = configuration.debugEnabled, maxDocumentsRetrieved = configuration.maxDocumentsRetrieved, @@ -101,8 +97,6 @@ data class BotRAGConfigurationDTO( dto = emSetting, ), indexSessionId = indexSessionId, - noAnswerSentence = noAnswerSentence, - noAnswerStoryId = noAnswerStoryId, documentsRequired = documentsRequired, debugEnabled = debugEnabled, maxDocumentsRetrieved = maxDocumentsRetrieved, diff --git a/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt b/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt index f2a2e36cde..698f4c5381 100644 --- a/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt +++ b/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt @@ -97,7 +97,6 @@ class RAGServiceTest : AbstractTest() { model = "model", apiBase = "url", ), - noAnswerSentence = "No answer sentence", documentsRequired = true, debugEnabled = false, maxDocumentsRetrieved = 2, @@ -211,7 +210,6 @@ class RAGServiceTest : AbstractTest() { Assertions.assertEquals(PROVIDER, captured.questionAnsweringLlmSetting!!.provider.name) Assertions.assertEquals(TEMPERATURE, captured.questionAnsweringLlmSetting!!.temperature) Assertions.assertEquals(PROMPT, captured.questionAnsweringPrompt!!.template) - Assertions.assertEquals(null, captured.noAnswerStoryId) } TestCase("Save valid RAG Configuration that does not exist yet").given( diff --git a/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt b/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt index 0bdca1b629..edff4ee726 100644 --- a/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt +++ b/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt @@ -90,7 +90,6 @@ class RAGValidationServiceTest { questionAnsweringLlmSetting = openAILLMSetting, questionAnsweringPrompt = PromptTemplate(template = "How to bike in the rain"), emSetting = azureOpenAIEMSetting, - noAnswerSentence = " No answer sentence", documentsRequired = true, debugEnabled = false, maxDocumentsRetrieved = 2, diff --git a/bot/admin/web/src/app/analytics/dialogs/dialogs-list/dialogs-list-filters/dialogs-list-filters.component.ts b/bot/admin/web/src/app/analytics/dialogs/dialogs-list/dialogs-list-filters/dialogs-list-filters.component.ts index be812aa671..4272512856 100644 --- a/bot/admin/web/src/app/analytics/dialogs/dialogs-list/dialogs-list-filters/dialogs-list-filters.component.ts +++ b/bot/admin/web/src/app/analytics/dialogs/dialogs-list/dialogs-list-filters/dialogs-list-filters.component.ts @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -import { Component, EventEmitter, Input, OnInit, Output } from '@angular/core'; +import { Component, EventEmitter, Input, OnDestroy, OnInit, Output } from '@angular/core'; import { FormControl, FormGroup } from '@angular/forms'; import { ConnectorType } from '../../../../core/model/configuration'; import { Subject, debounceTime, take, takeUntil } from 'rxjs'; @@ -67,7 +67,7 @@ export type DialogListFilters = ExtractFormControlTyping; templateUrl: './dialogs-list-filters.component.html', styleUrl: './dialogs-list-filters.component.scss' }) -export class DialogsListFiltersComponent implements OnInit { +export class DialogsListFiltersComponent implements OnInit, OnDestroy { private readonly destroy$: Subject = new Subject(); private lastEmittedValue: Partial | null = null; @@ -109,8 +109,9 @@ export class DialogsListFiltersComponent implements OnInit { this.lastEmittedValue = { ...this.form.value }; } - this.form.valueChanges.pipe(debounceTime(800), takeUntil(this.destroy$)).subscribe(() => { + this.form.valueChanges.pipe(debounceTime(500), takeUntil(this.destroy$)).subscribe(() => { this.submitFiltersChange(); + this.persisteDisplayTests(); }); } @@ -143,10 +144,15 @@ export class DialogsListFiltersComponent implements OnInit { submitFiltersChange(): void { const formValue = this.form.value; - if (JSON.stringify(formValue) !== JSON.stringify(this.lastEmittedValue)) { - this.onFilter.emit(formValue); - this.lastEmittedValue = { ...formValue }; - } + this.onFilter.emit(formValue); + } + + persisteDisplayTests(): void { + const displayTests = this.getFormControl('displayTests')?.value; + this.botSharedService.session_storage = { + ...this.botSharedService.session_storage, + ...{ dialogs: { ...this.botSharedService.session_storage?.dialogs, displayTests } } + }; } resetControl(ctrl: FormControl, input?: HTMLInputElement): void { diff --git a/bot/admin/web/src/app/analytics/dialogs/dialogs-list/dialogs-list.component.ts b/bot/admin/web/src/app/analytics/dialogs/dialogs-list/dialogs-list.component.ts index 28001c0089..71862b0443 100644 --- a/bot/admin/web/src/app/analytics/dialogs/dialogs-list/dialogs-list.component.ts +++ b/bot/admin/web/src/app/analytics/dialogs/dialogs-list/dialogs-list.component.ts @@ -73,6 +73,10 @@ export class DialogsListComponent implements OnInit, OnChanges, OnDestroy { } ngOnInit() { + if (this.botSharedService.session_storage?.dialogs?.displayTests) { + this.filters.displayTests = this.botSharedService.session_storage.dialogs.displayTests; + } + this.state.configurationChange.pipe(takeUntil(this.destroy$)).subscribe(() => { this.refresh(); }); diff --git a/bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts b/bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts index 925980ad0e..758ababaa3 100644 --- a/bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts +++ b/bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts @@ -27,35 +27,208 @@ import { PromptDefinitionFormatter } from '../../../shared/model/ai-settings'; -export const QuestionCondensingDefaultPrompt: string = `Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if needed and otherwise return it as is.`; +export const QuestionCondensingDefaultPrompt: string = `You are a helpful assistant that reformulates questions. 
+
+You are given:
+- The conversation history between the user and the assistant
+- The most recent user question
+
+Your task:
+- Reformulate the user’s latest question into a clear, standalone query.
+- Incorporate relevant context from the conversation history.
+- Do NOT answer the question.
+- If the history does not provide additional context, keep the question as is.
+
+Return only the reformulated question.`;
 
 export const QuestionAnsweringDefaultPrompt: string = `# TOCK (The Open Conversation Kit) chatbot
 
-## General context
+## General Context
 
 You are a chatbot designed to provide short conversational messages in response to user queries.
+Your job is to surface the right information from the provided context.
+
+### Forbidden Topics
+
+- **Out of Scope**:
+  - Topics unrelated to the business domain (e.g., personal life advice, unrelated industries).
+  - Requests for unsupported features (e.g., "How do I integrate with [UnsupportedTool]?").
+
+- **Toxic/Offensive Content**:
+  - Hate speech, harassment, or discriminatory language.
+  - Illegal activities or unethical requests (e.g., "How do I bypass security protocols?").
+
+- **Personal/Private Matters**:
+  - User-specific data (e.g., personal identification, private conversations).
+  - Internal or confidential company information (e.g., unreleased product details).
+
+- **Regulated Topics**:
+  - Medical, legal, or financial advice (e.g., "What’s the best treatment for [condition]?").
+  - Speculative or unverified claims (e.g., "Is [Product X] better than competitors?").
+
+### Answer Style
+
+- **Tone**: neutral, kind, “you” address, light humor when appropriate.
+- **Language**: Introduce technical jargon only when strictly necessary and briefly define it.
+- **Structure**: Use short sentences, bold or bullet points for key ideas, headings to separate the main sections, and fenced \`code\` blocks for examples.
+- **Style**: Direct and technical tone, with **bold** for important concepts.
+- **Formatting**: Mandatory Markdown, with line breaks for readability.
+- **Examples**: Include a concrete example (code block or CLI command) for each feature.
+
+### Guidelines
+
+1. If the question is unclear, politely request rephrasing.
+2. If the docs don’t cover the answer, reply with \`"status": "not_found_in_context"\`.
+3. Conclude with:
+   - “Does this help?”
+   - Offer to continue on the same topic, switch topics, or contact support.
+
+### Verification Steps
+
+Before responding, ensure:
+
+- The documentation actually addresses the question.
+- Your answer is consistent with the docs.
+
+## Technical Instructions:
+
+You must respond STRICTLY in valid JSON format (no extra text, no explanations).
+Use only the following context and the rules below to answer the question.
+
+### Rules for JSON output:
+
+- If the answer is found in the context:
+  - "status": "found_in_context"
+  - "answer": the best possible answer in {{ locale }}
+  - "display_answer": "true"
+  - "redirection_intent": null
 
-## Guidelines
+- If the answer is NOT found in the context:
+  - "status": "not_found_in_context"
+  - "answer":
+    - The "answer" must not be a generic refusal. Instead, generate a helpful and intelligent response:
+      - If a similar or related element exists in the context (e.g., another product, service, or regulation with a close name, date, or wording), suggest it naturally in the answer.
+      - If no similar element exists, politely acknowledge the lack of information while encouraging clarification or rephrasing.
+      - Always ensure the response is phrased in a natural and user-friendly way, rather than a dry "not found in context".
+  - "display_answer": "true"
+  - "redirection_intent": null
 
-Incorporate any relevant details from the provided context into your answers, ensuring they are directly related to the user's query.
+- If the question is forbidden or offensive:
+  - "status": "out_of_scope"
+  - "answer":
+    - Generate a polite response explaining why a response can't be provided.
+  - "topic": "Out of scope or offensive question"
+  - "display_answer": "true"
+  - "redirection_intent": null
 
-## Style and format
+- If the question is small talk:
+  Only for conversational rituals such as greetings (e.g., “hello”, “hi”) and farewells or leave-takings (e.g., “goodbye”, “see you”) may you ignore the context and generate a natural small-talk response in the "answer".
+  - "status": "small_talk"
+  - "topic": "greetings"
+  - "display_answer": "true"
+  - "redirection_intent": null
 
-Your tone is empathetic, informative and polite.
+### Confidence score:
 
-## Additional instructions
+Give a confidence score between 0 and 1 on the relevance of the answer provided to the user's question:
 
-Use the following pieces of retrieved context to answer the question.
-If you dont know the answer, answer (exactly) with "{{no_answer}}".
-Answer in {{locale}}.
+- "confidence_score":
 
-## Context
+### User's question understanding:
 
-{{context}}
+Explain in one sentence what you understood from the user's question:
 
-## Question
+- "understanding": ""
+
+### Context usage tracing requirements (MANDATORY):
+
+- You MUST include **every** chunk from the input context in the "context_usage" array, in the same order they appear. **No chunk may be omitted**.
+- If explicit chunk identifiers are present in the context, use them.
+- For each chunk object:
+  - "chunk": ""
+  - "used_in_response":
+    - "true" if the chunk contributed
+    - "false" if the chunk didn't contribute
+  - "sentences": [""] — leave empty \`[]\` if none.
+  - "reason": "null" if the chunk contributed; otherwise a concise explanation of why this chunk is not relevant to the question (e.g., "general background only", "different product", "no data for the asked period", etc.).
+- If there are zero chunks in the context, return \`"context_usage": []\`.
+ +### Topic Identification & Suggestion Rules (MANDATORY): + +#### Rules for Topic Assignment + +- If the question explicitly matches a predefined topic, use: + - \`"topic": ""\` + - \`"suggested_topics": []\` + +- If the question does not match any predefined topic, use the \`unknown\` topic and provide 1 relevant and concise new topic suggestion in "suggested_topics": + - \`"topic": "unknown"\` + - \`"suggested_topics": [""]\` + +#### Predefined topics (use EXACT spelling, no variations): + +- \`Concepts and Definitions\` +- \`Processes and Methods\` +- \`Tools and Technologies\` +- \`Rules and Regulations\` +- \`Examples and Use Cases\` +- \`Resources and References\` + +## Context: + +{{ context }} + +## User question + +You are given the conversation history between a user and an assistant: + +- analyze the conversation history to understand the context and the user’s intent +- use this context to correctly interpret the user’s final question +- answer only the final user question below in a relevant and contextualized way + +Conversation history: +{{ chat_history }} + +User’s final question: +{{ question }} + +## Output format (JSON only): + +Return your response in the following format: + +\`\`\`json +{ + "status": "found_in_context" | "not_found_in_context" | "small_talk" | "out_of_scope", + "answer": "", + "display_answer": true | false, + "confidence_score": "", + "topic": "" | "greetings" | "Out of scope or offensive question" | "unknown", + "suggested_topics": ["", ""], + "understanding": "", + "redirection_intent": null, + "context_usage": [ + { + "chunk": "1", + "sentences": ["SENTENCE_1", "SENTENCE_2"], + "used_in_response": true | false, + "reason": null + }, + { + "chunk": "2", + "sentences": [], + "used_in_response": true | false, + "reason": "General description; no details related to the question." + }, + { + "chunk": "3", + "sentences": ["SENTENCE_X"], + "used_in_response": true | false, + "reason": null + } + ] +} +\`\`\` -{{question}} `; export const QuestionCondensing_prompt: ProvidersConfigurationParam[] = [ diff --git a/bot/admin/web/src/app/rag/rag-settings/models/rag-settings.ts b/bot/admin/web/src/app/rag/rag-settings/models/rag-settings.ts index 52b4dbb9fc..e605ec6561 100644 --- a/bot/admin/web/src/app/rag/rag-settings/models/rag-settings.ts +++ b/bot/admin/web/src/app/rag/rag-settings/models/rag-settings.ts @@ -24,9 +24,6 @@ export interface RagSettings { debugEnabled: boolean; - noAnswerSentence: string; - noAnswerStoryId: string | null; - questionCondensingLlmSetting: llmSetting; questionCondensingPrompt: PromptDefinition; maxMessagesFromHistory: number; diff --git a/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.html b/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.html index 2ecc1b9a2c..cf3527fa9f 100644 --- a/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.html +++ b/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.html @@ -430,80 +430,6 @@

Rag settings

- - Conversation flow - -
-
- - - -
-
- - - - - - - - - - {{ option.name }}  (disabled) - - -
-
-
-
- Settings deletion diff --git a/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.spec.ts b/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.spec.ts index 75cebbc001..4a8dca8248 100644 --- a/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.spec.ts +++ b/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.spec.ts @@ -18,8 +18,6 @@ import { NO_ERRORS_SCHEMA } from '@angular/core'; import { ComponentFixture, TestBed } from '@angular/core/testing'; import { NbToastrService } from '@nebular/theme'; import { of } from 'rxjs'; -import { BotService } from '../../bot/bot-service'; -import { StoryDefinitionConfigurationSummary } from '../../bot/model/story'; import { RestService } from '../../core-nlp/rest/rest.service'; import { StateService } from '../../core-nlp/state.service'; import { BotConfigurationService } from '../../core/bot-configuration.service'; @@ -28,22 +26,6 @@ import { RagSettings } from './models'; import { RagSettingsComponent } from './rag-settings.component'; -const stories = [ - { - _id: '123456789abcdefghijkl', - storyId: 'teststory', - botId: 'new_assistant', - intent: { - name: 'testintent' - }, - currentType: 'simple', - name: 'Test story', - category: 'faq', - description: '', - lastEdited: '2023-07-31T14:48:21.291Z' - } as unknown as StoryDefinitionConfigurationSummary -]; - const settings = { id: 'abcdefghijkl123456789', namespace: 'app', @@ -64,9 +46,7 @@ const settings = { embeddingModelName: 'text-embedding-ada-002', embeddingApiKey: 'Embedding OpenAI API Key', embeddingApiVersion: '2023-03-15-preview' - }, - noAnswerSentence: 'No answer sentence', - noAnswerStoryId: 'null' + } } as unknown as RagSettings; describe('RagSettingsComponent', () => { @@ -77,12 +57,6 @@ describe('RagSettingsComponent', () => { await TestBed.configureTestingModule({ declarations: [RagSettingsComponent], providers: [ - { - provide: BotService, - useValue: { - searchStories: () => of(stories) - } - }, { provide: StateService, useValue: { @@ -118,10 +92,6 @@ describe('RagSettingsComponent', () => { expect(component).toBeTruthy(); }); - it('should load stories', () => { - expect(component.availableStories).toEqual(stories); - }); - it('should load settings', () => { expect(component.settingsBackup).toEqual(settings); @@ -130,7 +100,7 @@ describe('RagSettingsComponent', () => { delete cleanedSettings['botId']; const cleanedFormValue = deepCopy(component.form.getRawValue()); - delete cleanedFormValue.params.apiKey; + delete cleanedFormValue.questionAnsweringLlmSetting.apiKey; expect(cleanedFormValue as unknown).toEqual(cleanedSettings as unknown); }); diff --git a/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.ts b/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.ts index 0a7f41f8a3..c906ffc48e 100644 --- a/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.ts +++ b/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.ts @@ -16,11 +16,9 @@ import { Component, OnDestroy, OnInit, TemplateRef, ViewChild } from '@angular/core'; import { FormControl, FormGroup, Validators } from '@angular/forms'; -import { debounceTime, forkJoin, Observable, of, Subject, take, takeUntil, pairwise } from 'rxjs'; +import { debounceTime, forkJoin, Observable, Subject, takeUntil, pairwise } from 'rxjs'; import { NbDialogRef, NbDialogService, NbToastrService, NbWindowService } from '@nebular/theme'; -import { BotService } from '../../bot/bot-service'; -import { StoryDefinitionConfiguration, StorySearchQuery } from 
'../../bot/model/story'; import { RestService } from '../../core-nlp/rest/rest.service'; import { StateService } from '../../core-nlp/state.service'; import { EnginesConfigurations, QuestionCondensing_prompt, QuestionAnswering_prompt } from './models/engines-configurations'; @@ -47,9 +45,6 @@ interface RagSettingsForm { debugEnabled: FormControl; - noAnswerSentence: FormControl; - noAnswerStoryId: FormControl; - indexSessionId: FormControl; indexName: FormControl; @@ -89,10 +84,6 @@ export class RagSettingsComponent implements OnInit, OnDestroy { questionAnswering_prompt = QuestionAnswering_prompt; - availableStories: StoryDefinitionConfiguration[]; - - filteredStories$: Observable; - settingsBackup: RagSettings; isSubmitted: boolean = false; @@ -103,7 +94,6 @@ export class RagSettingsComponent implements OnInit, OnDestroy { @ViewChild('importModal') importModal: TemplateRef; constructor( - private botService: BotService, private state: StateService, private rest: RestService, private toastrService: NbToastrService, @@ -176,14 +166,9 @@ export class RagSettingsComponent implements OnInit, OnDestroy { this.configurations = confs; if (confs.length) { - forkJoin([this.getStoriesLoader(), this.getRagSettingsLoader()]).subscribe((res) => { - this.availableStories = res[0]; - - const settings = res[1]; + forkJoin([this.getRagSettingsLoader()]).subscribe((res) => { + const settings = res[0]; if (settings?.id) { - if (!settings.noAnswerStoryId) { - settings.noAnswerStoryId = null; - } this.settingsBackup = deepCopy(settings); setTimeout(() => { this.initForm(settings); @@ -209,9 +194,6 @@ export class RagSettingsComponent implements OnInit, OnDestroy { debugEnabled: new FormControl({ value: undefined, disabled: !this.canRagBeActivated() }), - noAnswerSentence: new FormControl(undefined, [Validators.required]), - noAnswerStoryId: new FormControl(undefined), - indexSessionId: new FormControl(undefined), indexName: new FormControl(undefined), @@ -256,13 +238,6 @@ export class RagSettingsComponent implements OnInit, OnDestroy { return this.form.get('maxMessagesFromHistory') as FormControl; } - get noAnswerSentence(): FormControl { - return this.form.get('noAnswerSentence') as FormControl; - } - get noAnswerStoryId(): FormControl { - return this.form.get('noAnswerStoryId') as FormControl; - } - get indexSessionId(): FormControl { return this.form.get('indexSessionId') as FormControl; } @@ -283,11 +258,6 @@ export class RagSettingsComponent implements OnInit, OnDestroy { return this.isSubmitted ? 
this.form.valid : this.form.dirty; } - get getCurrentStoryLabel(): string { - const currentStory = this.availableStories?.find((story) => story.storyId === this.noAnswerStoryId.value); - return currentStory?.name || ''; - } - accordionItemsExpandedState: Map; isAccordionItemsExpanded(itemName: string): boolean { @@ -455,66 +425,6 @@ export class RagSettingsComponent implements OnInit, OnDestroy { return EnginesConfigurations[AiEngineSettingKeyName.emSetting].find((e) => e.key === this.emProvider.value); } - private getStoriesLoader(): Observable { - return this.botService - .getStories( - new StorySearchQuery( - this.state.currentApplication.namespace, - this.state.currentApplication.name, - this.state.currentLocale, - 0, - 10000, - undefined, - undefined, - false - ) - ) - .pipe(take(1)); - } - - isStoryEnabled(story: StoryDefinitionConfiguration): boolean { - for (let i = 0; i < story.features.length; i++) { - if (!story.features[i].enabled && !story.features[i].switchToStoryId && !story.features[i].endWithStoryId) { - return false; - } - } - return true; - } - - storySelectedChange(storyId: string): void { - this.noAnswerStoryId.patchValue(storyId); - this.form.markAsDirty(); - } - - onStoryChange(value: string): void { - if (value?.trim() == '') { - this.removeNoAnswerStoryId(); - } - } - - removeNoAnswerStoryId(): void { - this.noAnswerStoryId.patchValue(null); - this.form.markAsDirty(); - } - - filterStoriesList(e: string): void { - this.filteredStories$ = of(this.availableStories.filter((optionValue) => optionValue.name.toLowerCase().includes(e.toLowerCase()))); - } - - storyInputFocus(): void { - this.filteredStories$ = of(this.availableStories); - } - - storyInputBlur(e: FocusEvent): void { - setTimeout(() => { - // timeout needed to avoid reseting input and filtered stories when clicking on autocomplete suggestions (which fires blur event) - const target: HTMLInputElement = e.target as HTMLInputElement; - target.value = this.getCurrentStoryLabel; - - this.filteredStories$ = of(this.availableStories); - }, 100); - } - cancel(): void { this.initForm(this.settingsBackup); } @@ -529,14 +439,10 @@ export class RagSettingsComponent implements OnInit, OnDestroy { delete formValue['emProvider']; formValue.namespace = this.state.currentApplication.namespace; formValue.botId = this.state.currentApplication.name; - formValue.noAnswerStoryId = this.noAnswerStoryId.value === 'null' ? 
null : this.noAnswerStoryId.value; const url = `/gen-ai/bots/${this.state.currentApplication.name}/configuration/rag`; this.rest.post(url, formValue, null, null, true).subscribe({ next: (ragSettings: RagSettings) => { - if (!ragSettings.noAnswerStoryId) { - ragSettings.noAnswerStoryId = null; - } this.settingsBackup = ragSettings; this.indexName.reset(); diff --git a/bot/admin/web/src/app/shared/bot-shared.service.ts b/bot/admin/web/src/app/shared/bot-shared.service.ts index 5fae05656e..dcf933be48 100644 --- a/bot/admin/web/src/app/shared/bot-shared.service.ts +++ b/bot/admin/web/src/app/shared/bot-shared.service.ts @@ -25,6 +25,7 @@ import { AdminConfiguration } from './model/conf'; export interface TockSimpleSessionStorage { test: { debug: boolean; sourceWithContent?: boolean }; + dialogs: { displayTests: boolean }; } @Injectable({ diff --git a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html index a9a3c82749..361645b06d 100644 --- a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html +++ b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html @@ -16,8 +16,25 @@
- {{ message.text }} DEBUG +
Status: {{ message.data.answer?.status }}
+ +
+ Topic: {{ message.data.answer?.topic }} +
+ +
+ Suggested topics: + + {{ suggestion }} + +
+ +
+ Redirection intent: {{ message.data.answer?.redirection_intent }} +
+ +
Confidence score: {{ message.data.answer?.confidence_score }}
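
For reference, the debug panel above binds directly to fields of the new structured answer. Below is a minimal sketch of the payload those bindings expect, parsed with the `LLMAnswer` model this patch adds in `rag_models.py`; the sample values are illustrative only, not taken from the patch:

```python
import json

from gen_ai_orchestrator.models.rag.rag_models import LLMAnswer

# Illustrative orchestrator output, shaped like the prompt's JSON contract.
raw = json.loads('''
{
  "status": "found_in_context",
  "answer": "Tock is an open conversational platform.",
  "display_answer": true,
  "confidence_score": 0.93,
  "topic": "Concepts and Definitions",
  "suggested_topics": [],
  "understanding": "The user asks what Tock is.",
  "redirection_intent": null,
  "context_usage": [
    {"chunk": "1", "sentences": ["Tock: The Open Conversation Kit"],
     "used_in_response": true, "reason": null}
  ]
}
''')

answer = LLMAnswer(**raw)  # pydantic validation of the structured response
print(answer.status, answer.confidence_score)  # found_in_context 0.93
```
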
diff --git a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.scss b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.scss index b0b18a6758..5f59982a64 100644 --- a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.scss +++ b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.scss @@ -19,21 +19,26 @@ :host { display: flex; - justify-content: center; + // justify-content: center; .debug { + width: 100%; cursor: pointer; + font-size: 0.75rem; color: var(--chat-message-sender-text-color); + border-top: 1px dashed var(--chat-message-sender-text-color); border-bottom: 1px dashed var(--chat-message-sender-text-color); + background-color: var(--background-basic-color-3); - height: 11px; - margin-bottom: 10px; + margin-left: 1em; + // height: 11px; + // margin-bottom: 10px; - span { - background: var(--card-background-color); - padding: 0 5px; - border-radius: 0.5rem; - } + // span { + // background: var(--card-background-color); + // padding: 0 5px; + // border-radius: 0.5rem; + // } } } diff --git a/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt b/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt index aee3ad8d86..6f2c1af38e 100644 --- a/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt +++ b/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt @@ -35,8 +35,6 @@ data class BotRAGConfiguration( val llmSetting: LLMSetting? = null, val emSetting: EMSetting, val indexSessionId: String? = null, - val noAnswerSentence: String, - val noAnswerStoryId: String? 
= null, val documentsRequired: Boolean = true, val debugEnabled: Boolean = false, val maxDocumentsRetrieved: Int = 4, diff --git a/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt b/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt index cfa550317e..5f8d5c0fa1 100644 --- a/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt +++ b/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt @@ -16,11 +16,9 @@ package ai.tock.bot.engine.config -import ai.tock.bot.admin.bot.rag.BotRAGConfiguration import ai.tock.bot.admin.indicators.IndicatorValues import ai.tock.bot.admin.indicators.Indicators import ai.tock.bot.admin.indicators.metric.MetricType -import ai.tock.bot.definition.RAGStoryDefinition import ai.tock.bot.definition.StoryDefinition import ai.tock.bot.engine.BotBus import ai.tock.bot.engine.BotRepository @@ -34,9 +32,9 @@ import ai.tock.genai.orchestratorclient.requests.ChatMessage import ai.tock.genai.orchestratorclient.requests.ChatMessageType import ai.tock.genai.orchestratorclient.requests.DialogDetails import ai.tock.genai.orchestratorclient.requests.RAGRequest +import ai.tock.genai.orchestratorclient.responses.LLMAnswer import ai.tock.genai.orchestratorclient.responses.ObservabilityInfo import ai.tock.genai.orchestratorclient.responses.RAGResponse -import ai.tock.genai.orchestratorclient.responses.TextWithFootnotes import ai.tock.genai.orchestratorclient.retrofit.GenAIOrchestratorBusinessError import ai.tock.genai.orchestratorclient.retrofit.GenAIOrchestratorValidationError import ai.tock.genai.orchestratorclient.services.RAGService @@ -64,7 +62,7 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler { BotRepository.saveMetric(createMetric(MetricType.STORY_HANDLED)) // Call RAG Api - Gen AI Orchestrator - val (answer, debug, noAnswerStory, observabilityInfo) = rag(this) + val (answer, footnotes, debug, redirectStory, observabilityInfo) = rag(this) // Add debug data if available and if debugging is enabled if (debug != null) { @@ -72,37 +70,42 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler { sendDebugData("RAG", debug) } - // Handle the RAG answer - if (noAnswerStory == null && answer != null) { - logger.info { "Send RAG answer." } + val modifiedObservabilityInfo = observabilityInfo?.let { updateObservabilityInfo(this, it) } - val modifiedObservabilityInfo = observabilityInfo?.let { updateObservabilityInfo(this, it) } + // Footnotes building + val preparedFootnotes = + footnotes?.map { + Footnote( + it.identifier, + it.title, + it.url, + if (action.metadata.sourceWithContent) it.content else null, + it.score, + ) + }?.toMutableList() ?: mutableListOf() + + // Identifying text to be sent + val textToSend = if (answer?.displayAnswer == true) answer.answer.orEmpty() else "" - send( + // Send SendSentenceWithFootnotes + logger.info { "Send RAG answer." } + send( + action = SendSentenceWithFootnotes( - botId, - connectorId, - userId, - text = answer.text, - footnotes = - answer.footnotes.map { - Footnote( - it.identifier, - it.title, - it.url, - if (action.metadata.sourceWithContent) it.content else null, - it.score, - ) - }.toMutableList(), - // modifiedObservabilityInfo includes the public langfuse URL if filled. 
- metadata = ActionMetadata(isGenAiRagAnswer = true, observabilityInfo = modifiedObservabilityInfo), + playerId = botId, + applicationId = connectorId, + recipientId = userId, + text = textToSend, + footnotes = preparedFootnotes, + metadata = + ActionMetadata( + isGenAiRagAnswer = true, + observabilityInfo = modifiedObservabilityInfo, + ), ), - ) - } else { - logger.info { "No RAG answer to send, because a noAnswerStory is returned." } - } + ) - noAnswerStory + redirectStory } } @@ -122,7 +125,7 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler { } /** - * Manage story redirection when no answer redirection is filled + * Manage story redirection * Use the handler of the configured story otherwise launch default unknown story * @param botBus the bot Bus * @param response the RAG response @@ -131,54 +134,9 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler { botBus: BotBus, response: RAGResponse?, ): StoryDefinition? { - return with(botBus) { - botDefinition.ragConfiguration?.let { ragConfig -> - if (response?.answer?.text.equals(ragConfig.noAnswerSentence, ignoreCase = true)) { - // Save no answer metric - saveRagMetric(IndicatorValues.NO_ANSWER) - - // Switch to no answer story if configured - if (!ragConfig.noAnswerStoryId.isNullOrBlank()) { - logger.info { "The RAG response is equal to the configured no-answer sentence, so switch to the no-answer story." } - getNoAnswerRAGStory(ragConfig) - } else { - null - } - } else { - // Save success metric - saveRagMetric(IndicatorValues.SUCCESS) - null - } - } - } - } - - /** - * Switch to the configured no-answer story if exists. - * Switch to the default unknown story otherwise. - * @param ragConfig: The RAG configuration - */ - private fun BotBus.getNoAnswerRAGStory(ragConfig: BotRAGConfiguration): StoryDefinition { - val noAnswerStory: StoryDefinition - val noAnswerStoryId = ragConfig.noAnswerStoryId - if (!noAnswerStoryId.isNullOrBlank()) { - logger.info { "A no-answer story $noAnswerStoryId is configured, so run it." } - noAnswerStory = - botDefinition.findStoryDefinitionById(noAnswerStoryId, connectorId).let { - // Prevent infinite loop when the noAnswerStory is removed or disabled - if (it.id == RAGStoryDefinition.RAG_STORY_NAME) { - logger.info { "The no-answer story is removed or disabled, so run the default unknown story." } - botDefinition.unknownStory - } else { - it - } - } - } else { - logger.info { "No no-answer story is configured, so run the default unknown story." 
} - noAnswerStory = botDefinition.unknownStory + return response?.answer?.redirectionIntent?.let { + botBus.botDefinition.findStoryDefinition(it, "") } - - return noAnswerStory } /** @@ -235,7 +193,6 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler { mapOf( "question" to action.toString(), "locale" to userPreferences.locale.displayLanguage, - "no_answer" to ragConfiguration.noAnswerSentence, ), ), embeddingQuestionEmSetting = ragConfiguration.emSetting, @@ -249,27 +206,36 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler { debug = action.metadata.debugEnabled || ragConfiguration.debugEnabled, ) + if (response?.answer?.status.equals("not_found_in_context", ignoreCase = true)) { + // Save no answer metric + saveRagMetric(IndicatorValues.NO_ANSWER) + } else { + // Save success metric + saveRagMetric(IndicatorValues.SUCCESS) + } + // Handle RAG response - return RAGResult(response?.answer, response?.debug, ragStoryRedirection(this, response), response?.observabilityInfo) + return RAGResult( + response?.answer, + response?.footnotes, + response?.debug, + ragStoryRedirection(this, response), + response?.observabilityInfo, + ) } catch (exc: Exception) { logger.error { exc } // Save failure metric saveRagMetric(IndicatorValues.FAILURE) - return if (exc is GenAIOrchestratorBusinessError && exc.error.info.error == "APITimeoutError") { - logger.info { "The APITimeoutError is raised, so switch to the no-answer story." } - RAGResult(noAnswerStory = getNoAnswerRAGStory(ragConfiguration)) - } else { - RAGResult( - answer = TextWithFootnotes(text = technicalErrorMessage), - debug = - when (exc) { - is GenAIOrchestratorBusinessError -> RAGError(exc.message, exc.error) - is GenAIOrchestratorValidationError -> RAGError(exc.message, exc.detail) - else -> RAGError(errorMessage = exc.message) - }, - ) - } + return RAGResult( + answer = LLMAnswer(status = "error", answer = technicalErrorMessage), + debug = + when (exc) { + is GenAIOrchestratorBusinessError -> RAGError(exc.message, exc.error) + is GenAIOrchestratorValidationError -> RAGError(exc.message, exc.detail) + else -> RAGError(errorMessage = exc.message) + }, + ) } } } @@ -330,9 +296,10 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler { * Aggregation of RAG answer, debug and the no answer Story. */ data class RAGResult( - val answer: TextWithFootnotes? = null, + val answer: LLMAnswer? = null, + val footnotes: List? = null, val debug: Any? = null, - val noAnswerStory: StoryDefinition? = null, + val redirectStory: StoryDefinition? = null, val observabilityInfo: ObservabilityInfo? 
= null, ) diff --git a/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt b/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt index 9e4de4a6c1..bad7061d6c 100644 --- a/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt +++ b/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt @@ -59,7 +59,6 @@ internal class BotRAGConfigurationMongoDAOTest : AbstractTest() { model = "modelName1", baseUrl = "https://api.openai.com/v1", ), - noAnswerSentence = "no answer sentence", ) BotRAGConfigurationMongoDAO.save(config) @@ -90,7 +89,6 @@ internal class BotRAGConfigurationMongoDAOTest : AbstractTest() { baseUrl = "https://api.openai.com/v1", model = "modelName1", ), - noAnswerSentence = "no answer sentence1", ) val config2 = @@ -113,7 +111,6 @@ internal class BotRAGConfigurationMongoDAOTest : AbstractTest() { baseUrl = "https://api.openai.com/v1", model = "modelName1", ), - noAnswerSentence = "no answer sentence1", ) assertNotEquals(config1, config2) @@ -121,11 +118,11 @@ internal class BotRAGConfigurationMongoDAOTest : AbstractTest() { BotRAGConfigurationMongoDAO.save(config1) BotRAGConfigurationMongoDAO.save(config2) - BotRAGConfigurationMongoDAO.save(config1.copy(noAnswerSentence = "New no answer sentence")) + BotRAGConfigurationMongoDAO.save(config1.copy(documentsRequired = true)) val configBDD = BotRAGConfigurationMongoDAO.findByNamespaceAndBotId("namespace1", "botId1") - assertEquals(config1.copy(noAnswerSentence = "New no answer sentence"), configBDD) + assertEquals(config1.copy(documentsRequired = true), configBDD) } @Test @@ -150,7 +147,6 @@ internal class BotRAGConfigurationMongoDAOTest : AbstractTest() { baseUrl = "https://api.openai.com/v1", model = "modelName1", ), - noAnswerSentence = "no answer sentence", ) BotRAGConfigurationMongoDAO.save(config) diff --git a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/Models.kt b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/Models.kt index 09017b9523..95f765d00f 100644 --- a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/Models.kt +++ b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/Models.kt @@ -16,9 +16,23 @@ package ai.tock.genai.orchestratorclient.responses -data class TextWithFootnotes( - val text: String, - val footnotes: List = emptyList(), +data class ChunkSentences( + val chunk: String? = null, + val sentences: List? = emptyList(), + val usedInResponse: Boolean? = false, + val reason: String? = null, +) + +data class LLMAnswer( + val status: String?, + val answer: String?, + val confidenceScore: Double? = 0.0, + val displayAnswer: Boolean? = true, + val topic: String? = null, + val suggestedTopics: List? = null, + val understanding: String? = null, + val redirectionIntent: String? = null, + val contextUsage: List? 
= null, ) data class Footnote( diff --git a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/RAGResponse.kt b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/RAGResponse.kt index a4dfde4c3e..fb699fd7f3 100644 --- a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/RAGResponse.kt +++ b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/RAGResponse.kt @@ -17,7 +17,8 @@ package ai.tock.genai.orchestratorclient.responses data class RAGResponse( - val answer: TextWithFootnotes, + val answer: LLMAnswer, + val footnotes: List = emptyList(), val debug: Any? = null, val observabilityInfo: ObservabilityInfo? = null, ) diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt index 430db23ede..295da45356 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt @@ -27,5 +27,3 @@ data class OllamaLLMSetting( return this.copy(temperature = temperature) } } - -// TODO MASS : Check Compile + TU (car dernier commit) diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt index 379d3a9c4a..0c25fa1aae 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt @@ -26,7 +26,7 @@ import ai.tock.shared.property private val vectorStore = property( name = "tock_gen_ai_orchestrator_vector_store_provider", - defaultValue = VectorStoreProvider.OpenSearch.name, + defaultValue = VectorStoreProvider.PGVector.name, ) typealias DocumentIndexName = String diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py index 4ab0519edd..5682c99ee9 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py @@ -28,9 +28,15 @@ class Source(BaseModel): """A source model, used to associate document sources with the QA response""" title: str = Field(description='Source title', examples=['Tock Documentation']) - url: Optional[AnyUrl] = Field(description='Source url', examples=['https://doc.tock.ai/tock/'], default=None) - content: str = Field(description='Source content', examples=['Tock: The Open Conversation Kit']) - score: Optional[float] = Field(description='The compressor score', examples=[0.9149009585380554], default=None) + url: Optional[AnyUrl] = Field( + description='Source url', examples=['https://doc.tock.ai/tock/'], default=None + ) + content: str = Field( + description='Source content', examples=['Tock: The Open Conversation Kit'] + ) + score: Optional[float] = Field( + description='The compressor score', examples=[0.9149009585380554], default=None + ) def __eq__(self, other): """ @@ -55,14 +61,77 @@ class Footnote(Source): identifier: str = 
Field(description='Footnote identifier', examples=['1'])
 
 
-class TextWithFootnotes(BaseModel):
-    """Text with its footnotes. Used for RAG response"""
+class ChunkInfos(BaseModel):
+    """A model representing information about a chunk used in the RAG context."""
 
-    text: str = Field(
-        description='Text with footnotes used to list outside sources',
-        examples=['This is page content [1], and this is more content [2]'],
+    chunk: Optional[str] = Field(
+        description='Unique identifier of the chunk.',
+        examples=['cd6d8221-ba9f-44da-86ee-0e25a3c9a5c7'],
+        default=None,
+    )
+    sentences: Optional[List[str]] = Field(
+        description='List of verbatim sentences from the chunk that were used by the LLM.',
+        default=None,
+    )
+    used_in_response: Optional[bool] = Field(
+        description='Indicates whether the chunk was actually used to generate the response.',
+        examples=[True],
+        default=False,
+    )
+    reason: Optional[str] = Field(
+        description='Reason why the chunk was not used (e.g., irrelevant, general background).',
+        default=None,
+    )
+
+
+class LLMAnswer(BaseModel):
+    """
+    A model representing the structured answer generated by the LLM
+    in response to a user query, based on the provided RAG context.
+    """
+
+    status: Optional[str] = Field(
+        description='The status of the answer generation. '
+        "Possible values: 'found_in_context', 'not_found_in_context', 'small_talk', "
+        'or other case-specific codes.',
+        default=None,
+    )
+    answer: Optional[str] = Field(
+        description="The textual answer generated by the LLM, in the user's locale.",
+        default=None,
+    )
+    confidence_score: Optional[float] = Field(
+        description="Confidence score assigned to the answer provided to the user's question.",
+        examples=[0.93],
+        default=None,
+    )
+    display_answer: Optional[bool] = Field(
+        description='Indicates whether the answer should be displayed.',
+        examples=[True],
+        default=False,
+    )
+    topic: Optional[str] = Field(
+        description='The main topic assigned to the answer. Must be one of the predefined '
+        "topics, or 'unknown' if no match is possible.",
+        default=None,
+    )
+    suggested_topics: Optional[List[str]] = Field(
+        description='A list of suggested alternative or related topics, '
+        "used when the main topic is 'unknown'.",
+        default=None,
+    )
+    understanding: Optional[str] = Field(
+        description="The LLM's understanding of the user's query.",
+        default=None,
+    )
+    redirection_intent: Optional[str] = Field(
+        description='The intent to use to redirect the conversation flow.', default=None
+    )
+    context_usage: Optional[List[ChunkInfos]] = Field(
+        description='The list of chunks from the context that contributed to or were considered '
+        "in the LLM's answer. Each entry contains identifiers, sentences, and reasons.",
+        default=None,
     )
-    footnotes: set[Footnote] = Field(description='Set of footnotes')
 
 
 @unique
@@ -142,10 +211,13 @@ class RAGDebugData(QADebugData):
 
     question_condensing_prompt: Optional[str] = Field(
         description='The prompt of the question rephrased with the history of the conversation.',
-        examples=['Given the following conversation, rephrase the follow up question to be a standalone question.'],
+        examples=[
+            'Given the following conversation, rephrase the follow up question to be a standalone question.'
+        ],
    )
     question_condensing_history: list[ChatMessage] = Field(
-        description="Conversation history, used to reformulate the user's question.")
+        description="Conversation history, used to reformulate the user's question."
+ ) condensed_question: Optional[str] = Field( description='The question rephrased with the history of the conversation.', examples=['Hello, how to plan a trip to Morocco ?'], @@ -156,4 +228,4 @@ class RAGDebugData(QADebugData): 'Question: Hello, how to plan a trip to Morocco ?. Answer in French.' ], ) - answer: str = Field(description='The RAG answer.') + answer: LLMAnswer = Field(description='The RAG answer.') diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/rag_router.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/rag_router.py index 4cc8f961a8..ecc8f500eb 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/rag_router.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/rag_router.py @@ -40,21 +40,27 @@ @rag_router.post('') -async def ask_rag(http_request: Request, request: RAGRequest, debug: bool = False) -> RAGResponse: +async def ask_rag( + http_request: Request, request: RAGRequest, debug: bool = False +) -> RAGResponse: """ ## Ask a RAG System Ask question to a RAG System, and return answer by using a knowledge base (documents) """ # Check the consistency of the Vector Store Provider with the request body - validate_vector_store_rag_query(http_request, request.vector_store_setting, request.document_search_params) + validate_vector_store_rag_query( + http_request, request.vector_store_setting, request.document_search_params + ) # execute RAG return await rag(request, debug) + def validate_vector_store_rag_query( - http_request: Request, - vector_store_setting: VectorStoreSetting, - vector_store_search_params: DocumentSearchParams): + http_request: Request, + vector_store_setting: VectorStoreSetting, + vector_store_search_params: DocumentSearchParams, +): """ Check the consistency of the Vector Store Provider with the request body Args: @@ -73,11 +79,15 @@ def validate_vector_store_rag_query( vector_store_provider = vector_store_setting.provider if vector_store_provider != vector_store_search_params.provider: - logger.error('Inconsistency between vector store provider and document search parameters (%s Vs %s)', - vector_store_provider.value, vector_store_search_params.provider.value) + logger.error( + 'Inconsistency between vector store provider and document search parameters (%s Vs %s)', + vector_store_provider.value, + vector_store_search_params.provider.value, + ) raise AIProviderBadRequestException( create_error_info_bad_request( - request=http_request, + http_request=http_request, provider=vector_store_search_params.provider, - cause='Inconsistency between vector store provider and document search parameters') + cause='Inconsistency between vector store provider and document search parameters', + ) ) diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/responses/responses.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/responses/responses.py index 50d52e57ef..1b7b2ab60b 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/responses/responses.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/responses/responses.py @@ -30,7 +30,7 @@ from gen_ai_orchestrator.models.observability.observability_provider import ( ObservabilityProvider, ) -from gen_ai_orchestrator.models.rag.rag_models import Source, TextWithFootnotes +from 
gen_ai_orchestrator.models.rag.rag_models import Source, LLMAnswer, Footnote from gen_ai_orchestrator.models.vector_stores.vectore_store_provider import ( VectorStoreProvider, ) @@ -127,9 +127,10 @@ class ObservabilityInfo(BaseModel): class RAGResponse(BaseModel): """The RAG response model""" - answer: TextWithFootnotes = Field( - description='The RAG answer, with outside sources.' + answer: Optional[LLMAnswer] = Field( + description='The RAG answer' ) + footnotes: set[Footnote] = Field(description='Set of footnotes') debug: Optional[Any] = Field( description='Debug data', examples=[{'action': 'retrieve', 'result': 'OK', 'errors': []}], diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/callback_handlers/callback_handlers_factory.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/callback_handlers/callback_handlers_factory.py index 45bb6c5293..81e1dd40b1 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/callback_handlers/callback_handlers_factory.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/callback_handlers/callback_handlers_factory.py @@ -49,5 +49,3 @@ def check_observability_setting(self) -> bool: :raises BusinessException: For incorrect setting """ pass - - diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/callback_handlers/langfuse_callback_handler_factory.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/callback_handlers/langfuse_callback_handler_factory.py index 737f391e9d..f2c0900e22 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/callback_handlers/langfuse_callback_handler_factory.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/callback_handlers/langfuse_callback_handler_factory.py @@ -120,9 +120,7 @@ def check_observability_setting(self) -> bool: except ApiError as exc: logger.error(exc) - raise GenAIObservabilityErrorException( - create_error_info_langfuse(exc) - ) + raise GenAIObservabilityErrorException(create_error_info_langfuse(exc)) return True def _fetch_settings(self) -> dict: @@ -140,7 +138,10 @@ def _fetch_settings(self) -> dict: def _get_httpx_client(self) -> Optional[Client]: langfuse_settings = self._fetch_settings() - if ProxyServerType.AWS_LAMBDA == application_settings.observability_proxy_server: + if ( + ProxyServerType.AWS_LAMBDA + == application_settings.observability_proxy_server + ): """ This AWSLambda proxy is used when the architecture implemented for the Langfuse observability tool places it behind an API Gateway which requires its diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py index 82f8b55246..df5ac562ce 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py @@ -17,10 +17,12 @@ It uses LangChain to perform a Conversational Retrieval Chain """ +import json import logging import time from functools import 
partial from logging import ERROR, WARNING +from operator import itemgetter from typing import List, Optional from langchain.retrievers.contextual_compression import ( @@ -29,10 +31,13 @@ from langchain_community.chat_message_histories import ChatMessageHistory from langchain_core.callbacks import BaseCallbackHandler from langchain_core.documents import Document -from langchain_core.output_parsers import StrOutputParser +from langchain_core.messages import AIMessage, HumanMessage +from langchain_core.output_parsers import JsonOutputParser, StrOutputParser from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.prompts import PromptTemplate as LangChainPromptTemplate from langchain_core.runnables import ( + RunnableConfig, + RunnableLambda, RunnableParallel, RunnablePassthrough, RunnableSerializable, @@ -63,10 +68,10 @@ from gen_ai_orchestrator.models.rag.rag_models import ( ChatMessageType, Footnote, + LLMAnswer, RAGDebugData, RAGDocument, RAGDocumentMetadata, - TextWithFootnotes, ) from gen_ai_orchestrator.routers.requests.requests import RAGRequest from gen_ai_orchestrator.routers.responses.responses import RAGResponse @@ -104,7 +109,7 @@ async def execute_rag_chain( Args: request: The RAG request debug: True if RAG data debug should be returned with the response. - custom_observability_handler: Custom observability handler + custom_observability_handler: Custom observability handler (Used in the tooling run_experiment.py script) Returns: The RAG response (Answer and document sources) """ @@ -112,7 +117,9 @@ async def execute_rag_chain( logger.info('RAG chain - Start of execution...') start_time = time.time() - conversational_retrieval_chain = create_rag_chain(request=request) + conversational_retrieval_chain = create_rag_chain( + request=request, vector_db_async_mode=False # TODO MASS + ) message_history = ChatMessageHistory() session_id = None @@ -173,39 +180,44 @@ async def execute_rag_chain( metadata=metadata, ), ) + llm_answer = LLMAnswer(**response['answer']) # RAG Guard - rag_guard(inputs, response, request.documents_required) + rag_guard(inputs, llm_answer, response, request.documents_required) # Guardrail if request.guardrail_setting: guardrail = get_guardrail_factory( setting=request.guardrail_setting ).get_parser() - guardrail_output = guardrail.parse(response['answer']) + guardrail_output = guardrail.parse(llm_answer.answer) check_guardrail_output(guardrail_output) # Calculation of RAG processing time rag_duration = '{:.2f}'.format(time.time() - start_time) logger.info('RAG chain - End of execution. 
(Duration : %s seconds)', rag_duration)
 
+    # Group contexts by chunk id
+    contexts_by_chunk = {
+        ctx.chunk: ctx
+        for ctx in (llm_answer.context_usage or [])
+        if ctx.used_in_response
+    }
+
     # Returning RAG response
     return RAGResponse(
-        answer=TextWithFootnotes(
-            text=response['answer'],
-            footnotes=set(
-                map(
-                    lambda doc: Footnote(
-                        identifier=doc.metadata['id'],
-                        title=doc.metadata['title'],
-                        url=doc.metadata['source'],
-                        content=get_source_content(doc),
-                        score=doc.metadata.get('retriever_score', None),
-                    ),
-                    response['documents'],
-                )
-            ),
-        ),
+        answer=llm_answer,
+        footnotes={
+            Footnote(
+                identifier=doc.metadata['id'],
+                title=doc.metadata['title'],
+                url=doc.metadata['source'],
+                content=get_source_content(doc),
+                score=doc.metadata.get('retriever_score', None),
+            )
+            for doc in response['documents']
+            if doc.metadata['id'] in contexts_by_chunk
+        },
         observability_info=get_observability_info(
             observability_handler,
             ObservabilityTrace.RAG.value if observability_handler is not None else None,
@@ -284,31 +296,75 @@ def create_rag_chain(
     if question_condensing_llm_factory is not None:
         question_condensing_llm = question_condensing_llm_factory.get_language_model()
     question_answering_llm = question_answering_llm_factory.get_language_model()
-    rag_prompt = build_rag_prompt(request)
 
-    # Construct the RAG chain using the prompt and LLM,
-    # This chain will consume the documents retrieved by the retriever as input.
-    rag_chain = construct_rag_chain(question_answering_llm, rag_prompt)
+    # Fall back to the answering LLM when no condensing LLM setting is provided.
+    if question_condensing_llm is not None:
+        condensing_llm = question_condensing_llm
+    else:
+        condensing_llm = question_answering_llm
 
     # Build the chat chain for question contextualization
     chat_chain = build_question_condensation_chain(
-        question_condensing_llm
-        if question_condensing_llm is not None
-        else question_answering_llm,
-        request.question_condensing_prompt,
+        condensing_llm, request.question_condensing_prompt
     )
 
+    rag_prompt = build_rag_prompt(request)
+
     # Function to contextualize the question based on chat history
     contextualize_question_fn = partial(contextualize_question, chat_chain=chat_chain)
 
-    # Final RAG chain with retriever and source documents
-    rag_chain_with_retriever = (
-        contextualize_question_fn
-        | RunnableParallel({'documents': retriever, 'question': RunnablePassthrough()})
-        | RunnablePassthrough.assign(answer=rag_chain)
+    # Calculate the condensed question
+    with_condensed_question = RunnableParallel(
+        {
+            'condensed_question': contextualize_question_fn,
+            'question': itemgetter('question'),
+            'chat_history': itemgetter('chat_history'),
+        }
+    )
+
+    def retrieve_with_variants(inputs):
+        variants = [
+            # inputs["question"],  # deactivated; kept as an example of the multi-variant retrieval process
+            inputs['condensed_question']
+        ]
+        docs = []
+        for v in variants:
+            docs.extend(retriever.invoke(v))
+        # Deduplicate docs
+        unique_docs = {d.metadata['id']: d for d in docs}
+
+        # TODO [DERCBOT-1649] Apply the RRF Algo on unique_docs.
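+        # A minimal illustrative sketch for that TODO (the helper name is
+        # hypothetical, not an existing project utility): Reciprocal Rank Fusion
+        # scores each document as the sum of 1 / (k + rank) over every variant
+        # ranking in which it appears; k (commonly 60) dampens the dominance of
+        # top-ranked hits.
+        #
+        #   def reciprocal_rank_fusion(ranked_lists, k=60):
+        #       scores, docs_by_id = {}, {}
+        #       for ranked in ranked_lists:
+        #           for rank, doc in enumerate(ranked, start=1):
+        #               doc_id = doc.metadata['id']
+        #               docs_by_id[doc_id] = doc
+        #               scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
+        #       return [
+        #           docs_by_id[doc_id]
+        #           for doc_id, _ in sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
+        #       ]
+        #
+        # Each query variant would contribute its own ranked list instead of the
+        # flat `docs` concatenation above.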
+        return list(unique_docs.values())
+
+    # Build the RAG inputs
+    rag_inputs = with_condensed_question | RunnableParallel(
+        {
+            'question': itemgetter('condensed_question'),
+            'chat_history': itemgetter('chat_history'),
+            'documents': RunnableLambda(retrieve_with_variants),
+        }
     )
 
-    return rag_chain_with_retriever
+    return rag_inputs | RunnablePassthrough.assign(
+        answer=(
+            {
+                'context': lambda x: json.dumps(
+                    [
+                        {
+                            'chunk_id': doc.metadata['id'],
+                            'chunk_text': doc.page_content,
+                        }
+                        for doc in x['documents']
+                    ],
+                    ensure_ascii=False,
+                    indent=2,
+                ),
+                'chat_history': format_chat_history,
+            }
+            | rag_prompt
+            | question_answering_llm
+            | JsonOutputParser(pydantic_object=LLMAnswer, name='rag_chain_output')
+        )
+    )
 
 
 def build_rag_prompt(request: RAGRequest) -> LangChainPromptTemplate:
@@ -322,23 +378,14 @@ def build_rag_prompt(request: RAGRequest) -> LangChainPromptTemplate:
     )
 
 
-def construct_rag_chain(llm, rag_prompt):
-    """
-    Construct the RAG chain from LLM and prompt.
-    """
-    return (
-        {
-            'context': lambda inputs: '\n\n'.join(
-                doc.page_content for doc in inputs['documents']
-            ),
-            'question': lambda inputs: inputs[
-                'question'
-            ],  # Override the user's original question with the condensed one
-        }
-        | rag_prompt
-        | llm
-        | StrOutputParser(name='rag_chain_output')
-    )
+def format_chat_history(x):
+    messages = []
+    for msg in x['chat_history']:
+        if isinstance(msg, HumanMessage):
+            messages.append({'user': msg.content})
+        elif isinstance(msg, AIMessage):
+            messages.append({'assistant': msg.content})
+    return json.dumps(messages, ensure_ascii=False, indent=2)
 
 
 def build_question_condensation_chain(
@@ -347,14 +394,27 @@ def build_question_condensation_chain(
     """
     Build the chat chain for contextualizing questions.
    """
+    # TODO deprecated: all Gen AI configurations are now supposed to provide this prompt; it is mandatory in the RAG configuration.
    if prompt is None:
        # Default prompt
        prompt = PromptTemplate(
            formatter=PromptFormatter.F_STRING,
            inputs={},
-            template='Given a chat history and the latest user question which might reference context in \
-the chat history, formulate a standalone question which can be understood without the chat history. \
-Do NOT answer the question, just reformulate it if needed and otherwise return it as is.',
+            template="""
+You are a helpful assistant that reformulates questions.
+
+You are given:
+- The conversation history between the user and the assistant
+- The most recent user question
+
+Your task:
+- Reformulate the user’s latest question into a clear, standalone query.
+- Incorporate relevant context from the conversation history.
+- Do NOT answer the question.
+- If the history does not provide additional context, keep the question as is.
+
+Return only the reformulated question.
+""",
        )
 
    return (
@@ -379,50 +439,55 @@ def contextualize_question(inputs: dict, chat_chain) -> str:
     return inputs['question']
 
 
-def rag_guard(inputs, response, documents_required):
+def rag_guard(question, answer, response, documents_required):
     """
     Validates the RAG system's response based on the presence or absence of source documents
     and the `documentsRequired` setting.
 
     Args:
-        inputs: question answering prompt inputs
+        question: the user question
+        answer: the LLM answer
         response: the RAG response
         documents_required (bool): Specifies whether documents are mandatory for the response.
""" - no_docs_retrieved = response['documents'] == [] - no_docs_but_required = no_docs_retrieved and documents_required - chain_can_give_no_answer_reply = 'no_answer' in inputs - chain_reply_no_answer = False - - if chain_can_give_no_answer_reply: - chain_reply_no_answer = response['answer'] == inputs['no_answer'] - - if no_docs_but_required: - if chain_can_give_no_answer_reply and chain_reply_no_answer: - # We expect the chain to use its non-response value, and it has done so, which is the expected behavior. - return - # Everything else isn't expected - message = 'The RAG system cannot provide an answer when no documents are found and documents are required' - rag_log(level=ERROR, message=message, inputs=inputs, response=response) + if ( + documents_required + and answer.status == 'found_in_context' + and len(response['documents']) == 0 + ): + message = 'No documents were retrieved, yet an answer was attempted.' + rag_log( + level=ERROR, + message=message, + question=question, + answer=answer.answer, + response=response, + ) raise GenAIGuardCheckException(ErrorInfo(cause=message)) - if chain_reply_no_answer and not no_docs_retrieved: - # If the chain responds with its non-response value and the documents are retrieved, - # so we remove them from the RAG response. - message = 'The RAG gives no answer for user question, but some documents has been found!' - rag_log(level=WARNING, message=message, inputs=inputs, response=response) + if answer.status == 'not_found_in_context' and len(response['documents']) > 0: + # If the answer is not found in context and some documents are retrieved, so we remove them from the RAG response. + message = 'No answer found in the retrieved context. The documents are therefore removed from the RAG response.' + rag_log( + level=WARNING, + message=message, + question=question, + answer=answer.answer, + response=response, + ) response['documents'] = [] -def rag_log(level, message, inputs, response): +def rag_log(level, message, question, answer, response): """ RAG logging Args: level: logging level message: message to log - inputs: question answering prompt inputs + question: question answering prompt inputs + answer: LLM answer response: the RAG response """ @@ -432,9 +497,9 @@ def rag_log(level, message, inputs, response): 'RAG chain - question="%(question)s", answer="%(answer)s", documents="%(documents)s"', { 'message': message, - 'question': inputs['question'], - 'answer': response['answer'], - 'documents': response['documents'], + 'question': question, + 'answer': answer, + 'documents': len(response['documents']), }, ) @@ -447,6 +512,9 @@ def get_rag_documents(handler: RAGCallbackHandler) -> List[RAGDocument]: handler: the RAG Callback Handler """ + if handler.records['documents'] is None: + return [] + return [ # Get first 100 char of content RAGDocument( @@ -457,6 +525,17 @@ def get_rag_documents(handler: RAGCallbackHandler) -> List[RAGDocument]: ] +def get_llm_answer(rag_chain_output) -> LLMAnswer: + if rag_chain_output is None: + return LLMAnswer() + + return LLMAnswer( + **json.loads( + rag_chain_output.strip().removeprefix('```json').removesuffix('```').strip() + ) + ) + + def get_rag_debug_data( request: RAGRequest, records_callback_handler: RAGCallbackHandler, rag_duration ) -> RAGDebugData: @@ -475,7 +554,7 @@ def get_rag_debug_data( documents=get_rag_documents(records_callback_handler), document_index_name=request.document_index_name, document_search_params=request.document_search_params, - answer=records_callback_handler.records['rag_chain_output'], 
+ answer=get_llm_answer(records_callback_handler.records['rag_chain_output']), duration=rag_duration, ) diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/observability/observabilty_service.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/observability/observabilty_service.py index ceb33e0c1b..f81bcd57a9 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/observability/observabilty_service.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/observability/observabilty_service.py @@ -42,7 +42,9 @@ def check_observability_setting(setting: ObservabilitySetting) -> bool: True for a valid Observability setting. Raise exception otherwise. """ - logger.info('Get the Callback handler Factory, then check the Observability setting.') + logger.info( + 'Get the Callback handler Factory, then check the Observability setting.' + ) return get_callback_handler_factory(setting).check_observability_setting() diff --git a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py index 6aeedf86a6..000e483226 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py +++ b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py @@ -29,6 +29,7 @@ from gen_ai_orchestrator.models.guardrail.bloomz.bloomz_guardrail_setting import ( BloomzGuardrailSetting, ) +from gen_ai_orchestrator.models.rag.rag_models import LLMAnswer from gen_ai_orchestrator.routers.requests.requests import RAGRequest from gen_ai_orchestrator.services.langchain import rag_chain from gen_ai_orchestrator.services.langchain.factories.langchain_factory import ( @@ -43,19 +44,25 @@ ) -@patch('gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post') -@patch('gen_ai_orchestrator.services.langchain.factories.langchain_factory.get_compressor_factory') -@patch('gen_ai_orchestrator.services.langchain.factories.langchain_factory.get_callback_handler_factory') +@patch( + 'gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post' +) +@patch( + 'gen_ai_orchestrator.services.langchain.factories.langchain_factory.get_compressor_factory' +) +@patch( + 'gen_ai_orchestrator.services.langchain.factories.langchain_factory.get_callback_handler_factory' +) @patch('gen_ai_orchestrator.services.langchain.rag_chain.create_rag_chain') @patch('gen_ai_orchestrator.services.langchain.rag_chain.RAGCallbackHandler') @patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_guard') @patch('gen_ai_orchestrator.services.langchain.rag_chain.RAGResponse') -@patch('gen_ai_orchestrator.services.langchain.rag_chain.TextWithFootnotes') @patch('gen_ai_orchestrator.services.langchain.rag_chain.RAGDebugData') +@patch('gen_ai_orchestrator.services.langchain.rag_chain.get_llm_answer') @pytest.mark.asyncio async def test_rag_chain( + mocked_get_llm_answer, mocked_rag_debug_data, - mocked_text_with_footnotes, mocked_rag_response, mocked_rag_guard, mocked_callback_init, @@ -70,13 +77,19 @@ async def test_rag_chain( 'dialog': { 'history': [ {'text': 'Hello, how can I do this?', 'type': 'HUMAN'}, - {'text': 'you can do this with the following method ....', 'type': 'AI'} + { + 'text': 'you can do this with the following method ....', + 'type': 'AI', + }, ], - 'tags': [] + 'tags': [], }, 
'question_answering_llm_setting': { 'provider': 'OpenAI', - 'api_key': {'type': 'Raw', 'secret': 'ab7***************************A1IV4B'}, + 'api_key': { + 'type': 'Raw', + 'secret': 'ab7***************************A1IV4B', + }, 'temperature': 1.2, 'model': 'gpt-3.5-turbo', }, @@ -92,15 +105,18 @@ async def test_rag_chain( {question} Answer in {locale}:""", - 'inputs' : { + 'inputs': { 'question': 'How to get started playing guitar ?', 'no_answer': 'Sorry, I don t know.', 'locale': 'French', - } + }, }, 'embedding_question_em_setting': { 'provider': 'OpenAI', - 'api_key': {'type': 'Raw', 'secret': 'ab7***************************A1IV4B'}, + 'api_key': { + 'type': 'Raw', + 'secret': 'ab7***************************A1IV4B', + }, 'model': 'text-embedding-ada-002', }, 'document_index_name': 'my-index-name', @@ -154,11 +170,23 @@ async def test_rag_chain( AIMessage(content='you can do this with the following method ....'), ], } - docs = [Document( - page_content='some page content', - metadata={'id':'123-abc', 'title':'my-title', 'source': None}, - )] - response = {'answer': 'an answer from llm', 'documents': docs} + docs = [ + Document( + page_content='some page content', + metadata={'id': '123-abc', 'title': 'my-title', 'source': None}, + ) + ] + response = { + 'answer': { + 'status': '', + 'answer': 'an answer from llm', + 'topic': None, + 'suggested_topics': None, + 'context': [], + }, + 'documents': docs, + } + llm_answer = LLMAnswer(**response['answer']) # Setup mock factories/init return value observability_factory_instance = mocked_get_callback_handler_factory.return_value @@ -191,27 +219,26 @@ async def test_rag_chain( ) # Assert the response is build using the expected settings mocked_rag_response.assert_called_once_with( - # TextWithFootnotes must be mocked or mapping the footnotes will fail - answer=mocked_text_with_footnotes( - text=mocked_rag_answer['answer'], footnotes=[] - ), + answer=llm_answer, + footnotes=set(), debug=mocked_rag_debug_data(request, mocked_rag_answer, mocked_callback, 1), - observability_info=None - ) - mocked_get_document_compressor_factory( - setting=request.compressor_setting + observability_info=None, ) + mocked_get_document_compressor_factory(setting=request.compressor_setting) # Assert the rag guardrail is called mocked_guardrail_parse.assert_called_once_with( os.path.join(request.guardrail_setting.api_base, 'guardrail'), - json={'text': [mocked_rag_answer['answer']]}, + json={'text': [mocked_rag_answer['answer']['answer']]}, ) # Assert the rag guard is called mocked_rag_guard.assert_called_once_with( - inputs, response, request.documents_required + inputs, llm_answer, response, request.documents_required ) -@patch('gen_ai_orchestrator.services.langchain.impls.guardrail.bloomz_guardrail.requests.post') + +@patch( + 'gen_ai_orchestrator.services.langchain.impls.guardrail.bloomz_guardrail.requests.post' +) def test_guardrail_parse_succeed_with_toxicities_encountered( mocked_guardrail_response, ): @@ -220,7 +247,15 @@ def test_guardrail_parse_succeed_with_toxicities_encountered( provider='BloomzGuardrail', max_score=0.5, api_base='http://test-guard.com' ) ).get_parser() - rag_response = {'answer': 'This is a sample text.'} + rag_response = { + 'answer': { + 'status': '', + 'answer': 'This is a sample text.', + 'topic': None, + 'suggested_topics': None, + 'context': [], + } + } mocked_response = MagicMock() mocked_response.status_code = 200 @@ -236,11 +271,11 @@ def test_guardrail_parse_succeed_with_toxicities_encountered( } 
mocked_guardrail_response.return_value = mocked_response - guardrail_output = guardrail.parse(rag_response['answer']) + guardrail_output = guardrail.parse(rag_response['answer']['answer']) mocked_guardrail_response.assert_called_once_with( os.path.join(guardrail.endpoint, 'guardrail'), - json={'text': [rag_response['answer']]}, + json={'text': [rag_response['answer']['answer']]}, ) assert guardrail_output == { 'content': 'This is a sample text.', @@ -249,14 +284,24 @@ def test_guardrail_parse_succeed_with_toxicities_encountered( } -@patch('gen_ai_orchestrator.services.langchain.impls.guardrail.bloomz_guardrail.requests.post') +@patch( + 'gen_ai_orchestrator.services.langchain.impls.guardrail.bloomz_guardrail.requests.post' +) def test_guardrail_parse_fail(mocked_guardrail_response): guardrail = get_guardrail_factory( BloomzGuardrailSetting( provider='BloomzGuardrail', max_score=0.5, api_base='http://test-guard.com' ) ).get_parser() - rag_response = {'answer': 'This is a sample text.'} + rag_response = { + 'answer': { + 'status': '', + 'answer': 'This is a sample text.', + 'topic': None, + 'suggested_topics': None, + 'context': [], + } + } mocked_response = MagicMock() mocked_response.status_code = 500 @@ -266,15 +311,17 @@ def test_guardrail_parse_fail(mocked_guardrail_response): HTTPError, match=f"Error {mocked_response.status_code}. Bloomz guardrail didn't respond as expected.", ): - guardrail.parse(rag_response['answer']) + guardrail.parse(rag_response['answer']['answer']) mocked_guardrail_response.assert_called_once_with( os.path.join(guardrail.endpoint, 'guardrail'), - json={'text': [rag_response['answer']]}, + json={'text': [rag_response['answer']['answer']]}, ) -@patch('gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post') +@patch( + 'gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post' +) def test_compress_documents_should_succeed(mocked_rerank): bloomz_reranker = BloomzRerank(label='entailement', endpoint='http://example.com') documents = [ @@ -351,7 +398,9 @@ def test_compress_documents_should_succeed(mocked_rerank): ] -@patch('gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post') +@patch( + 'gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post' +) def test_compress_documents_with_unknown_label(mocked_rerank): bloomz_reranker = BloomzRerank(label='unknown_label', endpoint='http://example.com') documents = [ @@ -414,59 +463,87 @@ def test_check_guardrail_output_is_ok(): @patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_log') def test_rag_guard_fails_if_no_docs_in_valid_answer(mocked_log): - inputs = {'no_answer': "Sorry, I don't know."} + question = 'Hi!' response = { - 'answer': 'a valid answer', + 'answer': {'status': 'found_in_context', 'answer': 'a valid answer'}, 'documents': [], } try: - rag_chain.rag_guard(inputs, response,documents_required=True) + rag_chain.rag_guard( + question, LLMAnswer(**response['answer']), response, documents_required=True + ) except Exception as e: assert isinstance(e, GenAIGuardCheckException) @patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_log') def test_rag_guard_accepts_no_answer_even_with_docs(mocked_log): - inputs = {'no_answer': "Sorry, I don't know."} + question = 'Hi!' 
response = { - 'answer': "Sorry, I don't know.", + 'answer': { + 'status': 'not_found_in_context', + 'answer': 'Sorry, I don t know.', + 'context': [ + { + 'chunk': 1, + 'sentences': ['str1'], + } + ], + }, 'documents': ['a doc as a string'], } - rag_chain.rag_guard(inputs, response, documents_required=True) + rag_chain.rag_guard( + question, LLMAnswer(**response['answer']), response, documents_required=True + ) + # No answer found in the retrieved context. The documents are therefore removed from the RAG response. assert response['documents'] == [] @patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_log') def test_rag_guard_valid_answer_with_docs(mocked_log): - inputs = {'no_answer': "Sorry, I don't know."} + question = 'Hi!' response = { - 'answer': 'a valid answer', + 'answer': { + 'status': 'found_in_context', + 'answer': 'a valid answer', + }, 'documents': ['doc1', 'doc2'], } - rag_chain.rag_guard(inputs, response, documents_required=True) + rag_chain.rag_guard( + question, LLMAnswer(**response['answer']), response, documents_required=True + ) assert response['documents'] == ['doc1', 'doc2'] + @patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_log') def test_rag_guard_no_answer_with_no_docs(mocked_log): - inputs = {'no_answer': "Sorry, I don't know."} + question = 'Hi!' response = { - 'answer': "Sorry, I don't know.", + 'answer': {'status': 'not_found_in_context', 'answer': 'Sorry, I don t know.'}, 'documents': [], } - rag_chain.rag_guard(inputs, response, documents_required=True) + rag_chain.rag_guard( + question, LLMAnswer(**response['answer']), response, documents_required=True + ) assert response['documents'] == [] + @patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_log') def test_rag_guard_without_no_answer_input(mocked_log): """Test that __rag_guard handles missing no_answer input correctly.""" - inputs = {} # No 'no_answer' key + question = 'Hi!' 
response = { - 'answer': 'some answer', + 'answer': { + 'status': 'found_in_context', + 'answer': 'a valid answer', + }, 'documents': [], } with pytest.raises(GenAIGuardCheckException) as exc: - rag_chain.rag_guard(inputs, response, documents_required=True) + rag_chain.rag_guard( + question, LLMAnswer(**response['answer']), response, documents_required=True + ) mocked_log.assert_called_once() - assert isinstance(exc.value, GenAIGuardCheckException) \ No newline at end of file + assert isinstance(exc.value, GenAIGuardCheckException) From b4facf748b42b37c7a8677ebb555dcfc2a897ef3 Mon Sep 17 00:00:00 2001 From: Mohamed ASSOUKTI Date: Tue, 3 Feb 2026 17:08:21 +0100 Subject: [PATCH 2/7] [DERCBOT-1609] w --- .../model/genai/BotRAGConfigurationDTO.kt | 3 - .../src/test/kotlin/service/RAGServiceTest.kt | 1 - .../service/RAGValidationServiceTest.kt | 1 - .../admin/bot/rag/BotRAGConfiguration.kt | 1 - .../kotlin/engine/config/RAGAnswerHandler.kt | 1 - .../kotlin/BotRAGConfigurationMongoDAOTest.kt | 4 +- .../orchestratorclient/requests/RAGRequest.kt | 1 - .../routers/requests/requests.py | 39 +++--- .../services/langchain/rag_chain.py | 104 +++++---------- .../server/tests/services/test_rag_chain.py | 119 +++--------------- 10 files changed, 71 insertions(+), 203 deletions(-) diff --git a/bot/admin/server/src/main/kotlin/model/genai/BotRAGConfigurationDTO.kt b/bot/admin/server/src/main/kotlin/model/genai/BotRAGConfigurationDTO.kt index 5afb90aba9..adef95304d 100644 --- a/bot/admin/server/src/main/kotlin/model/genai/BotRAGConfigurationDTO.kt +++ b/bot/admin/server/src/main/kotlin/model/genai/BotRAGConfigurationDTO.kt @@ -42,7 +42,6 @@ data class BotRAGConfigurationDTO( val emSetting: EMSettingDTO, val indexSessionId: String? = null, val indexName: String? = null, - val documentsRequired: Boolean = true, val debugEnabled: Boolean, val maxDocumentsRetrieved: Int, val maxMessagesFromHistory: Int, @@ -61,7 +60,6 @@ data class BotRAGConfigurationDTO( emSetting = configuration.emSetting.toDTO(), indexSessionId = configuration.indexSessionId, indexName = configuration.generateIndexName(), - documentsRequired = configuration.documentsRequired, debugEnabled = configuration.debugEnabled, maxDocumentsRetrieved = configuration.maxDocumentsRetrieved, maxMessagesFromHistory = configuration.maxMessagesFromHistory, @@ -97,7 +95,6 @@ data class BotRAGConfigurationDTO( dto = emSetting, ), indexSessionId = indexSessionId, - documentsRequired = documentsRequired, debugEnabled = debugEnabled, maxDocumentsRetrieved = maxDocumentsRetrieved, maxMessagesFromHistory = maxMessagesFromHistory, diff --git a/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt b/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt index 698f4c5381..d34d8ac5c0 100644 --- a/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt +++ b/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt @@ -97,7 +97,6 @@ class RAGServiceTest : AbstractTest() { model = "model", apiBase = "url", ), - documentsRequired = true, debugEnabled = false, maxDocumentsRetrieved = 2, maxMessagesFromHistory = 2, diff --git a/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt b/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt index edff4ee726..bfa0dd9631 100644 --- a/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt +++ b/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt @@ -90,7 +90,6 @@ class RAGValidationServiceTest { questionAnsweringLlmSetting = openAILLMSetting, 
questionAnsweringPrompt = PromptTemplate(template = "How to bike in the rain"), emSetting = azureOpenAIEMSetting, - documentsRequired = true, debugEnabled = false, maxDocumentsRetrieved = 2, maxMessagesFromHistory = 2, diff --git a/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt b/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt index 6f2c1af38e..18b6552f48 100644 --- a/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt +++ b/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt @@ -35,7 +35,6 @@ data class BotRAGConfiguration( val llmSetting: LLMSetting? = null, val emSetting: EMSetting, val indexSessionId: String? = null, - val documentsRequired: Boolean = true, val debugEnabled: Boolean = false, val maxDocumentsRetrieved: Int = 4, val maxMessagesFromHistory: Int = 5, diff --git a/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt b/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt index 5f8d5c0fa1..d37fa6b0f9 100644 --- a/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt +++ b/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt @@ -201,7 +201,6 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler { compressorSetting = botDefinition.documentCompressorConfiguration?.setting, vectorStoreSetting = vectorStoreSetting, observabilitySetting = botDefinition.observabilityConfiguration?.setting, - documentsRequired = ragConfiguration.documentsRequired, ), debug = action.metadata.debugEnabled || ragConfiguration.debugEnabled, ) diff --git a/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt b/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt index bad7061d6c..bc55f21568 100644 --- a/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt +++ b/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt @@ -118,11 +118,11 @@ internal class BotRAGConfigurationMongoDAOTest : AbstractTest() { BotRAGConfigurationMongoDAO.save(config1) BotRAGConfigurationMongoDAO.save(config2) - BotRAGConfigurationMongoDAO.save(config1.copy(documentsRequired = true)) + BotRAGConfigurationMongoDAO.save(config1.copy(debugEnabled = true)) val configBDD = BotRAGConfigurationMongoDAO.findByNamespaceAndBotId("namespace1", "botId1") - assertEquals(config1.copy(documentsRequired = true), configBDD) + assertEquals(config1.copy(debugEnabled = true), configBDD) } @Test diff --git a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGRequest.kt b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGRequest.kt index 4ae3eb2749..43a47de2dc 100644 --- a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGRequest.kt +++ b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGRequest.kt @@ -34,5 +34,4 @@ data class RAGRequest( val compressorSetting: DocumentCompressorSetting?, val vectorStoreSetting: VectorStoreSetting?, val observabilitySetting: ObservabilitySetting?, - val documentsRequired: Boolean = true, ) diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py index 238d74da2a..5a25f40ed1 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py +++ 
b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py @@ -66,7 +66,9 @@ class ObservabilityProviderSettingStatusRequest(BaseModel): class DocumentCompressorProviderSettingStatusRequest(BaseModel): """The request for the Document Compressor Provider Setting Status""" - setting: DocumentCompressorSetting = Field(description='The Document Compressor Provider setting to be checked.') + setting: DocumentCompressorSetting = Field( + description='The Document Compressor Provider setting to be checked.' + ) class BaseRequest(BaseModel): @@ -144,21 +146,28 @@ class VectorStoreProviderSettingStatusRequest(BaseModel): default=None, ) + class DialogDetails(BaseModel): """The dialog details model""" dialog_id: Optional[str] = Field( description='The dialog/session ID, attached to the observability traces if ' - 'the observability provider support it.', - default=None, examples=['uuid-0123']) + 'the observability provider support it.', + default=None, + examples=['uuid-0123'], + ) user_id: Optional[str] = Field( description='The user ID, attached to the observability traces if the observability provider support it', - default=None, examples=['address@mail.com']) + default=None, + examples=['address@mail.com'], + ) history: list[ChatMessage] = Field( - description="Conversation history, used to reformulate the user's question.") + description="Conversation history, used to reformulate the user's question." + ) tags: list[str] = Field( description='List of tags, attached to the observability trace, if the observability provider support it.', - examples=[['my-Tag']]) + examples=[['my-Tag']], + ) class RAGRequest(BaseRequest): @@ -166,15 +175,16 @@ class RAGRequest(BaseRequest): dialog: Optional[DialogDetails] = Field(description='The user dialog details.') question_condensing_llm_setting: Optional[LLMSetting] = Field( - description="LLM setting, used to condense the user's question.", default=None) + description="LLM setting, used to condense the user's question.", default=None + ) question_condensing_prompt: Optional[PromptTemplate] = Field( description='Prompt template, used to create a prompt with inputs for jinja and fstring format', - default = None + default=None, ) question_answering_llm_setting: LLMSetting = Field( description='LLM setting, used to perform a QA Prompt.' ) - question_answering_prompt : PromptTemplate = Field( + question_answering_prompt: PromptTemplate = Field( description='Prompt template, used to create a prompt with inputs for jinja and fstring format' ) guardrail_setting: Optional[GuardrailSetting] = Field( @@ -182,8 +192,8 @@ class RAGRequest(BaseRequest): ) documents_required: Optional[bool] = Field( description='Specifies whether the presence of documents is mandatory for generating answers. ' - 'If set to True, the system will only provide answers when relevant documents are found. ' - 'If set to False, the system can respond without requiring document sources. Default is True.', + 'If set to True, the system will only provide answers when relevant documents are found. ' + 'If set to False, the system can respond without requiring document sources. 
Default is True.', default=True, ) @@ -191,7 +201,7 @@ class RAGRequest(BaseRequest): 'json_schema_extra': { 'examples': [ { - 'dialog' : { + 'dialog': { 'history': [ {'text': 'Hello, how can I do this?', 'type': 'HUMAN'}, { @@ -212,7 +222,7 @@ class RAGRequest(BaseRequest): 'question_answering_prompt': { 'formatter': 'f-string', 'template': """Use the following context to answer the question at the end. -If you don't know the answer, just say {no_answer}. +If you don't know the answer, just say I don't know. Context: {context} @@ -223,9 +233,8 @@ class RAGRequest(BaseRequest): Answer in {locale}:""", 'inputs': { 'question': 'How to get started playing guitar ?', - 'no_answer': 'Sorry, I don t know.', 'locale': 'French', - } + }, }, 'embedding_question_em_setting': { 'provider': 'OpenAI', diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py index df5ac562ce..e585db67f4 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py @@ -21,7 +21,6 @@ import logging import time from functools import partial -from logging import ERROR, WARNING from operator import itemgetter from typing import List, Optional @@ -32,11 +31,14 @@ from langchain_core.callbacks import BaseCallbackHandler from langchain_core.documents import Document from langchain_core.messages import AIMessage, HumanMessage -from langchain_core.output_parsers import JsonOutputParser, StrOutputParser +from langchain_core.output_parsers import ( + JsonOutputParser, + PydanticOutputParser, + StrOutputParser, +) from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.prompts import PromptTemplate as LangChainPromptTemplate from langchain_core.runnables import ( - RunnableConfig, RunnableLambda, RunnableParallel, RunnablePassthrough, @@ -44,7 +46,6 @@ ) from langchain_core.runnables.config import RunnableConfig from langchain_core.vectorstores import VectorStoreRetriever -from langfuse import get_client, propagate_attributes from typing_extensions import Any from gen_ai_orchestrator.errors.exceptions.exceptions import ( @@ -122,18 +123,21 @@ async def execute_rag_chain( ) message_history = ChatMessageHistory() - session_id = None - user_id = None - tags = [] + metadata = {} + if request.dialog: for msg in request.dialog.history: if ChatMessageType.HUMAN == msg.type: message_history.add_user_message(msg.text) else: message_history.add_ai_message(msg.text) - session_id = request.dialog.dialog_id - user_id = request.dialog.user_id - tags = request.dialog.tags or [] + + if request.dialog.user_id is not None: + metadata['langfuse_user_id'] = request.dialog.user_id + if request.dialog.dialog_id is not None: + metadata['langfuse_session_id'] = request.dialog.dialog_id + if request.dialog.tags: + metadata['langfuse_tags'] = request.dialog.tags logger.debug( 'RAG chain - Use chat history: %s', @@ -165,14 +169,6 @@ async def execute_rag_chain( ) callback_handlers.append(observability_handler) - metadata = {} - if user_id is not None: - metadata['langfuse_user_id'] = user_id - if session_id is not None: - metadata['langfuse_session_id'] = session_id - if tags: - metadata['langfuse_tags'] = tags - response = await conversational_retrieval_chain.ainvoke( input=inputs, 
config=RunnableConfig( @@ -180,10 +176,8 @@ async def execute_rag_chain( metadata=metadata, ), ) - llm_answer = LLMAnswer(**response['answer']) - # RAG Guard - rag_guard(inputs, llm_answer, response, request.documents_required) + llm_answer: LLMAnswer = response['answer'] # Guardrail if request.guardrail_setting: @@ -204,20 +198,22 @@ async def execute_rag_chain( if ctx.used_in_response } + footnotes = { + Footnote( + identifier=doc.metadata['id'], + title=doc.metadata['title'], + url=doc.metadata['source'], + content=get_source_content(doc), + score=doc.metadata.get('retriever_score', None), + ) + for doc in response['documents'] + if doc.metadata['id'] in contexts_by_chunk + } + # Returning RAG response return RAGResponse( answer=llm_answer, - footnotes={ - Footnote( - identifier=doc.metadata['id'], - title=doc.metadata['title'], - url=doc.metadata['source'], - content=get_source_content(doc), - score=doc.metadata.get('retriever_score', None), - ) - for doc in response['documents'] - if doc.metadata['id'] in contexts_by_chunk - }, + footnotes=footnotes, observability_info=get_observability_info( observability_handler, ObservabilityTrace.RAG.value if observability_handler is not None else None, @@ -344,6 +340,8 @@ def retrieve_with_variants(inputs): } ) + parser = PydanticOutputParser(pydantic_object=LLMAnswer, name='rag_chain_output') + return rag_inputs | RunnablePassthrough.assign( answer=( { @@ -362,7 +360,7 @@ def retrieve_with_variants(inputs): } | rag_prompt | question_answering_llm - | JsonOutputParser(pydantic_object=LLMAnswer, name='rag_chain_output') + | parser ) ) @@ -439,46 +437,6 @@ def contextualize_question(inputs: dict, chat_chain) -> str: return inputs['question'] -def rag_guard(question, answer, response, documents_required): - """ - Validates the RAG system's response based on the presence or absence of source documents - and the `documentsRequired` setting. - - Args: - question: user question - answer: the LLM answer - response: the RAG response - documents_required (bool): Specifies whether documents are mandatory for the response. - """ - - if ( - documents_required - and answer.status == 'found_in_context' - and len(response['documents']) == 0 - ): - message = 'No documents were retrieved, yet an answer was attempted.' - rag_log( - level=ERROR, - message=message, - question=question, - answer=answer.answer, - response=response, - ) - raise GenAIGuardCheckException(ErrorInfo(cause=message)) - - if answer.status == 'not_found_in_context' and len(response['documents']) > 0: - # If the answer is not found in context and some documents are retrieved, so we remove them from the RAG response. - message = 'No answer found in the retrieved context. The documents are therefore removed from the RAG response.' 
-        rag_log(
-            level=WARNING,
-            message=message,
-            question=question,
-            answer=answer.answer,
-            response=response,
-        )
-        response['documents'] = []
-
-
 def rag_log(level, message, question, answer, response):
     """
     RAG logging
diff --git a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py
index 000e483226..ef16f03eb9 100644
--- a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py
+++ b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py
@@ -55,7 +55,6 @@
 )
 @patch('gen_ai_orchestrator.services.langchain.rag_chain.create_rag_chain')
 @patch('gen_ai_orchestrator.services.langchain.rag_chain.RAGCallbackHandler')
-@patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_guard')
 @patch('gen_ai_orchestrator.services.langchain.rag_chain.RAGResponse')
 @patch('gen_ai_orchestrator.services.langchain.rag_chain.RAGDebugData')
 @patch('gen_ai_orchestrator.services.langchain.rag_chain.get_llm_answer')
@@ -64,7 +63,6 @@ async def test_rag_chain(
     mocked_get_llm_answer,
     mocked_rag_debug_data,
     mocked_rag_response,
-    mocked_rag_guard,
     mocked_callback_init,
     mocked_create_rag_chain,
     mocked_get_callback_handler_factory,
@@ -96,7 +94,7 @@ async def test_rag_chain(
         'question_answering_prompt': {
             'formatter': 'f-string',
             'template': """Use the following context to answer the question at the end.
-If you don't know the answer, just say {no_answer}.
+If you don't know the answer, just say I don't know.
 
 Context:
 {context}
@@ -107,7 +105,6 @@ async def test_rag_chain(
 Answer in {locale}:""",
             'inputs': {
                 'question': 'How to get started playing guitar ?',
-                'no_answer': 'Sorry, I don t know.',
                 'locale': 'French',
             },
         },
@@ -177,16 +174,18 @@ async def test_rag_chain(
         )
     ]
     response = {
-        'answer': {
-            'status': '',
-            'answer': 'an answer from llm',
-            'topic': None,
-            'suggested_topics': None,
-            'context': [],
-        },
+        'answer': LLMAnswer(
+            **{
+                'status': '',
+                'answer': 'an answer from llm',
+                'topic': None,
+                'suggested_topics': None,
+                'context': [],
+            }
+        ),
         'documents': docs,
     }
-    llm_answer = LLMAnswer(**response['answer'])
+    llm_answer = response['answer']
 
     # Setup mock factories/init return value
     observability_factory_instance = mocked_get_callback_handler_factory.return_value
@@ -228,11 +227,9 @@ async def test_rag_chain(
     # Assert the rag guardrail is called
     mocked_guardrail_parse.assert_called_once_with(
         os.path.join(request.guardrail_setting.api_base, 'guardrail'),
-        json={'text': [mocked_rag_answer['answer']['answer']]},
-    )
-    # Assert the rag guard is called
-    mocked_rag_guard.assert_called_once_with(
-        inputs, llm_answer, response, request.documents_required
+        json={
+            'text': [mocked_rag_answer['answer'].answer]
+        },  # TODO MASS: answer.answer reads poorly, to be renamed
     )
 
 
@@ -459,87 +456,3 @@ def test_check_guardrail_output_is_ok():
     }
 
     assert check_guardrail_output(guardrail_output) is True
-
-
-@patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_log')
-def test_rag_guard_fails_if_no_docs_in_valid_answer(mocked_log):
-    question = 'Hi!'
- response = { - 'answer': {'status': 'found_in_context', 'answer': 'a valid answer'}, - 'documents': [], - } - try: - rag_chain.rag_guard( - question, LLMAnswer(**response['answer']), response, documents_required=True - ) - except Exception as e: - assert isinstance(e, GenAIGuardCheckException) - - -@patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_log') -def test_rag_guard_accepts_no_answer_even_with_docs(mocked_log): - question = 'Hi!' - response = { - 'answer': { - 'status': 'not_found_in_context', - 'answer': 'Sorry, I don t know.', - 'context': [ - { - 'chunk': 1, - 'sentences': ['str1'], - } - ], - }, - 'documents': ['a doc as a string'], - } - rag_chain.rag_guard( - question, LLMAnswer(**response['answer']), response, documents_required=True - ) - # No answer found in the retrieved context. The documents are therefore removed from the RAG response. - assert response['documents'] == [] - - -@patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_log') -def test_rag_guard_valid_answer_with_docs(mocked_log): - question = 'Hi!' - response = { - 'answer': { - 'status': 'found_in_context', - 'answer': 'a valid answer', - }, - 'documents': ['doc1', 'doc2'], - } - rag_chain.rag_guard( - question, LLMAnswer(**response['answer']), response, documents_required=True - ) - assert response['documents'] == ['doc1', 'doc2'] - - -@patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_log') -def test_rag_guard_no_answer_with_no_docs(mocked_log): - question = 'Hi!' - response = { - 'answer': {'status': 'not_found_in_context', 'answer': 'Sorry, I don t know.'}, - 'documents': [], - } - rag_chain.rag_guard( - question, LLMAnswer(**response['answer']), response, documents_required=True - ) - assert response['documents'] == [] - - -@patch('gen_ai_orchestrator.services.langchain.rag_chain.rag_log') -def test_rag_guard_without_no_answer_input(mocked_log): - """Test that __rag_guard handles missing no_answer input correctly.""" - question = 'Hi!' - response = { - 'answer': { - 'status': 'found_in_context', - 'answer': 'a valid answer', - }, - 'documents': [], - } - with pytest.raises(GenAIGuardCheckException) as exc: - rag_chain.rag_guard( - question, LLMAnswer(**response['answer']), response, documents_required=True - ) - - mocked_log.assert_called_once() - - assert isinstance(exc.value, GenAIGuardCheckException) From df02044f14208f9ac6ccea6fb44c2f7b55c0a31a Mon Sep 17 00:00:00 2001 From: Mohamed ASSOUKTI Date: Tue, 19 Aug 2025 10:56:44 +0200 Subject: [PATCH 3/7] [DERCBOT-1609] Structuring the LLM response --- .../admin/bot/rag/BotRAGConfiguration.kt | 4 + .../kotlin/engine/config/RAGAnswerHandler.kt | 84 ++-- .../orchestratorclient/responses/Models.kt | 16 +- .../models/rag/rag_models.py | 130 ++--- .../routers/responses/responses.py | 87 ++-- .../services/langchain/rag_chain.py | 297 ++++++------ .../server/tests/services/test_rag_chain.py | 459 ++++++++++-------- 7 files changed, 534 insertions(+), 543 deletions(-) diff --git a/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt b/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt index 18b6552f48..e769149c7d 100644 --- a/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt +++ b/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt @@ -35,6 +35,10 @@ data class BotRAGConfiguration( val llmSetting: LLMSetting? = null, val emSetting: EMSetting, val indexSessionId: String? = null, + @Deprecated("Replaced by LLM answer status") + val noAnswerSentence: String, + val noAnswerStoryId: String? 
= null,
+    val documentsRequired: Boolean = true,
     val debugEnabled: Boolean = false,
     val maxDocumentsRetrieved: Int = 4,
     val maxMessagesFromHistory: Int = 5,
diff --git a/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt b/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt
index d37fa6b0f9..e2a3dee712 100644
--- a/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt
+++ b/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt
@@ -28,10 +28,7 @@ import ai.tock.bot.engine.action.SendSentence
 import ai.tock.bot.engine.action.SendSentenceWithFootnotes
 import ai.tock.bot.engine.dialog.Dialog
 import ai.tock.bot.engine.user.PlayerType
-import ai.tock.genai.orchestratorclient.requests.ChatMessage
-import ai.tock.genai.orchestratorclient.requests.ChatMessageType
-import ai.tock.genai.orchestratorclient.requests.DialogDetails
-import ai.tock.genai.orchestratorclient.requests.RAGRequest
+import ai.tock.genai.orchestratorclient.requests.*
 import ai.tock.genai.orchestratorclient.responses.LLMAnswer
 import ai.tock.genai.orchestratorclient.responses.ObservabilityInfo
 import ai.tock.genai.orchestratorclient.responses.RAGResponse
@@ -62,7 +59,7 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler {
         BotRepository.saveMetric(createMetric(MetricType.STORY_HANDLED))
 
         // Call RAG Api - Gen AI Orchestrator
-        val (answer, footnotes, debug, redirectStory, observabilityInfo) = rag(this)
+        val (answer, footnotes, debug, noAnswerStory, observabilityInfo) = rag(this)
 
         // Add debug data if available and if debugging is enabled
         if (debug != null) {
@@ -72,15 +69,23 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler {
         val modifiedObservabilityInfo = observabilityInfo?.let { updateObservabilityInfo(this, it) }
 
-        // Footnotes building
-        val preparedFootnotes =
-            footnotes?.map {
-                Footnote(
-                    it.identifier,
-                    it.title,
-                    it.url,
-                    if (action.metadata.sourceWithContent) it.content else null,
-                    it.score,
-                )
-            }?.toMutableList() ?: mutableListOf()
+        send(
+            action = SendSentenceWithFootnotes(
+                playerId = botId,
+                applicationId = connectorId,
+                recipientId = userId,
+                text = answer.answer,
+                footnotes = footnotes?.map {
+                    Footnote(
+                        it.identifier, it.title, it.url,
+                        if (action.metadata.sourceWithContent) it.content else null,
+                        it.score
+                    )
+                }?.toMutableList() ?: mutableListOf(),
+                // modifiedObservabilityInfo includes the public langfuse URL if filled.
+                metadata = ActionMetadata(isGenAiRagAnswer = true, observabilityInfo = modifiedObservabilityInfo)
+            )
+        )
@@ -130,12 +135,24 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler {
      * @param botBus the bot Bus
      * @param response the RAG response
      */
-    private fun ragStoryRedirection(
-        botBus: BotBus,
-        response: RAGResponse?,
-    ): StoryDefinition? {
-        return response?.answer?.redirectionIntent?.let {
-            botBus.botDefinition.findStoryDefinition(it, "")
+    private fun ragStoryRedirection(botBus: BotBus, response: RAGResponse?): StoryDefinition? {
+        return with(botBus) {
+            botDefinition.ragConfiguration?.let { ragConfig ->
+                if (response?.answer?.status.equals("not_found_in_context", ignoreCase = true)) {
+                    // Save no answer metric
+                    saveRagMetric(IndicatorValues.NO_ANSWER)
+
+                    // Switch to no answer story if configured
+                    if (!ragConfig.noAnswerStoryId.isNullOrBlank()) {
+                        logger.info { "Switch to the no-answer RAG story." }
+                        getNoAnswerRAGStory(ragConfig)
+                    } else null
+                } else {
+                    // Save success metric
+                    saveRagMetric(IndicatorValues.SUCCESS)
+                    null
+                }
+            }
+        }
     }
 
@@ -214,26 +231,23 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler {
         }
 
         // Handle RAG response
-        return RAGResult(
-            response?.answer,
-            response?.footnotes,
-            response?.debug,
-            ragStoryRedirection(this, response),
-            response?.observabilityInfo,
-        )
+        return RAGResult(response?.answer, response?.footnotes, response?.debug, ragStoryRedirection(this, response), response?.observabilityInfo)
     } catch (exc: Exception) {
         logger.error { exc }
         // Save failure metric
         saveRagMetric(IndicatorValues.FAILURE)
 
-        return RAGResult(
-            answer = LLMAnswer(status = "error", answer = technicalErrorMessage),
-            debug =
-                when (exc) {
-                    is GenAIOrchestratorBusinessError -> RAGError(exc.message, exc.error)
-                    is GenAIOrchestratorValidationError -> RAGError(exc.message, exc.detail)
-                    else -> RAGError(errorMessage = exc.message)
-                },
+        return if (exc is GenAIOrchestratorBusinessError && exc.error.info.error == "APITimeoutError") {
+            logger.info { "An APITimeoutError was raised, switching to the no-answer story." }
+            RAGResult(noAnswerStory = getNoAnswerRAGStory(ragConfiguration))
+        } else RAGResult(
+            answer = LLMAnswer(status = "error", answer = technicalErrorMessage),
+            debug = when (exc) {
+                is GenAIOrchestratorBusinessError -> RAGError(exc.message, exc.error)
+                is GenAIOrchestratorValidationError -> RAGError(exc.message, exc.detail)
+                else -> RAGError(errorMessage = exc.message)
+            }
         )
     }
 }
diff --git a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/Models.kt b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/Models.kt
index 95f765d00f..4cd5ceaaf8 100644
--- a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/Models.kt
+++ b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/responses/Models.kt
@@ -23,16 +23,18 @@ data class ChunkSentences(
     val reason: String? = null,
 )
 
+data class ChunkSentences(
+    val chunk: String? = null,
+    val sentences: List<String>? = emptyList(),
+    val reason: String? = null,
+)
+
 data class LLMAnswer(
-    val status: String?,
-    val answer: String?,
-    val confidenceScore: Double? = 0.0,
-    val displayAnswer: Boolean? = true,
+    val status: String,
+    val answer: String,
     val topic: String? = null,
     val suggestedTopics: List<String>? = null,
-    val understanding: String? = null,
-    val redirectionIntent: String? = null,
-    val contextUsage: List<ChunkSentences>? = null,
+    val context: List<ChunkSentences>?
= null, ) data class Footnote( diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py index 5682c99ee9..1e2ab63a1b 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py @@ -27,16 +27,10 @@ class Source(BaseModel): """A source model, used to associate document sources with the QA response""" - title: str = Field(description='Source title', examples=['Tock Documentation']) - url: Optional[AnyUrl] = Field( - description='Source url', examples=['https://doc.tock.ai/tock/'], default=None - ) - content: str = Field( - description='Source content', examples=['Tock: The Open Conversation Kit'] - ) - score: Optional[float] = Field( - description='The compressor score', examples=[0.9149009585380554], default=None - ) + title: str = Field(description="Source title", examples=["Tock Documentation"]) + url: Optional[AnyUrl] = Field(description="Source url", examples=["https://doc.tock.ai/tock/"], default=None) + content: str = Field(description="Source content", examples=["Tock: The Open Conversation Kit"]) + score: Optional[float] = Field(description="The compressor score", examples=[0.9149009585380554], default=None) def __eq__(self, other): """ @@ -52,35 +46,26 @@ def __eq__(self, other): ) def __hash__(self): - return hash((self.title, str(self.url or ''), self.content)) + return hash((self.title, str(self.url or ""), self.content)) class Footnote(Source): """A footnote model, used to associate document sources with the RAG answer""" - identifier: str = Field(description='Footnote identifier', examples=['1']) + identifier: str = Field(description="Footnote identifier", examples=["1"]) class ChunkInfos(BaseModel): """A model representing information about a chunk used in the RAG context.""" chunk: Optional[str] = Field( - description='Unique identifier of the chunk.', - examples=['cd6d8221-ba9f-44da-86ee-0e25a3c9a5c7'], - default=None, + description="Unique identifier of the chunk.", examples=["cd6d8221-ba9f-44da-86ee-0e25a3c9a5c7"], default=None ) sentences: Optional[List[str]] = Field( - description='List of verbatim sentences from the chunk that were used by the LLM.', - default=None, - ) - used_in_response: Optional[bool] = Field( - description='It indicates whether the chunk is actually used to generate the response.', - examples=[True], - default=False, + description="List of verbatim sentences from the chunk that were used by the LLM.", default=None ) reason: Optional[str] = Field( - description='Reason why the chunk was not used (e.g., irrelevant, general background).', - default=None, + description="Reason why the chunk was not used (e.g., irrelevant, general background).", default=None ) @@ -91,44 +76,25 @@ class LLMAnswer(BaseModel): """ status: Optional[str] = Field( - description='The status of the answer generation. ' + description="The status of the answer generation. 
" "Possible values: 'found_in_context', 'not_found_in_context', 'small_talk', " - 'or other case-specific codes.', + "or other case-specific codes.", default=None, ) answer: Optional[str] = Field( - description="The textual answer generated by the LLM, in the user's locale.", - default=None, - ) - confidence_score: Optional[float] = Field( - description="Confidence score assigned to the answer provided to the user's question", - examples=[0.93], - default=None, - ) - display_answer: Optional[bool] = Field( - description='It indicates if we should display answer or not.', - examples=[True], - default=False, + description="The textual answer generated by the LLM, in the user's locale.", default=None ) topic: Optional[str] = Field( - description='The main topic assigned to the answer. Must be one of the predefined list ' + description="The main topic assigned to the answer. Must be one of the predefined list " "of topics, or 'unknown' if no match is possible.", default=None, ) suggested_topics: Optional[List[str]] = Field( - description='A list of suggested alternative or related topics, ' - "used when the main topic is 'unknown'.", + description="A list of suggested alternative or related topics, " "used when the main topic is 'unknown'.", default=None, ) - understanding: Optional[str] = Field( - description='LLM understanding of user query.', - default=None, - ) - redirection_intent: Optional[str] = Field( - description='The intent to use to redirect the conversation flow.', default=None - ) - context_usage: Optional[List[ChunkInfos]] = Field( - description='The list of chunks from the context that contributed to or were considered ' + context: Optional[List[ChunkInfos]] = Field( + description="The list of chunks from the context that contributed to or were considered " "in the LLM's answer. 
Each entry contains identifiers, sentences, and reasons.", default=None, ) @@ -138,35 +104,31 @@ class LLMAnswer(BaseModel): class ChatMessageType(str, Enum): """Enumeration to list a chat message type""" - HUMAN = 'HUMAN' - AI = 'AI' + HUMAN = "HUMAN" + AI = "AI" class ChatMessage(BaseModel): """A conversation chat message""" - text: str = Field( - description='Conversation message text', examples=['Hello, how can I do this?'] - ) - type: ChatMessageType = Field(description='The message origin (Human or AI)') + text: str = Field(description="Conversation message text", examples=["Hello, how can I do this?"]) + type: ChatMessageType = Field(description="The message origin (Human or AI)") class RAGDocumentMetadata(BaseModel): """The RAG document metadata""" - index_session_id: str = Field( - description='The indexing session id.', examples=['123f-ed01-gt21-gg08'] - ) - id: str = Field(description='The document id.', examples=['e014-g24-0f11-1g3e']) - title: str = Field(description='The document title.', examples=['Tracking shot']) + index_session_id: str = Field(description="The indexing session id.", examples=["123f-ed01-gt21-gg08"]) + id: str = Field(description="The document id.", examples=["e014-g24-0f11-1g3e"]) + title: str = Field(description="The document title.", examples=["Tracking shot"]) url: Optional[HttpUrl] = Field( - description='The document url.', - examples=['https://en.wikipedia.org/wiki/Tracking_shot'], + description="The document url.", + examples=["https://en.wikipedia.org/wiki/Tracking_shot"], default=None, ) - chunk: str = Field(description='The document chunk.', examples=['1/3']) + chunk: str = Field(description="The document chunk.", examples=["1/3"]) retriever_score: Optional[float] = Field( - description='The compressor score', examples=[0.9149009585380554], default=None + description="The compressor score", examples=[0.9149009585380554], default=None ) @@ -174,14 +136,14 @@ class RAGDocument(BaseModel): """The definition of RAG document""" content: str = Field( - description='The document content.', + description="The document content.", examples=[ - 'In cinematography, a tracking shot is any shot where the camera follows backward, ' - 'forward or moves alongside the subject being recorded.' + "In cinematography, a tracking shot is any shot where the camera follows backward, " + "forward or moves alongside the subject being recorded." ], ) metadata: RAGDocumentMetadata = Field( - description='The document metadata.', + description="The document metadata.", ) @@ -192,40 +154,32 @@ class QADebugData(BaseModel): description="The user's initial question.", examples=["I'm interested in going to Morocco"], ) - documents: List[RAGDocument] = Field( - description='Documents retrieved from the vector store.' - ) + documents: List[RAGDocument] = Field(description="Documents retrieved from the vector store.") document_index_name: str = Field( - description='Index name corresponding to a document collection in the vector database.', + description="Index name corresponding to a document collection in the vector database.", ) document_search_params: DocumentSearchParams = Field( - description='The document search parameters. Ex: number of documents, metadata filter', - ) - duration: float = Field( - description='The duration of RAG in seconds.', examples=['7.2'] + description="The document search parameters. 
Ex: number of documents, metadata filter", ) + duration: float = Field(description="The duration of RAG in seconds.", examples=["7.2"]) class RAGDebugData(QADebugData): """A RAG debug data""" question_condensing_prompt: Optional[str] = Field( - description='The prompt of the question rephrased with the history of the conversation.', - examples=[ - 'Given the following conversation, rephrase the follow up question to be a standalone question.' - ], + description="The prompt of the question rephrased with the history of the conversation.", + examples=["Given the following conversation, rephrase the follow up question to be a standalone question."], ) question_condensing_history: list[ChatMessage] = Field( description="Conversation history, used to reformulate the user's question." ) condensed_question: Optional[str] = Field( - description='The question rephrased with the history of the conversation.', - examples=['Hello, how to plan a trip to Morocco ?'], + description="The question rephrased with the history of the conversation.", + examples=["Hello, how to plan a trip to Morocco ?"], ) question_answering_prompt: Optional[str] = Field( - description='The question answering prompt.', - examples=[ - 'Question: Hello, how to plan a trip to Morocco ?. Answer in French.' - ], + description="The question answering prompt.", + examples=["Question: Hello, how to plan a trip to Morocco ?. Answer in French."], ) - answer: LLMAnswer = Field(description='The RAG answer.') + answer: LLMAnswer = Field(description="The RAG answer.") diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/responses/responses.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/responses/responses.py index 1b7b2ab60b..e638100a8a 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/responses/responses.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/responses/responses.py @@ -27,62 +27,52 @@ ErrorInfo, ) from gen_ai_orchestrator.models.llm.llm_provider import LLMProvider -from gen_ai_orchestrator.models.observability.observability_provider import ( - ObservabilityProvider, -) from gen_ai_orchestrator.models.rag.rag_models import Source, LLMAnswer, Footnote -from gen_ai_orchestrator.models.vector_stores.vectore_store_provider import ( - VectorStoreProvider, -) +from gen_ai_orchestrator.models.observability.observability_provider import ObservabilityProvider +from gen_ai_orchestrator.models.vector_stores.vectore_store_provider import VectorStoreProvider class ErrorResponse(BaseModel): """The error response model""" code: ErrorCode = Field( - description='The AI orchestrator error code.', + description="The AI orchestrator error code.", examples=[ErrorCode.GEN_AI_AUTHENTICATION_ERROR], ) message: str = Field( - description='The AI orchestrator error message.', - examples=['Authentication error to the AI Provider API.'], + description="The AI orchestrator error message.", + examples=["Authentication error to the AI Provider API."], ) detail: Optional[str] = Field( - description='The AI orchestrator error detail. It provides help or a solution.', - examples=[ - 'Check your API key or token and make sure it is correct and active.' - ], + description="The AI orchestrator error detail. 
It provides help or a solution.", + examples=["Check your API key or token and make sure it is correct and active."], default=None, ) - info: ErrorInfo = Field( - description='The AI orchestrator error info. It exposes the raised error cause.' - ) + info: ErrorInfo = Field(description="The AI orchestrator error info. It exposes the raised error cause.") class ProviderSettingStatusResponse(BaseModel): """The response model of the provider setting status""" valid: bool = Field( - description='It indicates the setting validity.', + description="It indicates the setting validity.", examples=[True], default=False, ) - errors: list[ErrorResponse] = Field(description='The list of errors.', default=[]) + errors: list[ErrorResponse] = Field(description="The list of errors.", default=[]) class LLMProviderResponse(BaseModel): """The response model of the LLM provider""" - provider: LLMProvider = Field( - description='The LLM Provider ID', default=[LLMProvider.OPEN_AI] - ) + provider: LLMProvider = Field(description="The LLM Provider ID", default=[LLMProvider.OPEN_AI]) class VectorStoreProviderResponse(BaseModel): """The response model of the Vector Store provider""" provider: VectorStoreProvider = Field( - description='The Vector Store Provider ID', default=[VectorStoreProvider.OPEN_SEARCH] + description="The Vector Store Provider ID", default=[VectorStoreProvider.OPEN_SEARCH] ) @@ -90,7 +80,7 @@ class ObservabilityProviderResponse(BaseModel): """The response model of the Observability provider""" provider: ObservabilityProvider = Field( - description='The Observability Provider ID', default=[ObservabilityProvider.LANGFUSE] + description="The Observability Provider ID", default=[ObservabilityProvider.LANGFUSE] ) @@ -98,7 +88,7 @@ class DocumentCompressorProviderResponse(BaseModel): """The response model of the Document Compressor provider""" provider: DocumentCompressorProvider = Field( - description='The Document Compressor Provider ID', default=[DocumentCompressorProvider.BLOOMZ] + description="The Document Compressor Provider ID", default=[DocumentCompressorProvider.BLOOMZ] ) @@ -106,7 +96,7 @@ class EMProviderResponse(BaseModel): """The response model of the EM provider""" provider: EMProvider = Field( - description='The Embedding Model Provider ID', + description="The Embedding Model Provider ID", default=[EMProvider.AZURE_OPEN_AI_SERVICE], ) @@ -114,55 +104,38 @@ class EMProviderResponse(BaseModel): class ObservabilityInfo(BaseModel): """The Observability Info model""" - trace_id: str = Field( - description='The observability trace id.' - ) - trace_name: str = Field( - description='The observability trace name.' - ) - trace_url: str = Field( - description='The observability trace url.' 
- ) + trace_id: str = Field(description="The observability trace id.") + trace_name: str = Field(description="The observability trace name.") + trace_url: str = Field(description="The observability trace url.") + class RAGResponse(BaseModel): """The RAG response model""" - answer: Optional[LLMAnswer] = Field( - description='The RAG answer' - ) - footnotes: set[Footnote] = Field(description='Set of footnotes') + answer: Optional[LLMAnswer] = Field(description="The RAG answer") + footnotes: set[Footnote] = Field(description="Set of footnotes") debug: Optional[Any] = Field( - description='Debug data', - examples=[{'action': 'retrieve', 'result': 'OK', 'errors': []}], + description="Debug data", + examples=[{"action": "retrieve", "result": "OK", "errors": []}], default=None, ) - observability_info: Optional[ObservabilityInfo] = Field( - description='The observability info.', - default=None - ) + observability_info: Optional[ObservabilityInfo] = Field(description="The observability info.", default=None) + class QAResponse(BaseModel): """The QA response model""" - documents: set[Source] = Field( - description='The sources corresponding to the QA request.' - ) + documents: set[Source] = Field(description="The sources corresponding to the QA request.") class SentenceGenerationResponse(BaseModel): """The sentence generation response model""" - sentences: list[str] = Field( - description='The list of generated sentences.', default=[] - ) + sentences: list[str] = Field(description="The list of generated sentences.", default=[]) + class PlaygroundResponse(BaseModel): """The playground response model""" - answer: str = Field( - description='The playground answer.' - ) - observability_info: Optional[ObservabilityInfo] = Field( - description='The observability info.', - default=None - ) + answer: str = Field(description="The playground answer.") + observability_info: Optional[ObservabilityInfo] = Field(description="The observability info.", default=None) diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py index e585db67f4..e4531327f1 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py @@ -21,8 +21,9 @@ import logging import time from functools import partial +from logging import ERROR, WARNING from operator import itemgetter -from typing import List, Optional +from typing import List, Optional, Tuple from langchain.retrievers.contextual_compression import ( ContextualCompressionRetriever, @@ -30,12 +31,8 @@ from langchain_community.chat_message_histories import ChatMessageHistory from langchain_core.callbacks import BaseCallbackHandler from langchain_core.documents import Document -from langchain_core.messages import AIMessage, HumanMessage -from langchain_core.output_parsers import ( - JsonOutputParser, - PydanticOutputParser, - StrOutputParser, -) +from langchain_core.messages import HumanMessage, AIMessage +from langchain_core.output_parsers import StrOutputParser, JsonOutputParser from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder from langchain_core.prompts import PromptTemplate as LangChainPromptTemplate from langchain_core.runnables import ( @@ -43,6 +40,8 @@ RunnableParallel, RunnablePassthrough, RunnableSerializable, + 
RunnableConfig, + RunnableLambda, ) from langchain_core.runnables.config import RunnableConfig from langchain_core.vectorstores import VectorStoreRetriever @@ -73,9 +72,12 @@ RAGDebugData, RAGDocument, RAGDocumentMetadata, + LLMAnswer, ) from gen_ai_orchestrator.routers.requests.requests import RAGRequest -from gen_ai_orchestrator.routers.responses.responses import RAGResponse +from gen_ai_orchestrator.routers.responses.responses import ( + RAGResponse, +) from gen_ai_orchestrator.services.langchain.callbacks.rag_callback_handler import ( RAGCallbackHandler, ) @@ -98,7 +100,7 @@ @opensearch_exception_handler -@openai_exception_handler(provider='OpenAI or AzureOpenAIService') +@openai_exception_handler(provider="OpenAI or AzureOpenAIService") async def execute_rag_chain( request: RAGRequest, debug: bool, @@ -115,16 +117,12 @@ async def execute_rag_chain( The RAG response (Answer and document sources) """ - logger.info('RAG chain - Start of execution...') + logger.info("RAG chain - Start of execution...") start_time = time.time() - conversational_retrieval_chain = create_rag_chain( - request=request, vector_db_async_mode=False # TODO MASS - ) + conversational_retrieval_chain = create_rag_chain(request=request, vector_db_async_mode=False) # TODO MASS message_history = ChatMessageHistory() - metadata = {} - if request.dialog: for msg in request.dialog.history: if ChatMessageType.HUMAN == msg.type: @@ -132,95 +130,83 @@ async def execute_rag_chain( else: message_history.add_ai_message(msg.text) - if request.dialog.user_id is not None: - metadata['langfuse_user_id'] = request.dialog.user_id - if request.dialog.dialog_id is not None: - metadata['langfuse_session_id'] = request.dialog.dialog_id - if request.dialog.tags: - metadata['langfuse_tags'] = request.dialog.tags + logger.debug("RAG chain - Use chat history: %s", len(message_history.messages) > 0) + logger.debug("RAG chain - Use RAGCallbackHandler for debugging : %s", debug) - logger.debug( - 'RAG chain - Use chat history: %s', - 'Yes' if len(message_history.messages) > 0 else 'No', - ) + records_handler, observability_handler = get_callback_handlers(request, debug) + + callbacks = [ + handler + for handler in (records_handler, observability_handler, custom_observability_handler) + if handler is not None + ] inputs = { **request.question_answering_prompt.inputs, - 'chat_history': message_history.messages, + "chat_history": message_history.messages, } - logger.debug( - 'RAG chain - Use RAGCallbackHandler for debugging : %s', - debug, - ) + response = await conversational_retrieval_chain.ainvoke(input=inputs, config=RunnableConfig(callbacks=callbacks)) + llm_answer = LLMAnswer(**response["answer"]) - callback_handlers = [] - records_callback_handler = RAGCallbackHandler() - observability_handler = None - if debug: - # Debug callback handler - callback_handlers.append(records_callback_handler) - if custom_observability_handler is not None: - callback_handlers.append(custom_observability_handler) - if request.observability_setting is not None: - # Langfuse callback handler - observability_handler = create_observability_callback_handler( - observability_setting=request.observability_setting, - ) - callback_handlers.append(observability_handler) - - response = await conversational_retrieval_chain.ainvoke( - input=inputs, - config=RunnableConfig( - callbacks=callback_handlers, - metadata=metadata, - ), - ) - - llm_answer: LLMAnswer = response['answer'] + # RAG Guard + rag_guard(inputs, llm_answer, response, request.documents_required) # Guardrail 
if request.guardrail_setting: - guardrail = get_guardrail_factory( - setting=request.guardrail_setting - ).get_parser() + guardrail = get_guardrail_factory(setting=request.guardrail_setting).get_parser() guardrail_output = guardrail.parse(llm_answer.answer) check_guardrail_output(guardrail_output) # Calculation of RAG processing time - rag_duration = '{:.2f}'.format(time.time() - start_time) - logger.info('RAG chain - End of execution. (Duration : %s seconds)', rag_duration) + rag_duration = "{:.2f}".format(time.time() - start_time) + logger.info("RAG chain - End of execution. (Duration : %s seconds)", rag_duration) # Group contexts by chunk id - contexts_by_chunk = { - ctx.chunk: ctx - for ctx in (llm_answer.context_usage or []) - if ctx.used_in_response - } - - footnotes = { - Footnote( - identifier=doc.metadata['id'], - title=doc.metadata['title'], - url=doc.metadata['source'], - content=get_source_content(doc), - score=doc.metadata.get('retriever_score', None), - ) - for doc in response['documents'] - if doc.metadata['id'] in contexts_by_chunk - } + contexts_by_chunk = {ctx.chunk: ctx for ctx in (llm_answer.context or []) if ctx.sentences} # Returning RAG response return RAGResponse( answer=llm_answer, - footnotes=footnotes, - observability_info=get_observability_info( - observability_handler, - ObservabilityTrace.RAG.value if observability_handler is not None else None, - ), - debug=get_rag_debug_data(request, records_callback_handler, rag_duration) - if debug - else None, + footnotes={ + Footnote( + identifier=doc.metadata["id"], + title=doc.metadata["title"], + url=doc.metadata["source"], + content=get_source_content(doc), + score=doc.metadata.get("retriever_score", None), + ) + for doc in response["documents"] + if doc.metadata["id"] in contexts_by_chunk + }, + observability_info=get_observability_info(observability_handler), + debug=get_rag_debug_data(request, records_handler, rag_duration) if debug else None, + ) + + +def get_callback_handlers(request, debug) -> Tuple[ + Optional[RAGCallbackHandler], + Optional[object], +]: + records_handler = RAGCallbackHandler() if debug else None + observability_handler = None + + if request.observability_setting is not None: + if request.dialog: + session_id = request.dialog.dialog_id + user_id = request.dialog.user_id + tags = request.dialog.tags + else: + session_id = None + user_id = None + tags = None + observability_handler = create_observability_callback_handler( + observability_setting=request.observability_setting, + ) + + return ( + records_handler, + observability_handler, ) @@ -254,23 +240,15 @@ def create_rag_chain( """ # Log progress and validate prompt template - logger.info('RAG chain - Validating LLM prompt template') - validate_prompt_template( - request.question_answering_prompt, 'Question answering prompt' - ) + logger.info("RAG chain - Validating LLM prompt template") + validate_prompt_template(request.question_answering_prompt, "Question answering prompt") if request.question_condensing_prompt is not None: - validate_prompt_template( - request.question_condensing_prompt, 'Question condensing prompt' - ) + validate_prompt_template(request.question_condensing_prompt, "Question condensing prompt") question_condensing_llm_factory = None if request.question_condensing_llm_setting is not None: - question_condensing_llm_factory = get_llm_factory( - setting=request.question_condensing_llm_setting - ) - question_answering_llm_factory = get_llm_factory( - setting=request.question_answering_llm_setting - ) + 
question_condensing_llm_factory = get_llm_factory(setting=request.question_condensing_llm_setting) + question_answering_llm_factory = get_llm_factory(setting=request.question_answering_llm_setting) em_factory = get_em_factory(setting=request.embedding_question_em_setting) vector_store_factory = get_vector_store_factory( setting=request.vector_store_setting, @@ -285,7 +263,7 @@ def create_rag_chain( if request.compressor_setting: retriever = add_document_compressor(retriever, request.compressor_setting) - logger.debug('RAG chain - Document index name: %s', request.document_index_name) + logger.debug("RAG chain - Document index name: %s", request.document_index_name) # Build LLM and prompt templates question_condensing_llm = None @@ -300,9 +278,7 @@ def create_rag_chain( condensing_llm = question_answering_llm # Build the chat chain for question contextualization - chat_chain = build_question_condensation_chain( - condensing_llm, request.question_condensing_prompt - ) + chat_chain = build_question_condensation_chain(condensing_llm, request.question_condensing_prompt) rag_prompt = build_rag_prompt(request) # Function to contextualize the question based on chat history @@ -311,22 +287,22 @@ def create_rag_chain( # Calculate the condensed question with_condensed_question = RunnableParallel( { - 'condensed_question': contextualize_question_fn, - 'question': itemgetter('question'), - 'chat_history': itemgetter('chat_history'), + "condensed_question": contextualize_question_fn, + "question": itemgetter("question"), + "chat_history": itemgetter("chat_history"), } ) def retrieve_with_variants(inputs): variants = [ # inputs["question"], Deactivated. It's an example to prove the multi retriever process - inputs['condensed_question'] + inputs["condensed_question"] ] docs = [] for v in variants: docs.extend(retriever.invoke(v)) # Deduplicate docs - unique_docs = {d.metadata['id']: d for d in docs} + unique_docs = {d.metadata["id"]: d for d in docs} # TODO [DERCBOT-1649] Apply the RRF Algo on unique_docs. 
return list(unique_docs.values()) @@ -334,33 +310,31 @@ def retrieve_with_variants(inputs): # Build the RAG inputs rag_inputs = with_condensed_question | RunnableParallel( { - 'question': itemgetter('condensed_question'), - 'chat_history': itemgetter('chat_history'), - 'documents': RunnableLambda(retrieve_with_variants), + "question": itemgetter("condensed_question"), + "chat_history": itemgetter("chat_history"), + "documents": RunnableLambda(retrieve_with_variants), } ) - parser = PydanticOutputParser(pydantic_object=LLMAnswer, name='rag_chain_output') - return rag_inputs | RunnablePassthrough.assign( answer=( { - 'context': lambda x: json.dumps( + "context": lambda x: json.dumps( [ { - 'chunk_id': doc.metadata['id'], - 'chunk_text': doc.page_content, + "chunk_id": doc.metadata["id"], + "chunk_text": doc.page_content, } - for doc in x['documents'] + for doc in x["documents"] ], ensure_ascii=False, indent=2, ), - 'chat_history': format_chat_history, + "chat_history": format_chat_history, } | rag_prompt | question_answering_llm - | parser + | JsonOutputParser(pydantic_object=LLMAnswer, name="rag_chain_output") ) ) @@ -378,17 +352,15 @@ def build_rag_prompt(request: RAGRequest) -> LangChainPromptTemplate: def format_chat_history(x): messages = [] - for msg in x['chat_history']: + for msg in x["chat_history"]: if isinstance(msg, HumanMessage): - messages.append({'user': msg.content}) + messages.append({"user": msg.content}) elif isinstance(msg, AIMessage): - messages.append({'assistant': msg.content}) + messages.append({"assistant": msg.content}) return json.dumps(messages, ensure_ascii=False, indent=2) -def build_question_condensation_chain( - llm, prompt: Optional[PromptTemplate] -) -> ChatPromptTemplate: +def build_question_condensation_chain(llm, prompt: Optional[PromptTemplate]) -> ChatPromptTemplate: """ Build the chat chain for contextualizing questions. """ @@ -418,13 +390,13 @@ def build_question_condensation_chain( return ( ChatPromptTemplate.from_messages( [ - ('system', prompt.template), - MessagesPlaceholder(variable_name='chat_history'), - ('human', '{question}'), + ("system", prompt.template), + MessagesPlaceholder(variable_name="chat_history"), + ("human", "{question}"), ] ).partial(**prompt.inputs) | llm - | StrOutputParser(name='chat_chain_output') + | StrOutputParser(name="chat_chain_output") ) @@ -432,9 +404,33 @@ def contextualize_question(inputs: dict, chat_chain) -> str: """ Contextualize the question based on the chat history. """ - if inputs.get('chat_history') and len(inputs['chat_history']) > 0: + if inputs.get("chat_history") and len(inputs["chat_history"]) > 0: return chat_chain - return inputs['question'] + return inputs["question"] + + +def rag_guard(question, answer, response, documents_required): + """ + Validates the RAG system's response based on the presence or absence of source documents + and the `documentsRequired` setting. + + Args: + question: user question + answer: the LLM answer + response: the RAG response + documents_required (bool): Specifies whether documents are mandatory for the response. + """ + + if documents_required and answer.status == "found_in_context" and len(response["documents"]) == 0: + message = "No documents were retrieved, yet an answer was attempted." 
+        rag_log(level=ERROR, message=message, question=question, answer=answer.answer, response=response)
+        raise GenAIGuardCheckException(ErrorInfo(cause=message))
+
+    if answer.status == "not_found_in_context" and len(response["documents"]) > 0:
+        # If the answer was not found in context but some documents were retrieved, remove them from the RAG response.
+        message = "No answer found in the retrieved context. The documents are therefore removed from the RAG response."
+        rag_log(level=WARNING, message=message, question=question, answer=answer.answer, response=response)
+        response["documents"] = []


 def rag_log(level, message, question, answer, response):
@@ -451,13 +447,12 @@ def rag_log(level, message, question, answer, response):

     logger.log(
         level,
-        '%(message)s \n'
-        'RAG chain - question="%(question)s", answer="%(answer)s", documents="%(documents)s"',
+        "%(message)s \n" 'RAG chain - question="%(question)s", answer="%(answer)s", documents="%(documents)s"',
         {
-            'message': message,
-            'question': question,
-            'answer': answer,
-            'documents': len(response['documents']),
+            "message": message,
+            "question": question,
+            "answer": answer,
+            "documents": len(response["documents"]),
         },
     )

@@ -470,33 +465,27 @@ def get_rag_documents(handler: RAGCallbackHandler) -> List[RAGDocument]:
         handler: the RAG Callback Handler
     """

-    if handler.records['documents'] is None:
+    if handler.records["documents"] is None:
         return []

     return [
         # Get first 100 char of content
         RAGDocument(
-            content=doc.page_content[0 : len(doc.metadata['title']) + 100] + '...',
+            content=doc.page_content[0 : len(doc.metadata["title"]) + 100] + "...",
             metadata=RAGDocumentMetadata(**doc.metadata),
         )
-        for doc in handler.records['documents']
+        for doc in handler.records["documents"]
     ]


 def get_llm_answer(rag_chain_output) -> LLMAnswer:
     if rag_chain_output is None:
         return LLMAnswer()

-    return LLMAnswer(
-        **json.loads(
-            rag_chain_output.strip().removeprefix('```json').removesuffix('```').strip()
-        )
-    )
+    return LLMAnswer(**json.loads(rag_chain_output.strip().removeprefix("```json").removesuffix("```").strip()))


-def get_rag_debug_data(
-    request: RAGRequest, records_callback_handler: RAGCallbackHandler, rag_duration
-) -> RAGDebugData:
+def get_rag_debug_data(request: RAGRequest, records_callback_handler: RAGCallbackHandler, rag_duration) -> RAGDebugData:
     """RAG debug data assembly"""

     history = []
@@ -504,15 +493,15 @@ def get_rag_debug_data(
         history = request.dialog.history

     return RAGDebugData(
-        user_question=request.question_answering_prompt.inputs['question'],
-        question_condensing_prompt=records_callback_handler.records['chat_prompt'],
+        user_question=request.question_answering_prompt.inputs["question"],
+        question_condensing_prompt=records_callback_handler.records["chat_prompt"],
         question_condensing_history=history,
-        condensed_question=records_callback_handler.records['chat_chain_output'],
-        question_answering_prompt=records_callback_handler.records['rag_prompt'],
+        condensed_question=records_callback_handler.records["chat_chain_output"],
+        question_answering_prompt=records_callback_handler.records["rag_prompt"],
         documents=get_rag_documents(records_callback_handler),
         document_index_name=request.document_index_name,
         document_search_params=request.document_search_params,
answer=get_llm_answer(records_callback_handler.records["rag_chain_output"]), duration=rag_duration, ) @@ -524,7 +517,7 @@ def check_guardrail_output(guardrail_output: dict) -> bool: Returns: Returns True if nothing is detected, raises an exception otherwise. """ - if guardrail_output['output_toxicity']: + if guardrail_output["output_toxicity"]: message = f"Toxicity detected in LLM output ({','.join(guardrail_output['output_toxicity_reason'])})" raise GenAIGuardCheckException(ErrorInfo(cause=message)) return True diff --git a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py index ef16f03eb9..46589feb0e 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py +++ b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_rag_chain.py @@ -44,25 +44,20 @@ ) -@patch( - 'gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post' -) -@patch( - 'gen_ai_orchestrator.services.langchain.factories.langchain_factory.get_compressor_factory' -) -@patch( - 'gen_ai_orchestrator.services.langchain.factories.langchain_factory.get_callback_handler_factory' -) -@patch('gen_ai_orchestrator.services.langchain.rag_chain.create_rag_chain') -@patch('gen_ai_orchestrator.services.langchain.rag_chain.RAGCallbackHandler') -@patch('gen_ai_orchestrator.services.langchain.rag_chain.RAGResponse') -@patch('gen_ai_orchestrator.services.langchain.rag_chain.RAGDebugData') -@patch('gen_ai_orchestrator.services.langchain.rag_chain.get_llm_answer') +@patch("gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post") +@patch("gen_ai_orchestrator.services.langchain.factories.langchain_factory.get_compressor_factory") +@patch("gen_ai_orchestrator.services.langchain.factories.langchain_factory.get_callback_handler_factory") +@patch("gen_ai_orchestrator.services.langchain.rag_chain.create_rag_chain") +@patch("gen_ai_orchestrator.services.langchain.rag_chain.RAGCallbackHandler") +@patch("gen_ai_orchestrator.services.langchain.rag_chain.RAGResponse") +@patch("gen_ai_orchestrator.services.langchain.rag_chain.RAGDebugData") +@patch("gen_ai_orchestrator.services.langchain.rag_chain.get_llm_answer") @pytest.mark.asyncio async def test_rag_chain( mocked_get_llm_answer, mocked_rag_debug_data, mocked_rag_response, + mocked_rag_guard, mocked_callback_init, mocked_create_rag_chain, mocked_get_callback_handler_factory, @@ -72,28 +67,28 @@ async def test_rag_chain( """Test the full execute_qa_chain method by mocking all external calls.""" # Build a test RAGRequest query_dict = { - 'dialog': { - 'history': [ - {'text': 'Hello, how can I do this?', 'type': 'HUMAN'}, + "dialog": { + "history": [ + {"text": "Hello, how can I do this?", "type": "HUMAN"}, { - 'text': 'you can do this with the following method ....', - 'type': 'AI', + "text": "you can do this with the following method ....", + "type": "AI", }, ], - 'tags': [], + "tags": [], }, - 'question_answering_llm_setting': { - 'provider': 'OpenAI', - 'api_key': { - 'type': 'Raw', - 'secret': 'ab7***************************A1IV4B', + "question_answering_llm_setting": { + "provider": "OpenAI", + "api_key": { + "type": "Raw", + "secret": "ab7***************************A1IV4B", }, - 'temperature': 1.2, - 'model': 'gpt-3.5-turbo', + "temperature": 1.2, + "model": "gpt-3.5-turbo", }, - 'question_answering_prompt': { - 'formatter': 'f-string', - 'template': """Use the 
following context to answer the question at the end. + "question_answering_prompt": { + "formatter": "f-string", + "template": """Use the following context to answer the question at the end. If you don't know the answer, just say I don't know. Context: @@ -103,89 +98,81 @@ async def test_rag_chain( {question} Answer in {locale}:""", - 'inputs': { - 'question': 'How to get started playing guitar ?', - 'locale': 'French', + "inputs": { + "question": "How to get started playing guitar ?", + "locale": "French", }, }, - 'embedding_question_em_setting': { - 'provider': 'OpenAI', - 'api_key': { - 'type': 'Raw', - 'secret': 'ab7***************************A1IV4B', + "embedding_question_em_setting": { + "provider": "OpenAI", + "api_key": { + "type": "Raw", + "secret": "ab7***************************A1IV4B", }, - 'model': 'text-embedding-ada-002', + "model": "text-embedding-ada-002", }, - 'document_index_name': 'my-index-name', - 'document_search_params': { - 'provider': 'OpenSearch', - 'filter': [ - { - 'term': { - 'metadata.index_session_id.keyword': '352d2466-17c5-4250-ab20-d7c823daf035' - } - } - ], - 'k': 4, + "document_index_name": "my-index-name", + "document_search_params": { + "provider": "OpenSearch", + "filter": [{"term": {"metadata.index_session_id.keyword": "352d2466-17c5-4250-ab20-d7c823daf035"}}], + "k": 4, }, - 'vector_store_setting': { - 'provider': 'OpenSearch', - 'host': 'localhost', - 'port': 9200, - 'username': 'admin', - 'password': { - 'type': 'Raw', - 'secret': 'admin', + "vector_store_setting": { + "provider": "OpenSearch", + "host": "localhost", + "port": 9200, + "username": "admin", + "password": { + "type": "Raw", + "secret": "admin", }, }, - 'observability_setting': { - 'provider': 'Langfuse', - 'url': 'http://localhost:3000', - 'secret_key': { - 'type': 'Raw', - 'secret': 'sk-********************be8f', + "observability_setting": { + "provider": "Langfuse", + "url": "http://localhost:3000", + "secret_key": { + "type": "Raw", + "secret": "sk-********************be8f", }, - 'public_key': 'pk-lf-5e374dc6-e194-4b37-9c07-b77e68ef7d2c', + "public_key": "pk-lf-5e374dc6-e194-4b37-9c07-b77e68ef7d2c", }, - 'guardrail_setting': { - 'provider': 'BloomzGuardrail', - 'api_base': 'http://test-guard.com', - 'max_score': 0.5, + "guardrail_setting": { + "provider": "BloomzGuardrail", + "api_base": "http://test-guard.com", + "max_score": 0.5, }, - 'compressor_setting': { - 'provider': 'BloomzRerank', - 'min_score': 0.7, - 'endpoint': 'http://test-rerank.com', + "compressor_setting": { + "provider": "BloomzRerank", + "min_score": 0.7, + "endpoint": "http://test-rerank.com", }, - 'documents_required': True, + "documents_required": True, } request = RAGRequest(**query_dict) inputs = { **request.question_answering_prompt.inputs, - 'chat_history': [ - HumanMessage(content='Hello, how can I do this?'), - AIMessage(content='you can do this with the following method ....'), + "chat_history": [ + HumanMessage(content="Hello, how can I do this?"), + AIMessage(content="you can do this with the following method ...."), ], } docs = [ Document( - page_content='some page content', - metadata={'id': '123-abc', 'title': 'my-title', 'source': None}, + page_content="some page content", + metadata={"id": "123-abc", "title": "my-title", "source": None}, ) ] response = { - 'answer': LLMAnswer( - **{ - 'status': '', - 'answer': 'an answer from llm', - 'topic': None, - 'suggested_topics': None, - 'context': [], - } - ), - 'documents': docs, + "answer": { + "status": "", + "answer": "an answer from llm", + "topic": 
None, + "suggested_topics": None, + "context": [], + }, + "documents": docs, } - llm_answer = response['answer'] + llm_answer = LLMAnswer(**response["answer"]) # Setup mock factories/init return value observability_factory_instance = mocked_get_callback_handler_factory.return_value @@ -197,7 +184,7 @@ async def test_rag_chain( mocked_response = MagicMock() mocked_response.status_code = 200 - mocked_response.content = {'response': []} + mocked_response.content = {"response": []} mocked_guardrail_parse.return_value = mocked_response @@ -205,15 +192,13 @@ async def test_rag_chain( await execute_rag_chain(request, debug=True) # Assert that the given observability_setting is used - mocked_get_callback_handler_factory.assert_called_once_with( - setting=request.observability_setting - ) + mocked_get_callback_handler_factory.assert_called_once_with(setting=request.observability_setting) # Assert qa chain is ainvoke()d with the expected settings from request mocked_chain.ainvoke.assert_called_once_with( input=inputs, config={ - 'callbacks': [mocked_callback, mocked_langfuse_callback], - 'metadata': ANY, + "callbacks": [mocked_callback, mocked_langfuse_callback], + "metadata": ANY, }, ) # Assert the response is build using the expected settings @@ -226,77 +211,69 @@ async def test_rag_chain( mocked_get_document_compressor_factory(setting=request.compressor_setting) # Assert the rag guardrail is called mocked_guardrail_parse.assert_called_once_with( - os.path.join(request.guardrail_setting.api_base, 'guardrail'), - json={ - 'text': [mocked_rag_answer['answer'].answer] - }, # TODO MASS : answer.answer pas top, à renomer + os.path.join(request.guardrail_setting.api_base, "guardrail"), + json={"text": [mocked_rag_answer["answer"]["answer"]]}, ) + # Assert the rag guard is called + mocked_rag_guard.assert_called_once_with(inputs, llm_answer, response, request.documents_required) -@patch( - 'gen_ai_orchestrator.services.langchain.impls.guardrail.bloomz_guardrail.requests.post' -) +@patch("gen_ai_orchestrator.services.langchain.impls.guardrail.bloomz_guardrail.requests.post") def test_guardrail_parse_succeed_with_toxicities_encountered( mocked_guardrail_response, ): guardrail = get_guardrail_factory( - BloomzGuardrailSetting( - provider='BloomzGuardrail', max_score=0.5, api_base='http://test-guard.com' - ) + BloomzGuardrailSetting(provider="BloomzGuardrail", max_score=0.5, api_base="http://test-guard.com") ).get_parser() rag_response = { - 'answer': { - 'status': '', - 'answer': 'This is a sample text.', - 'topic': None, - 'suggested_topics': None, - 'context': [], + "answer": { + "status": "", + "answer": "This is a sample text.", + "topic": None, + "suggested_topics": None, + "context": [], } } mocked_response = MagicMock() mocked_response.status_code = 200 mocked_response.json.return_value = { - 'response': [ + "response": [ [ - {'label': 'racism', 'score': 0.1}, - {'label': 'insult', 'score': 0.2}, - {'label': 'threat', 'score': 0.7}, - {'label': 'hate speech', 'score': 0.95}, + {"label": "racism", "score": 0.1}, + {"label": "insult", "score": 0.2}, + {"label": "threat", "score": 0.7}, + {"label": "hate speech", "score": 0.95}, ] ] } mocked_guardrail_response.return_value = mocked_response - guardrail_output = guardrail.parse(rag_response['answer']['answer']) + guardrail_output = guardrail.parse(rag_response["answer"]["answer"]) mocked_guardrail_response.assert_called_once_with( - os.path.join(guardrail.endpoint, 'guardrail'), - json={'text': [rag_response['answer']['answer']]}, + 
os.path.join(guardrail.endpoint, "guardrail"), + json={"text": [rag_response["answer"]["answer"]]}, ) assert guardrail_output == { - 'content': 'This is a sample text.', - 'output_toxicity': True, - 'output_toxicity_reason': ['threat', 'hate speech'], + "content": "This is a sample text.", + "output_toxicity": True, + "output_toxicity_reason": ["threat", "hate speech"], } -@patch( - 'gen_ai_orchestrator.services.langchain.impls.guardrail.bloomz_guardrail.requests.post' -) +@patch("gen_ai_orchestrator.services.langchain.impls.guardrail.bloomz_guardrail.requests.post") def test_guardrail_parse_fail(mocked_guardrail_response): guardrail = get_guardrail_factory( - BloomzGuardrailSetting( - provider='BloomzGuardrail', max_score=0.5, api_base='http://test-guard.com' - ) + BloomzGuardrailSetting(provider="BloomzGuardrail", max_score=0.5, api_base="http://test-guard.com") ).get_parser() rag_response = { - 'answer': { - 'status': '', - 'answer': 'This is a sample text.', - 'topic': None, - 'suggested_topics': None, - 'context': [], + "answer": { + "status": "", + "answer": "This is a sample text.", + "topic": None, + "suggested_topics": None, + "context": [], } } @@ -308,49 +285,47 @@ def test_guardrail_parse_fail(mocked_guardrail_response): HTTPError, match=f"Error {mocked_response.status_code}. Bloomz guardrail didn't respond as expected.", ): - guardrail.parse(rag_response['answer']['answer']) + guardrail.parse(rag_response["answer"]["answer"]) mocked_guardrail_response.assert_called_once_with( - os.path.join(guardrail.endpoint, 'guardrail'), - json={'text': [rag_response['answer']['answer']]}, + os.path.join(guardrail.endpoint, "guardrail"), + json={"text": [rag_response["answer"]["answer"]]}, ) -@patch( - 'gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post' -) +@patch("gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post") def test_compress_documents_should_succeed(mocked_rerank): - bloomz_reranker = BloomzRerank(label='entailement', endpoint='http://example.com') + bloomz_reranker = BloomzRerank(label="entailement", endpoint="http://example.com") documents = [ Document( - page_content='Page content 1', + page_content="Page content 1", metadata={ - 'source': 'doc1.pdf', - 'file_path': 'doc1.pdf', - 'page': 57, - 'total_pages': 104, - 'Producer': 'GPL Ghostscript 9.05', - 'CreationDate': "D:20230828165103+02'00'", - 'ModDate': "D:20230828165103+02'00'", - 'Title': 'DGRC 2023 - ABEI', - 'Creator': 'PDFCreator Version 1.6.2', - 'Author': 'F9261', - 'Keywords': '', - 'Subject': '', + "source": "doc1.pdf", + "file_path": "doc1.pdf", + "page": 57, + "total_pages": 104, + "Producer": "GPL Ghostscript 9.05", + "CreationDate": "D:20230828165103+02'00'", + "ModDate": "D:20230828165103+02'00'", + "Title": "DGRC 2023 - ABEI", + "Creator": "PDFCreator Version 1.6.2", + "Author": "F9261", + "Keywords": "", + "Subject": "", }, ), Document( - page_content='Contenu du document 8', + page_content="Contenu du document 8", metadata={ - 'source': 'incident - v5.pdf', - 'file_path': 'incident - v5.pdf', - 'page': 19, - 'total_pages': 23, - 'Author': 'F0421', - 'CreationDate': "D:20231212161411+01'00'", - 'ModDate': "D:20231212161411+01'00'", - 'Producer': 'Microsoft: Print To PDF', - 'Title': "Microsoft Word - P1 - Département des Risques - Déclaration d'un incident - v5.doc", + "source": "incident - v5.pdf", + "file_path": "incident - v5.pdf", + "page": 19, + "total_pages": 23, + "Author": "F0421", + "CreationDate": 
"D:20231212161411+01'00'", + "ModDate": "D:20231212161411+01'00'", + "Producer": "Microsoft: Print To PDF", + "Title": "Microsoft Word - P1 - Département des Risques - Déclaration d'un incident - v5.doc", }, ), ] @@ -358,59 +333,57 @@ def test_compress_documents_should_succeed(mocked_rerank): mocked_response = MagicMock() mocked_response.status_code = 200 mocked_response.json.return_value = { - 'response': [ + "response": [ [ - {'label': 'entailement', 'score': 0.1}, - {'label': 'neutral', 'score': 0.2}, - {'label': 'contradiction', 'score': 0.7}, + {"label": "entailement", "score": 0.1}, + {"label": "neutral", "score": 0.2}, + {"label": "contradiction", "score": 0.7}, ], [ - {'label': 'entailement', 'score': 0.8}, - {'label': 'neutral', 'score': 0.2}, - {'label': 'contradiction', 'score': 0.7}, + {"label": "entailement", "score": 0.8}, + {"label": "neutral", "score": 0.2}, + {"label": "contradiction", "score": 0.7}, ], ] } mocked_rerank.return_value = mocked_response - result = bloomz_reranker.compress_documents(documents=documents, query='Some query') + result = bloomz_reranker.compress_documents(documents=documents, query="Some query") mocked_rerank.assert_called_once() assert result == [ Document( - page_content='Contenu du document 8', + page_content="Contenu du document 8", metadata={ - 'source': 'incident - v5.pdf', - 'file_path': 'incident - v5.pdf', - 'page': 19, - 'total_pages': 23, - 'Author': 'F0421', - 'CreationDate': "D:20231212161411+01'00'", - 'ModDate': "D:20231212161411+01'00'", - 'Producer': 'Microsoft: Print To PDF', - 'Title': "Microsoft Word - P1 - Département des Risques - Déclaration d'un incident - v5.doc", - 'retriever_score': 0.8, + "source": "incident - v5.pdf", + "file_path": "incident - v5.pdf", + "page": 19, + "total_pages": 23, + "Author": "F0421", + "CreationDate": "D:20231212161411+01'00'", + "ModDate": "D:20231212161411+01'00'", + "Producer": "Microsoft: Print To PDF", + "Title": "Microsoft Word - P1 - Département des Risques - Déclaration d'un incident - v5.doc", + "retriever_score": 0.8, }, ) ] -@patch( - 'gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post' -) +@patch("gen_ai_orchestrator.services.langchain.impls.document_compressor.bloomz_rerank.requests.post") def test_compress_documents_with_unknown_label(mocked_rerank): - bloomz_reranker = BloomzRerank(label='unknown_label', endpoint='http://example.com') + bloomz_reranker = BloomzRerank(label="unknown_label", endpoint="http://example.com") documents = [ Document( - page_content='Page content 1', + page_content="Page content 1", metadata={ - 'source': 'doc1.pdf', + "source": "doc1.pdf", }, ), Document( - page_content='Contenu du document 8', + page_content="Contenu du document 8", metadata={ - 'source': 'incident - v5.pdf', + "source": "incident - v5.pdf", }, ), ] @@ -418,41 +391,119 @@ def test_compress_documents_with_unknown_label(mocked_rerank): mocked_response = MagicMock() mocked_response.status_code = 200 mocked_response.json.return_value = { - 'response': [ + "response": [ [ - {'label': 'entailement', 'score': 0.1}, + {"label": "entailement", "score": 0.1}, ] ] } mocked_rerank.return_value = mocked_response with pytest.raises(GenAIDocumentCompressorUnknownLabelException) as exc: - bloomz_reranker.compress_documents(documents=documents, query='Some query') + bloomz_reranker.compress_documents(documents=documents, query="Some query") assert exc.value.error_code.value == 6002 - assert exc.value.message == 'Unknown Document Compressor label.' 
-    assert exc.value.detail == 'Check the Document Compressor label you sent.'
+    assert exc.value.message == "Unknown Document Compressor label."
+    assert exc.value.detail == "Check the Document Compressor label you sent."


 def test_check_guardrail_output_find_toxicities():
     guardrail_output = {
-        'content': 'This is a sample text.',
-        'output_toxicity': True,
-        'output_toxicity_reason': ['threat', 'hate speech'],
+        "content": "This is a sample text.",
+        "output_toxicity": True,
+        "output_toxicity_reason": ["threat", "hate speech"],
     }

     with pytest.raises(GenAIGuardCheckException) as exc_found:
         check_guardrail_output(guardrail_output)

     assert exc_found.value.error_code.value == 1004
-    assert 'Guard check failed.' in exc_found.value.message
+    assert "Guard check failed." in exc_found.value.message


 def test_check_guardrail_output_is_ok():
     guardrail_output = {
-        'content': 'This is a sample text.',
-        'output_toxicity': False,
-        'output_toxicity_reason': [],
+        "content": "This is a sample text.",
+        "output_toxicity": False,
+        "output_toxicity_reason": [],
     }

     assert check_guardrail_output(guardrail_output) is True
+
+
+@patch("gen_ai_orchestrator.services.langchain.rag_chain.rag_log")
+def test_rag_guard_fails_if_no_docs_in_valid_answer(mocked_log):
+    question = "Hi!"
+    response = {
+        "answer": {"status": "found_in_context", "answer": "a valid answer"},
+        "documents": [],
+    }
+    with pytest.raises(GenAIGuardCheckException):
+        rag_chain.rag_guard(
+            question, LLMAnswer(**response["answer"]), response, documents_required=True
+        )
+
+
+@patch("gen_ai_orchestrator.services.langchain.rag_chain.rag_log")
+def test_rag_guard_accepts_no_answer_even_with_docs(mocked_log):
+    question = "Hi!"
+    response = {
+        "answer": {
+            "status": "not_found_in_context",
+            "answer": "Sorry, I don't know.",
+            "context": [
+                {
+                    "chunk": 1,
+                    "sentences": ["str1"],
+                }
+            ],
+        },
+        "documents": ["a doc as a string"],
+    }
+    rag_chain.rag_guard(question, LLMAnswer(**response["answer"]), response, documents_required=True)
+    # No answer found in the retrieved context. The documents are therefore removed from the RAG response.
+    assert response["documents"] == []
+
+
+@patch("gen_ai_orchestrator.services.langchain.rag_chain.rag_log")
+def test_rag_guard_valid_answer_with_docs(mocked_log):
+    question = "Hi!"
+    response = {
+        "answer": {
+            "status": "found_in_context",
+            "answer": "a valid answer",
+        },
+        "documents": ["doc1", "doc2"],
+    }
+    rag_chain.rag_guard(question, LLMAnswer(**response["answer"]), response, documents_required=True)
+    assert response["documents"] == ["doc1", "doc2"]
+
+
+@patch("gen_ai_orchestrator.services.langchain.rag_chain.rag_log")
+def test_rag_guard_no_answer_with_no_docs(mocked_log):
+    question = "Hi!"
+    response = {
+        "answer": {"status": "not_found_in_context", "answer": "Sorry, I don't know."},
+        "documents": [],
+    }
+    rag_chain.rag_guard(question, LLMAnswer(**response["answer"]), response, documents_required=True)
+    assert response["documents"] == []
+
+
+@patch("gen_ai_orchestrator.services.langchain.rag_chain.rag_log")
+def test_rag_guard_without_no_answer_input(mocked_log):
+    """Test that rag_guard logs an error and raises when an answer claims context but no documents were retrieved."""
+    question = "Hi!"
+ response = { + "answer": { + "status": "found_in_context", + "answer": "a valid answer", + }, + "documents": [], + } + with pytest.raises(GenAIGuardCheckException) as exc: + rag_chain.rag_guard(question, LLMAnswer(**response["answer"]), response, documents_required=True) + + mocked_log.assert_called_once() + + assert isinstance(exc.value, GenAIGuardCheckException) From 6ba6d90bc22fcf08da5c9fffab1e3e6776a8fd22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rodolphe=20K=C3=BCffer?= Date: Mon, 2 Feb 2026 17:30:20 +0100 Subject: [PATCH 4/7] Front modifications --- .../chat-ui-message-debug.component.html | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html index 361645b06d..a07ba89667 100644 --- a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html +++ b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html @@ -21,20 +21,13 @@ >
Status: {{ message.data.answer?.status }}
+
Confidence score: {{ message.data.answer?.confidence_score }}</div>
+
Topic: {{ message.data.answer?.topic }}
-
- Suggested topics: - - {{ suggestion }} - -
-
Redirection intent: {{ message.data.answer?.redirection_intent }}
- -
Confidence score:: {{ message.data.answer?.confidence_score }}
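For reference between the commits in this series, the structured answer that [PATCH 1/7] introduces can be sketched as follows. This is a minimal illustration, not real orchestrator output: the field values are invented, the status code comes from the `status` field's documented examples, and the `context` entry assumes the `chunk`/`sentences` shape used in the unit tests.

```python
# A hypothetical payload matching the new LLMAnswer model from rag_models.py.
from gen_ai_orchestrator.models.rag.rag_models import LLMAnswer

raw = {
    "status": "found_in_context",
    "answer": "You can do this with the following method ...",
    "topic": "guitar",         # one of the predefined topics, or 'unknown'
    "suggested_topics": None,  # only filled when the main topic is 'unknown'
    "context": [{"chunk": 1, "sentences": ["some page content"]}],
}

llm_answer = LLMAnswer(**raw)
assert llm_answer.status == "found_in_context"
```

Chunks whose `sentences` list is non-empty are the ones `execute_rag_chain` keeps when it builds the footnotes of the final `RAGResponse`.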
From 2db19a1ecff99f87d49884bc3d77dce582bf6a9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rodolphe=20K=C3=BCffer?= Date: Tue, 3 Feb 2026 14:53:53 +0100 Subject: [PATCH 5/7] Front modifications - Wip --- .../chat-ui-message-debug.component.html | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html index a07ba89667..361645b06d 100644 --- a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html +++ b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html @@ -21,13 +21,20 @@ >
Status: {{ message.data.answer?.status }}
-
Confidence score: {{ message.data.answer?.confidence_score }}</div>
-
Topic: {{ message.data.answer?.topic }}
+
+ Suggested topics: + + {{ suggestion }} + +
+
Redirection intent: {{ message.data.answer?.redirection_intent }}
+ +
Confidence score:: {{ message.data.answer?.confidence_score }}
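The debug payload rendered by the components changed below is produced by `get_llm_answer` in rag_chain.py ([PATCH 1/7]), which strips an optional Markdown fence before parsing the model output as JSON. A minimal, self-contained sketch of that string handling; the sample response is invented, and the fence is assembled at runtime only to keep this example readable:

```python
# Sketch of the fence-stripping done by get_llm_answer before validating the output.
import json

fence = "`" * 3  # a literal triple-backtick fence, as emitted by some models
sample = fence + 'json\n{"status": "small_talk", "answer": "Hello!"}\n' + fence
payload = json.loads(sample.strip().removeprefix(fence + "json").removesuffix(fence).strip())
assert payload == {"status": "small_talk", "answer": "Hello!"}
```

In the chain itself this normalization is shared between `JsonOutputParser` and `get_llm_answer`; the sketch only isolates the string handling.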
From dacc9a58876534013aa4b13b13cd279ffbb92854 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rodolphe=20K=C3=BCffer?= Date: Wed, 4 Feb 2026 17:40:00 +0100 Subject: [PATCH 6/7] Remove documentsRequired Rag option + UX improvements --- .../models/engines-configurations.ts | 3 +- .../rag/rag-settings/models/rag-settings.ts | 2 - .../rag-settings/rag-settings.component.html | 18 ------ .../rag-settings/rag-settings.component.ts | 8 --- .../chat-ui-message-debug.component.html | 60 +++++++++++++----- .../chat-ui-message-debug.component.scss | 61 +++++++++++++------ .../chat-ui-message-debug.component.ts | 43 ++++++++++++- .../web/src/app/shared/model/dialog-data.ts | 14 +++++ bot/admin/web/src/app/shared/utils/utils.ts | 36 +++++++++++ .../components/header/header.component.html | 10 +++ .../components/header/header.component.ts | 4 ++ .../web/src/app/theme/styles/utilities.scss | 4 ++ 12 files changed, 198 insertions(+), 65 deletions(-) diff --git a/bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts b/bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts index 758ababaa3..9beab4b51a 100644 --- a/bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts +++ b/bot/admin/web/src/app/rag/rag-settings/models/engines-configurations.ts @@ -70,7 +70,7 @@ Your job is to surface the right information from provided context. - **Tone**: neutral, kind, “you” address, light humor when appropriate. - **Language**: Introduce technical jargon only when strictly necessary and briefly define it. -- **Structure**: Use short sentences, bold or bullet points for key ideas, headings to separate the main sections, and fenced \`code\` blocks for examples. +- **Structure**: Use short sentences, bold or bullet points for key ideas, headings to separate the main sections, and fenced \`code\` blocks for examples. Only include absolute links in your answers. - **Style**: Direct and technical tone, with **bold** for important concepts. - **Formatting**: Mandatory Markdown, with line breaks for readability. - **Examples**: Include a concrete example (code block or CLI command) for each feature. @@ -89,6 +89,7 @@ Before responding, ensure: - The documentation actually addresses the question. - Your answer is consistent with the docs. +- If you include a link in your response, make sure it is an absolute link; otherwise, do not include it. ## Technical Instructions: diff --git a/bot/admin/web/src/app/rag/rag-settings/models/rag-settings.ts b/bot/admin/web/src/app/rag/rag-settings/models/rag-settings.ts index e605ec6561..c29537b2bb 100644 --- a/bot/admin/web/src/app/rag/rag-settings/models/rag-settings.ts +++ b/bot/admin/web/src/app/rag/rag-settings/models/rag-settings.ts @@ -37,6 +37,4 @@ export interface RagSettings { indexName: string; maxDocumentsRetrieved: number; - - documentsRequired: boolean; } diff --git a/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.html b/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.html index cf3527fa9f..7faf77dbdd 100644 --- a/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.html +++ b/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.html @@ -408,24 +408,6 @@

Rag settings

/> - -
- - - Don't allow undocumented answers - - -
diff --git a/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.ts b/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.ts index c906ffc48e..12d8cdee99 100644 --- a/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.ts +++ b/bot/admin/web/src/app/rag/rag-settings/rag-settings.component.ts @@ -50,8 +50,6 @@ interface RagSettingsForm { maxDocumentsRetrieved: FormControl; - documentsRequired: FormControl; - questionCondensingLlmProvider: FormControl; questionCondensingLlmSetting: FormGroup; questionCondensingPrompt: FormGroup; @@ -197,7 +195,6 @@ export class RagSettingsComponent implements OnInit, OnDestroy { indexSessionId: new FormControl(undefined), indexName: new FormControl(undefined), - documentsRequired: new FormControl(undefined), maxDocumentsRetrieved: new FormControl(undefined), questionCondensingLlmProvider: new FormControl(undefined, [Validators.required]), @@ -242,10 +239,6 @@ export class RagSettingsComponent implements OnInit, OnDestroy { return this.form.get('indexSessionId') as FormControl; } - get documentsRequired(): FormControl { - return this.form.get('documentsRequired') as FormControl; - } - get maxDocumentsRetrieved(): FormControl { return this.form.get('maxDocumentsRetrieved') as FormControl; } @@ -363,7 +356,6 @@ export class RagSettingsComponent implements OnInit, OnDestroy { setFormDefaultValues(): void { this.form.patchValue({ - documentsRequired: false, debugEnabled: false, maxMessagesFromHistory: 5, maxDocumentsRetrieved: 4 diff --git a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html index 361645b06d..09354d8922 100644 --- a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html +++ b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.html @@ -14,27 +14,55 @@ ~ limitations under the License. --> -
-
Status: {{ message.data.answer?.status }}
+
+
+ Status | {{ getStatusLabel() }} +
-
- Topic: {{ message.data.answer?.topic }} +
+ Topic | {{ message.data.answer?.topic }}
-
- Suggested topics: - - {{ suggestion }} - +
+ Suggested topics | + + + {{ suggestion }} + +
-
- Redirection intent: {{ message.data.answer?.redirection_intent }} +
+ Redirection intent | {{ message.data.answer?.redirection_intent }}
-
Confidence score:: {{ message.data.answer?.confidence_score }}
+
+ Confidence | {{ message.data.answer?.confidence_score }} +
diff --git a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.scss b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.scss index 5f59982a64..d698b164ed 100644 --- a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.scss +++ b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.scss @@ -14,31 +14,56 @@ * limitations under the License. */ -@import '@nebular/theme/styles/theming'; -@import '@nebular/theme/styles/themes'; - :host { display: flex; - // justify-content: center; .debug { width: 100%; - cursor: pointer; - font-size: 0.75rem; - color: var(--chat-message-sender-text-color); - border-top: 1px dashed var(--chat-message-sender-text-color); - border-bottom: 1px dashed var(--chat-message-sender-text-color); - background-color: var(--background-basic-color-3); + margin-top: 0.5em; + margin-bottom: 0.5em; + + .tag { + display: inline-flex; + width: fit-content; + padding: 0.2rem 0.5rem; + border-radius: 10rem; + background-color: var(--border-basic-color-3); + font-size: 0.7rem; + line-height: 1rem; + white-space: nowrap; + cursor: pointer; + + &.status-success { + background-color: var(--color-success-500); + color: white; + } + &.status-warning { + background-color: var(--color-warning-500); + color: white; + } + &.status-info { + background-color: var(--color-info-500); + color: white; + } + &.status-danger { + background-color: var(--color-danger-500); + color: white; + } - margin-left: 1em; - // height: 11px; - // margin-bottom: 10px; + &.topic { + background-color: var(--color-success-300); + color: var(--color-success-600); + } + &.suggested-topics { + background-color: var(--color-warning-300); + color: var(--color-warning-600); + } - // span { - // background: var(--card-background-color); - // padding: 0 5px; - // border-radius: 0.5rem; - // } + &.confidence { + background-color: var(--color-default-300); + color: var(--text-basic-color); + } + } } } diff --git a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.ts b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.ts index 12a45039e6..b6adb66ada 100644 --- a/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.ts +++ b/bot/admin/web/src/app/shared/components/chat-ui/chat-ui-message/chat-ui-message-debug/chat-ui-message-debug.component.ts @@ -14,10 +14,11 @@ * limitations under the License. 
  */
-import { Component, Input, OnInit } from '@angular/core';
+import { Component, Input } from '@angular/core';
 import { NbDialogService } from '@nebular/theme';
-import { Debug } from '../../../../model/dialog-data';
+import { Debug, RagAnswerStatus, RagAnswerStatusLabels } from '../../../../model/dialog-data';
 import { DebugViewerDialogComponent } from '../../../debug-viewer-dialog/debug-viewer-dialog.component';
+import { getContrastYIQ, getInterpolatedColor } from '../../../../utils';
 
 @Component({
   selector: 'tock-chat-ui-message-debug',
@@ -35,4 +36,42 @@ export class ChatUiMessageDebugComponent {
       }
     });
   }
+
+  getStatusClassName(): string {
+    const status = this.message.data.answer?.status;
+    if (!status) {
+      return '';
+    }
+
+    switch (status.toLowerCase()) {
+      case RagAnswerStatus.FOUND_IN_CONTEXT:
+        return 'status-success';
+      case RagAnswerStatus.NOT_FOUND_IN_CONTEXT:
+        return 'status-warning';
+      case RagAnswerStatus.SMALL_TALK:
+        return 'status-info';
+      case RagAnswerStatus.OUT_OF_SCOPE:
+        return 'status-danger';
+      default:
+        return '';
+    }
+  }
+
+  getStatusLabel(): string {
+    const status = this.message.data.answer?.status;
+    if (status) {
+      return RagAnswerStatusLabels[status.toLowerCase()] || status.replace(/_/g, ' ').replace(/^(.)|\s+(.)/g, (c) => c.toUpperCase());
+    }
+    return '';
+  }
+
+  getConfidenceBgColor(): { bg: string; fg: string } {
+    const score = this.message.data.answer?.confidence_score;
+    if (score != null) {
+      const bg = getInterpolatedColor(score);
+      const fg = getContrastYIQ(bg);
+      return { bg, fg };
+    }
+    return { bg: '', fg: '' };
+  }
 }
diff --git a/bot/admin/web/src/app/shared/model/dialog-data.ts b/bot/admin/web/src/app/shared/model/dialog-data.ts
index 428ebb1962..b8253f54a6 100644
--- a/bot/admin/web/src/app/shared/model/dialog-data.ts
+++ b/bot/admin/web/src/app/shared/model/dialog-data.ts
@@ -315,6 +315,20 @@ export class Debug extends BotMessage {
   }
 }
 
+export enum RagAnswerStatus {
+  FOUND_IN_CONTEXT = 'found_in_context',
+  NOT_FOUND_IN_CONTEXT = 'not_found_in_context',
+  SMALL_TALK = 'small_talk',
+  OUT_OF_SCOPE = 'out_of_scope'
+}
+
+export const RagAnswerStatusLabels: Record<RagAnswerStatus, string> = {
+  [RagAnswerStatus.FOUND_IN_CONTEXT]: 'Found in context',
+  [RagAnswerStatus.NOT_FOUND_IN_CONTEXT]: 'Not found in context',
+  [RagAnswerStatus.SMALL_TALK]: 'Small talk',
+  [RagAnswerStatus.OUT_OF_SCOPE]: 'Out of scope'
+};
+
 export class SentenceElement {
   constructor(
     public connectorType: ConnectorType,
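A short illustration of how the new enum, the label map, and the getStatusLabel fallback interact. The snippet is illustrative only and not part of the patch; the import path is assumed.

import { RagAnswerStatus, RagAnswerStatusLabels } from './dialog-data';

// Known statuses resolve through the label map:
const known = RagAnswerStatusLabels[RagAnswerStatus.SMALL_TALK]; // 'Small talk'

// Unknown statuses fall back to title-casing, mirroring getStatusLabel above:
const fallback = 'needs_review'
  .replace(/_/g, ' ')
  .replace(/^(.)|\s+(.)/g, (c) => c.toUpperCase()); // 'Needs Review'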
diff --git a/bot/admin/web/src/app/shared/utils/utils.ts b/bot/admin/web/src/app/shared/utils/utils.ts
index 7a4164040b..d886dd6c5d 100644
--- a/bot/admin/web/src/app/shared/utils/utils.ts
+++ b/bot/admin/web/src/app/shared/utils/utils.ts
@@ -174,6 +174,42 @@ export function shadeColor(hexcolor: string, amount: number) {
   return '#' + RR + GG + BB;
 }
 
+/**
+ * Interpolates between two hex colors based on a value between 0 and 1.
+ *
+ * @param {number} value - A value between 0 and 1. Clamped to this range if outside.
+ * @param {string} [colorStart="#acbef4"] - The starting hex color (e.g., "#acbef4").
+ * @param {string} [colorEnd="#3366ff"] - The ending hex color (e.g., "#3366ff").
+ * @returns {string} The interpolated hex color as a string.
+ *
+ * @example
+ * // Returns a color halfway between #acbef4 and #3366ff
+ * const interpolatedColor = getInterpolatedColor(0.5, "#acbef4", "#3366ff");
+ */
+export function getInterpolatedColor(value: number, colorStart: string = '#acbef4', colorEnd: string = '#3366ff'): string {
+  // Clamp value between 0 and 1
+  const clampedValue = Math.min(1, Math.max(0, value));
+
+  // Parse hex colors to RGB
+  const parseHex = (hex: string) => {
+    const r = parseInt(hex.slice(1, 3), 16);
+    const g = parseInt(hex.slice(3, 5), 16);
+    const b = parseInt(hex.slice(5, 7), 16);
+    return { r, g, b };
+  };
+
+  const start = parseHex(colorStart);
+  const end = parseHex(colorEnd);
+
+  // Interpolate each channel
+  const r = Math.round(start.r + (end.r - start.r) * clampedValue);
+  const g = Math.round(start.g + (end.g - start.g) * clampedValue);
+  const b = Math.round(start.b + (end.b - start.b) * clampedValue);
+
+  // Convert back to hex
+  return `#${r.toString(16).padStart(2, '0')}${g.toString(16).padStart(2, '0')}${b.toString(16).padStart(2, '0')}`;
+}
+
 export async function copyToClipboard(text: string): Promise<void> {
   if (navigator.clipboard) {
     await navigator.clipboard.writeText(text);
diff --git a/bot/admin/web/src/app/theme/components/header/header.component.html b/bot/admin/web/src/app/theme/components/header/header.component.html
index b76bd7871c..4256d73059 100644
--- a/bot/admin/web/src/app/theme/components/header/header.component.html
+++ b/bot/admin/web/src/app/theme/components/header/header.component.html
@@ -103,6 +103,16 @@
+
+
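Two closing notes on the color utilities. First, a worked example of the interpolation: with the defaults, getInterpolatedColor(0.5) mixes #acbef4 (rgb(172, 190, 244)) and #3366ff (rgb(51, 102, 255)) channel by channel and rounds, giving rgb(112, 146, 250), i.e. '#7092fa'. Second, getContrastYIQ is imported by the debug component but not defined in this patch; a common implementation of that pattern is sketched below as an assumption, not as the project's actual code.

// Sketch (assumed, not from this patch): choose black or white text for a given
// hex background using the YIQ perceived-brightness formula.
export function getContrastYIQ(hexcolor: string): string {
  const hex = hexcolor.replace('#', '');
  const r = parseInt(hex.slice(0, 2), 16);
  const g = parseInt(hex.slice(2, 4), 16);
  const b = parseInt(hex.slice(4, 6), 16);
  const yiq = (r * 299 + g * 587 + b * 114) / 1000; // perceived brightness, 0-255
  return yiq >= 128 ? 'black' : 'white';
}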