From e454260507909794dee5546000b6ab171f43140a Mon Sep 17 00:00:00 2001 From: Sasindu Alahakoon Date: Mon, 3 Nov 2025 14:28:59 +0530 Subject: [PATCH 1/4] Mark embedding as optional and add semantic configs --- ballerina/azure_ai_search_knowledgebase.bal | 51 ++++++++++++++++----- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/ballerina/azure_ai_search_knowledgebase.bal b/ballerina/azure_ai_search_knowledgebase.bal index 29f2677..a81f1d5 100644 --- a/ballerina/azure_ai_search_knowledgebase.bal +++ b/ballerina/azure_ai_search_knowledgebase.bal @@ -74,6 +74,12 @@ type IndexSchemaInfo record { map allFields; }; +# Details for semantic configuration in Azure AI Search. +public type SemanticConfigurationDetails record {| + # Name of the semantic configuration + string name; +|}; + # Represents the Azure Search Knowledge Base implementation. public distinct isolated class AiSearchKnowledgeBase { *ai:KnowledgeBase; @@ -85,11 +91,12 @@ public distinct isolated class AiSearchKnowledgeBase { private final string apiKey; private final boolean verbose; private final ai:Chunker|ai:AUTO|ai:DISABLE chunker; - private final ai:EmbeddingProvider embeddingModel; + private final ai:EmbeddingProvider? embeddingModel; private final string contentFieldName; private final string keyFieldName; private final string[] vectorFieldNames; private final map allFields; + private final SemanticConfigurationDetails? semanticConfigurationDetails; # Initializes a new `AiSearchKnowledgeBase` instance. # @@ -107,17 +114,20 @@ public distinct isolated class AiSearchKnowledgeBase { # This configuration is only required when the `index` parameter is # provided as an `search:SearchIndex` # + indexClientConnectionConfig - Connection configuration for the Azure AI index client. + # + semanticConfigurationDetails - Optional semantic configuration details for semantic search. 
# + return - An instance of `AiSearchKnowledgeBase` or an `ai:Error` if initialization fails public isolated function init(string serviceUrl, string apiKey, - string|search:SearchIndex index, ai:EmbeddingProvider embeddingModel, + string|search:SearchIndex index, ai:EmbeddingProvider? embeddingModel = (), ai:Chunker|ai:AUTO|ai:DISABLE chunker = ai:AUTO, boolean verbose = false, string apiVersion = AI_AZURE_KNOWLEDGE_BASE_API_VERSION, string contentFieldName = CONTENT_FIELD_NAME, search:ConnectionConfig searchClientConnectionConfig = {}, - index:ConnectionConfig indexClientConnectionConfig = {}) returns ai:Error? { + index:ConnectionConfig indexClientConnectionConfig = {}, + SemanticConfigurationDetails? semanticConfigurationDetails = ()) returns ai:Error? { self.chunker = chunker; self.embeddingModel = embeddingModel; self.verbose = verbose; self.contentFieldName = contentFieldName; + self.semanticConfigurationDetails = semanticConfigurationDetails.cloneReadOnly(); // Initialize service client for management operations self.apiKey = apiKey; @@ -189,14 +199,22 @@ public distinct isolated class AiSearchKnowledgeBase { return error ai:Error("Failed to chunk documents before ingestion", chunks); } - ai:Embedding[]|error embeddings = self.embeddingModel->batchEmbed(chunks); - if embeddings is error { + ai:Embedding[]? embeddings = (); + ai:EmbeddingProvider? embeddingProvider = self.embeddingModel; + if embeddingProvider is ai:EmbeddingProvider { + logIfVerboseEnabled(self.verbose, + string `Generating embeddings for ${chunks.length().toString()} chunks using embedding model.`); + ai:Embedding[]|error? 
embeddingResults = embeddingProvider->batchEmbed(chunks); + if embeddingResults is error { + logIfVerboseEnabled(self.verbose, + string `Failed to generate embeddings for documents: ${embeddingResults.message()}}`, embeddingResults); + return error ai:Error("Failed to generate embeddings for documents", embeddingResults); + } + + embeddings = embeddingResults; logIfVerboseEnabled(self.verbose, - string `Failed to generate embeddings for documents: ${embeddings.message()}}`, embeddings); - return error ai:Error("Failed to generate embeddings for documents", embeddings); + string `Generated embeddings for ${embeddings == () ? 0: embeddings.length().toString()} chunks.`); } - logIfVerboseEnabled(self.verbose, - string `Generated embeddings for ${embeddings.length().toString()} chunks.`); index:IndexDocumentsResult|error uploadResult = self.uploadDocuments(self.indexClient, chunks, self.index, embeddings, {[API_KEY_HEADER_NAME]: self.apiKey}, {api\-version: self.apiVersion}); @@ -236,13 +254,17 @@ public distinct isolated class AiSearchKnowledgeBase { lock { ai:TextChunk queryChunk = {content: query, 'type: CONTENT_TYPE_TEXT_CHUNK}; - ai:Embedding queryEmbedding = check self.embeddingModel->embed(queryChunk); + ai:Embedding? queryEmbedding = (); + ai:EmbeddingProvider? embeddingProvider = self.embeddingModel; + if embeddingProvider is ai:EmbeddingProvider { + queryEmbedding = check embeddingProvider->embed(queryChunk); + } // Create vector search request using Azure AI Search's integrated vectorization int vectorFieldLength = self.vectorFieldNames.length(); index:VectorQuery[]? vectorQuery = (); - if vectorFieldLength != 0 { + if vectorFieldLength != 0 && queryEmbedding is ai:Embedding { ai:Vector|ai:Error vectors = generateVectorFromEmbedding(queryEmbedding); if vectors is ai:Error { return vectors; @@ -258,9 +280,16 @@ public distinct isolated class AiSearchKnowledgeBase { ]; } + SemanticConfigurationDetails? 
semanticConfig = self.semanticConfigurationDetails is SemanticConfigurationDetails + ? self.semanticConfigurationDetails : (); + index:QueryType queryType = semanticConfig is SemanticConfigurationDetails + ? "semantic" : "simple"; + index:SearchRequest searchRequest = { search: query, 'select: "*", + queryType: queryType, + semanticConfiguration: semanticConfig is SemanticConfigurationDetails ? semanticConfig.name : (), vectorQueries: vectorQuery ?: [], top: maxLimit == -1 ? () : maxLimit }; From 22b65de86da512da7a5de88e706c67e92b470261 Mon Sep 17 00:00:00 2001 From: Sasindu Alahakoon Date: Mon, 3 Nov 2025 14:42:01 +0530 Subject: [PATCH 2/4] Update the version 1.3.0 --- gradle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle.properties b/gradle.properties index ebf2c57..6dd0237 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,6 +1,6 @@ org.gradle.caching=true group=io.ballerina.lib -version=1.2.1-SNAPSHOT +version=1.3.0-SNAPSHOT ballerinaLangVersion=2201.12.0 shadowJarPluginVersion=8.1.1 From 692be437386e5bf2660c34dc5a4c18b6e06aac63 Mon Sep 17 00:00:00 2001 From: Sasindu Alahakoon Date: Mon, 3 Nov 2025 15:13:21 +0530 Subject: [PATCH 3/4] [Automated] Update the toml files --- ballerina/Ballerina.toml | 6 +++--- ballerina/CompilerPlugin.toml | 2 +- ballerina/Dependencies.toml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ballerina/Ballerina.toml b/ballerina/Ballerina.toml index 4b366d9..4a980a6 100644 --- a/ballerina/Ballerina.toml +++ b/ballerina/Ballerina.toml @@ -7,7 +7,7 @@ icon="icon.png" name = "ai.azure" org = "ballerinax" repository = "https://github.com/ballerina-platform/module-ballerinax-ai.azure" -version = "1.2.0" +version = "1.3.0" [platform.java21] graalvmCompatible = true @@ -15,5 +15,5 @@ graalvmCompatible = true [[platform.java21.dependency]] groupId = "io.ballerina.lib" artifactId = "ai.azure-native" -version = "1.2.0" -path = "../native/build/libs/ai.azure-native-1.2.0.jar" 
+version = "1.3.0" +path = "../native/build/libs/ai.azure-native-1.3.0-SNAPSHOT.jar" diff --git a/ballerina/CompilerPlugin.toml b/ballerina/CompilerPlugin.toml index b40eb33..a9d5849 100644 --- a/ballerina/CompilerPlugin.toml +++ b/ballerina/CompilerPlugin.toml @@ -3,7 +3,7 @@ id = "ai-azure-compiler-plugin" class = "io.ballerina.lib.ai.azure.AiAzureCompilerPlugin" [[dependency]] -path = "../compiler-plugin/build/libs/ai.azure-compiler-plugin-1.2.0.jar" +path = "../compiler-plugin/build/libs/ai.azure-compiler-plugin-1.3.0-SNAPSHOT.jar" [[dependency]] path = "../compiler-plugin/build/libs/ballerina-to-openapi-2.3.0.jar" diff --git a/ballerina/Dependencies.toml b/ballerina/Dependencies.toml index c712fe5..54b2eb1 100644 --- a/ballerina/Dependencies.toml +++ b/ballerina/Dependencies.toml @@ -5,7 +5,7 @@ [ballerina] dependencies-toml-version = "2" -distribution-version = "2201.12.0" +distribution-version = "2201.12.9" [[package]] org = "ballerina" @@ -407,7 +407,7 @@ dependencies = [ [[package]] org = "ballerinax" name = "ai.azure" -version = "1.2.0" +version = "1.3.0" dependencies = [ {org = "ballerina", name = "ai"}, {org = "ballerina", name = "constraint"}, From 48501b3882821685e7ba5a29a0c5b03353a2fc2b Mon Sep 17 00:00:00 2001 From: Sasindu Alahakoon Date: Mon, 3 Nov 2025 16:49:46 +0530 Subject: [PATCH 4/4] Update the semantic config name --- ballerina/azure_ai_search_knowledgebase.bal | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/ballerina/azure_ai_search_knowledgebase.bal b/ballerina/azure_ai_search_knowledgebase.bal index a81f1d5..a5e8314 100644 --- a/ballerina/azure_ai_search_knowledgebase.bal +++ b/ballerina/azure_ai_search_knowledgebase.bal @@ -74,12 +74,6 @@ type IndexSchemaInfo record { map allFields; }; -# Details for semantic configuration in Azure AI Search. 
-public type SemanticConfigurationDetails record {| - # Name of the semantic configuration - string name; -|}; - # Represents the Azure Search Knowledge Base implementation. public distinct isolated class AiSearchKnowledgeBase { *ai:KnowledgeBase; @@ -96,7 +90,7 @@ public distinct isolated class AiSearchKnowledgeBase { private final string keyFieldName; private final string[] vectorFieldNames; private final map allFields; - private final SemanticConfigurationDetails? semanticConfigurationDetails; + private final string? semanticConfigurationName; # Initializes a new `AiSearchKnowledgeBase` instance. # @@ -114,7 +108,7 @@ public distinct isolated class AiSearchKnowledgeBase { # This configuration is only required when the `index` parameter is # provided as an `search:SearchIndex` # + indexClientConnectionConfig - Connection configuration for the Azure AI index client. - # + semanticConfigurationDetails - Optional semantic configuration details for semantic search. + # + semanticConfigurationName - Optional name of the semantic configuration to use for semantic search. When provided, search requests are sent with `queryType` set to "semantic" together with this configuration name; when omitted (`()`), requests use the "simple" query type. # + return - An instance of `AiSearchKnowledgeBase` or an `ai:Error` if initialization fails public isolated function init(string serviceUrl, string apiKey, string|search:SearchIndex index, ai:EmbeddingProvider? embeddingModel = (), @@ -122,12 +116,12 @@ public distinct isolated class AiSearchKnowledgeBase { string apiVersion = AI_AZURE_KNOWLEDGE_BASE_API_VERSION, string contentFieldName = CONTENT_FIELD_NAME, search:ConnectionConfig searchClientConnectionConfig = {}, index:ConnectionConfig indexClientConnectionConfig = {}, - SemanticConfigurationDetails? semanticConfigurationDetails = ()) returns ai:Error? { + string? semanticConfigurationName = ()) returns ai:Error? 
{ self.chunker = chunker; self.embeddingModel = embeddingModel; self.verbose = verbose; self.contentFieldName = contentFieldName; - self.semanticConfigurationDetails = semanticConfigurationDetails.cloneReadOnly(); + self.semanticConfigurationName = semanticConfigurationName; // Initialize service client for management operations self.apiKey = apiKey; @@ -280,16 +274,13 @@ public distinct isolated class AiSearchKnowledgeBase { ]; } - SemanticConfigurationDetails? semanticConfig = self.semanticConfigurationDetails is SemanticConfigurationDetails - ? self.semanticConfigurationDetails : (); - index:QueryType queryType = semanticConfig is SemanticConfigurationDetails - ? "semantic" : "simple"; + index:QueryType queryType = self.semanticConfigurationName is string ? "semantic" : "simple"; index:SearchRequest searchRequest = { search: query, 'select: "*", queryType: queryType, - semanticConfiguration: semanticConfig is SemanticConfigurationDetails ? semanticConfig.name : (), + semanticConfiguration: self.semanticConfigurationName is string ? self.semanticConfigurationName : (), vectorQueries: vectorQuery ?: [], top: maxLimit == -1 ? () : maxLimit };