From 2cd42e8a24d0a9f35b4aa14dcf0728e128fa3da7 Mon Sep 17 00:00:00 2001
From: Sasindu Alahakoon <dilsharasasindu@gmail.com>
Date: Wed, 22 Oct 2025 15:20:44 +0530
Subject: [PATCH 01/10] Add basic Ai Search knowledgebase implementation

---
 ballerina/azure_ai_search_knowledgebase.bal | 712 ++++++++++++++++++++
 1 file changed, 712 insertions(+)
 create mode 100644 ballerina/azure_ai_search_knowledgebase.bal

diff --git a/ballerina/azure_ai_search_knowledgebase.bal b/ballerina/azure_ai_search_knowledgebase.bal
new file mode 100644
index 0000000..0dea4de
--- /dev/null
+++ b/ballerina/azure_ai_search_knowledgebase.bal
@@ -0,0 +1,712 @@
+// Copyright (c) 2025 WSO2 LLC (http://www.wso2.com).
+//
+// WSO2 LLC. licenses this file to you under the Apache License,
+// Version 2.0 (the "License"); you may not use this file except
+// in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import ballerina/ai;
+import ballerina/log;
+import ballerina/uuid;
+import ballerinax/azure.ai.search as search;
+import ballerinax/azure.ai.search.index;
+
+const CONTENT_FIELD_NAME = "content"; // default for the `contentFieldName` parameter of `init`
+const KEY_FIELD_NAME = "id"; // fallback key field name when the index schema flags no key field
+const API_VERSION = "2025-09-01"; // default value of the `apiVersion`/`api-version` parameters in this file
+const API_KEY_HEADER_NAME = "api-key"; // name of the request header that carries the API key
+
+# Result of analyzing a search index schema (produced by `analyzeIndexSchema`).
+type IndexSchemaInfo record {
+    # Name of the field flagged as key, or `KEY_FIELD_NAME` when no field is flagged
+    string keyFieldName;
+    # Names of the fields that declare both `dimensions` and `vectorSearchProfile`
+    string[] vectorFieldNames;
+    # Names of the fields whose name equals the configured content field name
+    string[] contentFieldNames;
+    # All fields of the index, keyed by field name
+    map<search:SearchField> allFields;
+};
+
+# Connection configurations for the Azure AI Search clients created by `AzureAiSearchKnowledgeBase`.
+public type ClientConfiguration record {|
+    # Connection configuration for the service-level search client. `init` always creates this
+    # client and uses it either to fetch an existing index (when `index` is a name) or to
+    # create/update one (when `index` is a `search:SearchIndex` definition).
+    search:ConnectionConfig searchClientConnectionConfig = {};
+    # Connection configuration for the index-scoped client used for document upload, search and delete
+    index:ConnectionConfig indexClientConnectionConfig = {};
+|};
+
+# Represents the Azure AI Search knowledge base implementation.
+# Users should create the required `indexer`, `data source` and `index` beforehand using
+# the util functions provided in this module.
+# Currently, only search fields named `id`, `content` and `type` are supported.
+public distinct isolated class AzureAiSearchKnowledgeBase {
+    *ai:KnowledgeBase;
+    
+    private final search:SearchIndex index;
+    private final search:Client serviceClient;
+    private final index:Client indexClient;
+    private final string apiVersion;
+    private final string apiKey;
+    private final boolean verbose;
+    private final ai:Chunker|ai:AUTO|ai:DISABLE chunker;
+    private final ai:EmbeddingProvider embeddingModel;
+    private final string contentFieldName;
+    private final string keyFieldName;
+    private final string[] vectorFieldNames;
+    private final map<search:SearchField> allFields;
+
+    # Initializes a new `AzureAiSearchKnowledgeBase` instance: creates the service client, fetches or
+    # creates the index, creates the index-scoped client and analyzes the index schema.
+    # + serviceUrl - The service URL of the Azure AI Search instance
+    # + apiKey - The API key for authenticating with the Azure AI Search service
+    # + index - The name of an existing search index or a `search:SearchIndex` definition to create or update
+    # + embeddingModel - The embedding model to use for generating embeddings
+    # + chunker - The chunker to use for chunking documents before ingestion. Defaults to `ai:AUTO`.
+    # + verbose - Whether to enable verbose logging. Defaults to `false`.
+    # + apiVersion - The API version sent with every request. Defaults to `API_VERSION`.
+    # + clientConfigurations - Additional client configurations for Azure AI Search clients
+    # + contentFieldName - The name of the field in the index that contains the main content. Defaults to "content".
+    # + return - An `ai:Error` if initialization fails, otherwise `nil`
+    public isolated function init(string serviceUrl, string apiKey, string|search:SearchIndex index, ai:EmbeddingProvider embeddingModel, 
+            ai:Chunker|ai:AUTO|ai:DISABLE chunker = ai:AUTO, boolean verbose = false, 
+            string apiVersion = API_VERSION, string contentFieldName = CONTENT_FIELD_NAME, 
+            *ClientConfiguration clientConfigurations) returns ai:Error? {
+        self.chunker = chunker;
+        self.embeddingModel = embeddingModel;
+        self.verbose = verbose;
+        self.contentFieldName = contentFieldName;
+        
+        // Initialize service client for management operations (fetching or creating the index)
+        search:ConnectionConfig searchClientConfig = clientConfigurations.searchClientConnectionConfig;
+        self.apiKey = apiKey;
+        self.apiVersion = apiVersion;
+
+        search:Client|error serviceClient = new search:Client(serviceUrl, searchClientConfig);
+        if serviceClient is error {
+            return error ai:Error("Failed to initialize Azure AI Service Client", serviceClient);
+        }
+
+        self.serviceClient = serviceClient;
+        // Resolve the index: fetch it when given by name, otherwise create or update it from the definition
+        string indexName = index is string ? index : index.name;
+        if index is string {
+            // Verify that the index exists; the fetched definition is kept for schema analysis
+            search:SearchIndex|error searchIndex = self.serviceClient->indexesGet(indexName, {
+                [API_KEY_HEADER_NAME]: self.apiKey}, {api\-version: self.apiVersion});
+            if searchIndex is error {
+                logIfVerboseEnable(self.verbose, string `Search index ${indexName} does not exist: ${searchIndex.message()}`);
+                return error ai:Error("Failed to verify existence of index", searchIndex);
+            }
+
+            self.index = searchIndex.cloneReadOnly();
+            logIfVerboseEnable(self.verbose, string `Search index ${indexName} exists. Details: ${searchIndex.toJsonString()}`);
+        } else {
+            logIfVerboseEnable(self.verbose, string `Attempting to create search index ${indexName}...`);
+            search:SearchIndex|error createdIndex = self.serviceClient->indexesCreateOrUpdate(indexName, {
+                [API_KEY_HEADER_NAME]: self.apiKey, Prefer: "return=representation"}, index, {api\-version: self.apiVersion});
+            if createdIndex is error {
+                logIfVerboseEnable(self.verbose, string `Failed to create search index ${indexName}: ${createdIndex.message()}`);
+                return error ai:Error("Failed to create search index", createdIndex);
+            }
+            self.index = createdIndex.cloneReadOnly();
+            logIfVerboseEnable(self.verbose, string `Search index ${indexName} created successfully.`);
+        }
+        // Document operations go through a second client whose base URL is scoped to this index
+        string indexServiceUrl = string `${serviceUrl}/indexes('${indexName}')`; // NOTE(review): a trailing "/" on serviceUrl yields "//indexes(...)"
+        logIfVerboseEnable(self.verbose, string `Initializing Azure Index Client for index URL: ${indexServiceUrl}`);
+        index:Client|error indexClient = new (indexServiceUrl, clientConfigurations.indexClientConnectionConfig);
+        if indexClient is error {
+            logIfVerboseEnable(self.verbose, string `Failed to initialize Azure Index Client: ${indexClient.message()}`);
+            return error ai:Error("Failed to initialize Azure Index Client", indexClient);
+        }
+        self.indexClient = indexClient;
+        // Derive key, vector and field information from the resolved index schema
+        lock {
+            IndexSchemaInfo schemaInfo = check analyzeIndexSchema(self.verbose, self.index, self.contentFieldName);
+
+            self.keyFieldName = schemaInfo.keyFieldName;
+            self.vectorFieldNames = schemaInfo.vectorFieldNames.cloneReadOnly();
+            self.allFields = schemaInfo.allFields.cloneReadOnly();
+        }
+    }
+
+    # Ingests documents into the Azure search knowledge base: chunks, embeds, then uploads them.
+    # + documents - The documents or chunks to ingest (single document, array of documents, or array of chunks)
+    # + return - An `ai:Error` if chunking, embedding, upload or indexing of any item fails, otherwise `nil`
+    public isolated function ingest(ai:Chunk[]|ai:Document[]|ai:Document documents) returns ai:Error? {
+        lock {
+            ai:Chunk[]|ai:Error chunks = self.chunk(documents.clone());
+            if chunks is ai:Error {
+                logIfVerboseEnable(self.verbose, string `Failed to chunk documents: ${chunks.message()}`, chunks);
+                return error ai:Error("Failed to chunk documents before ingestion", chunks);
+            }
+            if chunks.length() == 0 {
+                return; // nothing to embed or upload
+            }
+
+            ai:Embedding[]|error embeddings = self.embeddingModel->batchEmbed(chunks);
+            if embeddings is error {
+                logIfVerboseEnable(self.verbose, string `Failed to generate embeddings for documents: ${embeddings.message()}`, embeddings);
+                return error ai:Error("Failed to generate embeddings for documents", embeddings);
+            }
+            logIfVerboseEnable(self.verbose, string `Generated embeddings for ${embeddings.length().toString()} chunks.`);
+
+            index:IndexDocumentsResult|error uploadResult = self.uploadDocuments(self.indexClient, chunks, self.index, 
+                    embeddings, {[API_KEY_HEADER_NAME]: self.apiKey}, {api\-version: self.apiVersion});
+            if uploadResult is error {
+                logIfVerboseEnable(self.verbose, string `Failed to upload documents to search index: ${uploadResult.message()}`, uploadResult);
+                return error ai:Error("Failed to upload documents to search index", uploadResult);
+            }
+            // The request can succeed as a whole while individual documents are rejected
+            foreach index:IndexingResult result in uploadResult.value {
+                if !result.status {
+                    return error ai:Error(string `Failed to index document with key ${result.'key}: ${result.errorMessage ?: "Unknown error"}`);
+                }
+            }
+        }
+    }
+
+    # Retrieves relevant chunks for the given query, combining full-text search with a vector query
+    # over the index's vector fields (when the index has any).
+    # + query - The text query to search for
+    # + maxLimit - The maximum number of items to return; `-1` leaves the limit to the service
+    # + filters - Optional metadata filters to apply during retrieval
+    # + return - An array of matching chunks with similarity scores, or an `ai:Error` if retrieval fails
+    public isolated function retrieve(string query, int maxLimit = 10, ai:MetadataFilters? filters = ()) returns ai:QueryMatch[]|ai:Error {
+        if query is "" {
+            return error ai:Error("Query cannot be empty for retrieval");
+        }
+
+        if maxLimit != -1 && maxLimit <= 0 {
+            return error ai:Error("maxLimit must be a positive integer");
+        }
+
+        if maxLimit > int:SIGNED32_MAX_VALUE {
+            return error ai:Error(string `maxLimit exceeds maximum allowed value of ${int:SIGNED32_MAX_VALUE}`);
+        }
+
+        lock {
+            ai:TextChunk queryChunk = {content: query, 'type: "text-chunk"};
+            ai:Embedding queryEmbedding = check self.embeddingModel->embed(queryChunk);
+
+            // The query is embedded client-side (above) and sent as an explicit vector query
+            int vectorFieldLength = self.vectorFieldNames.length();
+            index:VectorQuery[]? vectorQuery = ();
+
+            if vectorFieldLength != 0 {
+                ai:Vector|ai:Error vectors = self.generateVector(queryEmbedding);
+                if vectors is ai:Error {
+                    return vectors;
+                }
+
+                vectorQuery = [
+                    {
+                        kind: "vector",
+                        k: maxLimit == -1 ? () : <int:Signed32>maxLimit,
+                        fields: string:'join(",", ...self.vectorFieldNames),
+                        "vector": vectors
+                    }
+                ];
+            }
+
+            index:SearchRequest searchRequest = {
+                search: query,
+                'select: "*",
+                vectorQueries: vectorQuery ?: [],
+                top: maxLimit == -1 ? () : <int:Signed32>maxLimit
+            };
+
+            // Apply metadata filters if provided
+            if filters is ai:MetadataFilters {
+                string? filterExpression = self.buildODataFilter(filters.cloneReadOnly());
+                if filterExpression is string {
+                    searchRequest.filter = filterExpression;
+                } else {
+                    logIfVerboseEnable(self.verbose, "Metadata filters could not be translated to an OData expression; searching without a filter.");
+                }
+            }
+
+            // Execute search
+            index:SearchDocumentsResult|error searchResult = self.indexClient->documentsSearchPost(
+                searchRequest,
+                {[API_KEY_HEADER_NAME]: self.apiKey},
+                api\-version = self.apiVersion
+            );
+            if searchResult is error {
+                logIfVerboseEnable(self.verbose, string `Failed to retrieve documents from Azure AI Search: ${searchResult.message()}`, searchResult);
+                return error ai:Error("Failed to retrieve documents from Azure AI Search", searchResult);
+            }
+
+            // Convert search results to QueryMatch array
+            ai:QueryMatch[] matches = [];
+            foreach index:SearchResult result in searchResult.value {
+                ai:Chunk chunk = {
+                    'type: "text-chunk",
+                    content: self.getFieldValue(result, self.contentFieldName),
+                    metadata: self.extractMetadata(result)
+                };
+                ai:QueryMatch queryMatch = {
+                    chunk: chunk,
+                    similarityScore: <float>result.\@search\.score
+                };
+                matches.push(queryMatch);
+            }
+
+            return matches.cloneReadOnly();
+        }
+    }
+
+    # Deletes chunks that match the given metadata filters.
+    # Only the matches returned by one search request are deleted per call (no paging yet).
+    # + filters - The metadata filters used to identify which chunks to delete
+    # + return - An `ai:Error` if no usable filter can be built or the deletion fails, otherwise `nil`
+    public isolated function deleteByFilter(ai:MetadataFilters filters) returns ai:Error? {
+        ai:MetadataFilters filtersCopy = filters.cloneReadOnly();
+        string? filterExpression = self.buildODataFilter(filtersCopy);
+        if filterExpression is () {
+            // Without a filter the search below would match arbitrary documents and this method
+            // would then delete them, so an untranslatable filter set is rejected outright.
+            return error ai:Error("No supported metadata filter could be derived; refusing to delete without a filter");
+        }
+
+        // First, search for the documents matching the filters; only the key field is
+        // selected because it is all a delete action needs.
+        index:SearchRequest searchRequest = {
+            filter: filterExpression,
+            'select: self.keyFieldName
+            // TODO: Implement batching if large number of documents expected
+        };
+
+        index:SearchDocumentsResult|error searchResult = self.indexClient->documentsSearchPost(
+            searchRequest,
+            {[API_KEY_HEADER_NAME]: self.apiKey},
+            api\-version = self.apiVersion
+        );
+
+        if searchResult is error {
+            logIfVerboseEnable(self.verbose, string `Failed to search for documents to delete: ${searchResult.message()}`, searchResult);
+            return error ai:Error("Failed to search for documents to delete", searchResult);
+        }
+
+        // Build one delete action per matched document; the action only needs the
+        // document key, which is read from the key field of each search hit.
+        index:IndexAction[] deleteActions = [];
+        foreach index:SearchResult result in searchResult.value {
+            index:IndexAction deleteAction = {
+                \@search\.action: "delete"
+            };
+            // Set the key field for deletion
+            deleteAction[self.keyFieldName] = self.getFieldValue(result, self.keyFieldName);
+            deleteActions.push(deleteAction);
+        }
+
+        if deleteActions.length() == 0 {
+            return; // No documents found matching the filters
+        }
+
+        // Execute batch delete
+        index:IndexBatch deleteBatch = {
+            value: deleteActions
+        };
+
+        index:IndexDocumentsResult|error deleteResult = self.indexClient->documentsIndex(
+            deleteBatch,
+            {[API_KEY_HEADER_NAME]: self.apiKey},
+            api\-version = self.apiVersion
+        );
+
+        if deleteResult is error {
+            logIfVerboseEnable(self.verbose, string `Failed to delete documents from Azure AI Search: ${deleteResult.message()}`, deleteResult);
+            return error ai:Error("Failed to delete documents from Azure AI Search", deleteResult);
+        }
+
+        // The batch request can succeed as a whole while individual deletions fail,
+        // so every per-document result is checked.
+        foreach index:IndexingResult result in deleteResult.value {
+            if !result.status {
+                return error ai:Error(string `Failed to delete document with key ${result.'key}: ${result.errorMessage ?: "Unknown error"}`);
+            }
+        }
+
+        return;
+    }
+    
+    private isolated function buildODataFilter(ai:MetadataFilters filters) returns string? {
+        return self.convertFiltersToOData(filters); // `()` when no child filter could be translated
+    }
+    
+    private isolated function convertFiltersToOData(ai:MetadataFilters|ai:MetadataFilter node) returns string? {
+        // Leaf: a single key/operator/value filter.
+        if node is ai:MetadataFilter {
+            return self.convertSingleFilterToOData(node);
+        }
+
+        // Group: translate every child, dropping those that have no OData form.
+        string[] parts = [];
+        foreach ai:MetadataFilters|ai:MetadataFilter member in node.filters {
+            string? translated = self.convertFiltersToOData(member);
+            if translated !is () {
+                parts.push(translated);
+            }
+        }
+
+        int partCount = parts.length();
+        if partCount == 0 {
+            return ();
+        }
+        if partCount == 1 {
+            return parts[0];
+        }
+
+        string connective = node.condition == ai:AND ? " and " : " or ";
+        return string `(${string:'join(connective, ...parts)})`;
+    }
+    
+    private isolated function convertSingleFilterToOData(ai:MetadataFilter filter) returns string? {
+        // Translates one key/operator/value filter into an OData expression.
+        string name = filter.key;
+        json operand = filter.value;
+        ai:MetadataFilterOperator op = filter.operator;
+
+        // Equality and set-membership operators each have a dedicated builder.
+        if op == ai:EQUAL {
+            return self.buildEqualityFilter(name, operand);
+        }
+        if op == ai:NOT_EQUAL {
+            return self.buildInequalityFilter(name, operand);
+        }
+        if op == ai:IN {
+            return self.buildInFilter(name, operand);
+        }
+        if op == ai:NOT_IN {
+            return self.buildNotInFilter(name, operand);
+        }
+
+        // Ordering operators share one builder and differ only in the OData keyword.
+        string? keyword = ();
+        if op == ai:GREATER_THAN {
+            keyword = "gt";
+        } else if op == ai:LESS_THAN {
+            keyword = "lt";
+        } else if op == ai:GREATER_THAN_OR_EQUAL {
+            keyword = "ge";
+        } else if op == ai:LESS_THAN_OR_EQUAL {
+            keyword = "le";
+        }
+
+        // Any operator not listed above has no translation and yields `()`.
+        return keyword is string ? self.buildComparisonFilter(name, operand, keyword) : ();
+    }
+    
+    private isolated function buildEqualityFilter(string fieldName, json value) returns string? {
+        // Produces `<field> eq <literal>`.
+        // Yields `()` when the value cannot be rendered as an OData literal.
+        string? literal = self.formatValueForOData(value);
+
+        return literal is string ? string `${fieldName} eq ${literal}` : ();
+    }
+    
+    private isolated function buildInequalityFilter(string fieldName, json value) returns string? {
+        // Produces `<field> ne <literal>`.
+        // Yields `()` when the value cannot be rendered as an OData literal.
+        string? literal = self.formatValueForOData(value);
+
+        return literal is string ? string `${fieldName} ne ${literal}` : ();
+    }
+
+    private isolated function buildInFilter(string fieldName, json value) returns string? {
+        // Membership is expressed as a parenthesised `or` of equality tests, one per array item.
+        // Anything other than a JSON array is treated like an empty list and yields `()`.
+        json[] candidates = value is json[] ? value : [];
+
+        string[] alternatives = [];
+        foreach json candidate in candidates {
+            string? literal = self.formatValueForOData(candidate);
+            if literal is string {
+                alternatives.push(string `${fieldName} eq ${literal}`);
+            }
+        }
+
+        return alternatives.length() == 0 ? () : "(" + string:'join(" or ", ...alternatives) + ")";
+    }
+
+    private isolated function buildNotInFilter(string fieldName, json value) returns string? {
+        // Exclusion is expressed as a parenthesised `and` of inequality tests, one per array item.
+        // Anything other than a JSON array is treated like an empty list and yields `()`.
+        json[] candidates = value is json[] ? value : [];
+
+        string[] exclusions = [];
+        foreach json candidate in candidates {
+            string? literal = self.formatValueForOData(candidate);
+            if literal is string {
+                exclusions.push(string `${fieldName} ne ${literal}`);
+            }
+        }
+
+        return exclusions.length() == 0 ? () : "(" + string:'join(" and ", ...exclusions) + ")";
+    }
+    
+    private isolated function buildComparisonFilter(string fieldName, json value, string odataOperator) returns string? {
+        // Produces `<field> <operator> <literal>` for the given OData comparison keyword.
+        // Yields `()` when the value cannot be rendered as an OData literal.
+        string? literal = self.formatValueForOData(value);
+
+        return literal is string ? string `${fieldName} ${odataOperator} ${literal}` : ();
+    }
+    
+    private isolated function formatValueForOData(json value) returns string? {
+        if value is string {
+            // OData string literals are single-quoted; embedded single quotes are doubled.
+            string escapedValue = re `'`.replaceAll(value, "''");
+            return string `'${escapedValue}'`;
+        }
+        // `json` also admits `float` (e.g. the literal `4.5`); rejecting it silently dropped the filter.
+        if value is int|float|decimal|boolean {
+            return value.toString();
+        }
+        // null, arrays and mappings have no scalar literal form: report them as unsupported.
+        return ();
+    }
+    
+    private isolated function getFieldValue(index:SearchResult result, string fieldName) returns string {
+        // Reads one field of a search hit as text.
+        anydata raw = result[fieldName];
+
+        // An absent or null field is reported (when verbose) and mapped to the empty string.
+        if raw is () {
+            logIfVerboseEnable(self.verbose, string `Field ${fieldName} is null in search result.`);
+            return "";
+        }
+        // Strings are returned unchanged; any other value is converted with `toString()`.
+        return raw is string ? raw : raw.toString();
+    }
+
+    private isolated function extractMetadata(index:SearchResult result) returns ai:Metadata {
+        lock {
+            // Every field of the hit becomes metadata except the content field, the key field,
+            // the vector fields and the `@search.score` / `@search.highlights` annotations.
+            ai:Metadata collected = {};
+            map<anydata> snapshot = result.cloneReadOnly();
+            foreach string name in snapshot.keys() {
+                boolean excluded = name == self.contentFieldName || name == self.keyFieldName
+                    || self.vectorFieldNames.indexOf(name) != ()
+                    || name == "@search.score" || name == "@search.highlights";
+                anydata fieldValue = snapshot[name];
+                if !excluded && fieldValue is json {
+                    collected[name] = fieldValue;
+                }
+            }
+
+            return collected.cloneReadOnly();
+        }
+    }
+
+    private isolated function chunk(ai:Document|ai:Document[]|ai:Chunk[] input) returns ai:Chunk[]|ai:Error {
+        (ai:Document|ai:Chunk)[] items = input is ai:Document[]|ai:Chunk[] ? input : [input]; // single item becomes a batch of one
+        ai:Chunker|ai:AUTO|ai:DISABLE strategy = self.chunker;
+        if strategy is ai:DISABLE {
+            return items; // chunking switched off: hand the input back unchanged
+        }
+        ai:Chunk[] produced = [];
+        foreach ai:Document|ai:Chunk item in items {
+            ai:Chunker selected = strategy is ai:Chunker ? strategy : guessChunker(item); // explicit chunker wins over guessing
+            produced.push(...check selected.chunk(item));
+        }
+        return produced;
+    }
+
+    private isolated function uploadDocuments(
+        index:Client 'client,
+        (ai:Document|ai:Chunk)[] documents,
+        search:SearchIndex index, // only its name is read, for the upload log message
+        ai:Embedding[]? embeddings = (),
+        index:DocumentsIndexHeaders headers = {},
+        index:DocumentsIndexQueries queries = {api\-version: API_VERSION}
+    ) returns index:IndexDocumentsResult|error {
+        if embeddings is ai:Embedding[] && embeddings.length() != documents.length() {
+            return error ai:Error("Embeddings count does not match documents count, Embeddings length: " +
+                string `${embeddings.length()}, Documents length: ${documents.length()}`);
+        }
+        // Build one `mergeOrUpload` action per document, then send them all in a single batch request
+        lock {
+            index:IndexAction[] indexActions = [];
+            (ai:Document|ai:Chunk)[] & readonly docs = documents.cloneReadOnly();
+            ai:Embedding[]? embeddingValues = embeddings.cloneReadOnly();
+            foreach int i in 0..<docs.length() {
+                (ai:Document|ai:Chunk) doc = docs[i];
+                
+                // Start with the basic action structure
+                index:IndexAction indexAction = {
+                    \@search\.action: "mergeOrUpload"
+                };
+
+                // Key = metadata value stored under the key field name with the batch position `i` appended, else a type-1 UUID
+                // TODO: handle non-string key fields
+                ai:Metadata? metadata = doc.metadata;
+                string keyValue = metadata !is () && metadata.hasKey(self.keyFieldName)
+                    ? doc.metadata[self.keyFieldName].toString() + i.toString()
+                    : uuid:createType1AsString();
+                    
+                indexAction[self.keyFieldName] = keyValue;
+                logIfVerboseEnable(
+                    self.verbose, string `Set key field ${self.keyFieldName} to value ${keyValue} for document index ${i}.`);
+
+                // Add embeddings to vector fields if available; every vector field receives the same vector
+                if embeddingValues is ai:Embedding[] {
+                    ai:Embedding embedding = embeddingValues[i];
+                    foreach string vectorFieldName in self.vectorFieldNames {
+                        ai:Vector|ai:Error vectors = self.generateVector(embedding);
+                        if vectors is ai:Error {
+                            logIfVerboseEnable(
+                                self.verbose, string `Failed to generate vector for document index ${i} and field ${vectorFieldName}: ${vectors.message()}`);
+                            return vectors;
+                        }
+
+                        indexAction[vectorFieldName] = vectors;
+                        logIfVerboseEnable(
+                            self.verbose, string `Added vector for document index ${i} to field ${vectorFieldName}.`);
+                    }
+                }
+                
+                indexAction[self.contentFieldName] = doc.content;
+                logIfVerboseEnable(
+                    self.verbose, string `Added content for document index ${i} to field ${self.contentFieldName}.`);
+
+                // Add document type if there's a field for it (check if "type" field exists)
+                if self.allFields.hasKey("type") {
+                    indexAction["type"] = doc.'type;
+                }
+
+                // Add metadata fields; the key, content and vector field names are never overwritten from metadata
+                if metadata is ai:Metadata {
+                    foreach [string, json] [key, value] in metadata.entries() {
+                        boolean isPossibleMetadata = key != self.keyFieldName && key != self.contentFieldName 
+                                && self.vectorFieldNames.indexOf(key) == ();
+                        // Only add metadata if the field exists in the index schema
+                        if self.allFields.hasKey(key) && isPossibleMetadata {
+                            indexAction[key] = value;
+                        } else {
+                            if isPossibleMetadata {
+                                logIfVerboseEnable(
+                                    self.verbose, string `Skipping field ${key} as it does not exist in index schema.`);
+                            }
+                        }
+                    }
+                }
+
+                indexActions.push(indexAction);
+            }
+
+            index:IndexBatch batch = {
+                value: indexActions
+            };
+
+            logIfVerboseEnable(self.verbose, string `Uploading ${indexActions.length().toString()} documents to Azure AI Search index ${index.name}.`);
+            return 'client->documentsIndex(batch.cloneReadOnly(), headers.cloneReadOnly(), queries.cloneReadOnly());
+        }
+    }
+
+    private isolated function generateVector(ai:Embedding embedding) returns ai:Vector|ai:Error {
+        // Dense vectors are used as-is; hybrid embeddings contribute only their dense part.
+        if embedding is ai:Vector {
+            return embedding;
+        }
+        if embedding is ai:HybridVector {
+            return embedding.dense;
+        }
+        // Whatever remains is a sparse-only embedding, which is rejected explicitly.
+        return error ai:Error("AzureAiSearchKnowledgeBase only supports dense or hybrid embeddings, but received a SparseVector.");
+    }
+}
+
+isolated function logIfVerboseEnable(boolean verbose, string value, 'error? err = ()) {
+    // Emits nothing unless verbose logging was requested; the error, if any, follows the message.
+    if verbose {
+        log:printInfo(string `[AzureAiSearchKnowledgeBase] ${value}`);
+    }
+    if verbose && err is error {
+        log:printError(string `[AzureAiSearchKnowledgeBase] Error Details: ${err.message()}`, err);
+    }
+}
+
+isolated function guessChunker(ai:Document|ai:Chunk doc) returns ai:Chunker {
+    // Prefer the MIME type recorded in the metadata.
+    match doc.metadata?.mimeType {
+        "text/markdown" => {
+            return new ai:MarkdownChunker();
+        }
+        "text/html" => {
+            return new ai:HtmlChunker();
+        }
+    }
+    // Otherwise fall back to the file extension; an absent file name matches nothing.
+    string? declaredName = doc.metadata?.fileName;
+    string name = declaredName ?: "";
+    if name.endsWith(".md") {
+        return new ai:MarkdownChunker();
+    }
+    if name.endsWith(".html") {
+        return new ai:HtmlChunker();
+    }
+    return new ai:GenericRecursiveChunker();
+}
+
+isolated function analyzeIndexSchema(boolean verbose, search:SearchIndex index, string contentFieldName) returns IndexSchemaInfo|ai:Error {
+    // Single pass over the index fields, classifying each one by role.
+    map<search:SearchField> fieldsByName = {};
+    string[] vectorFields = [];
+    string[] contentFields = [];
+    string? declaredKey = ();
+
+    foreach search:SearchField candidate in index.fields {
+        string name = candidate.name;
+        fieldsByName[name] = candidate;
+
+        if candidate.'key == true {
+            declaredKey = name;
+        }
+        // A vector field declares both its dimensions and a vector search profile.
+        if candidate?.dimensions is int && candidate?.vectorSearchProfile is string {
+            vectorFields.push(name);
+        }
+        // The content field is recognised purely by its name.
+        if name == contentFieldName {
+            contentFields.push(name);
+        }
+    }
+
+    // Diagnostics: only a missing content field is fatal; the rest is logged when verbose.
+    int vectorCount = vectorFields.length();
+    if vectorCount == 0 {
+        logIfVerboseEnable(verbose, "No vector fields found in index schema.");
+    }
+
+    if contentFields.length() == 0 {
+        return error(string `Index schema must contains a field named '${contentFieldName}'.`);
+    }
+
+    if declaredKey is () {
+        logIfVerboseEnable(verbose, string `No key field defined in index schema. Using default key field name as '${KEY_FIELD_NAME}'.`);
+    }
+    if vectorCount > 1 {
+        logIfVerboseEnable(verbose, string `Multiple vector fields found in index schema: ${string:'join(", ", ...vectorFields)}. Currently one vecotr field is prefered. So for now, there is more than one, all the vector fileds will share the same vectors.`);
+    }
+
+    return {
+        keyFieldName: declaredKey ?: KEY_FIELD_NAME,
+        vectorFieldNames: vectorFields,
+        contentFieldNames: contentFields,
+        allFields: fieldsByName
+    };
+}

From 291a240b57488d1f739d747747b4cfc26e942fd1 Mon Sep 17 00:00:00 2001
From: Sasindu Alahakoon <dilsharasasindu@gmail.com>
Date: Wed, 22 Oct 2025 15:31:44 +0530
Subject: [PATCH 02/10] [Automated] Update the toml files

---
 ballerina/Ballerina.toml    | 12 +++++++++
 ballerina/Dependencies.toml | 52 +++++++++++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/ballerina/Ballerina.toml b/ballerina/Ballerina.toml
index 0fe26e5..0bb7100 100644
--- a/ballerina/Ballerina.toml
+++ b/ballerina/Ballerina.toml
@@ -17,3 +17,15 @@ groupId = "io.ballerina.lib"
 artifactId = "ai.azure-native"
 version = "1.2.0"
 path = "../native/build/libs/ai.azure-native-1.2.0-SNAPSHOT.jar"
+
+[[dependency]]
+org="ballerinax"
+name="azure.ai.search"
+version="1.0.0" 
+repository="local"
+
+[[dependency]]
+org="ballerinax"
+name="azure.ai.search.index"
+version="1.0.0" 
+repository="local"
diff --git a/ballerina/Dependencies.toml b/ballerina/Dependencies.toml
index d47d55d..447bb9d 100644
--- a/ballerina/Dependencies.toml
+++ b/ballerina/Dependencies.toml
@@ -73,7 +73,7 @@ modules = [
 [[package]]
 org = "ballerina"
 name = "crypto"
-version = "2.9.1"
+version = "2.9.2"
 dependencies = [
 	{org = "ballerina", name = "jballerina.java"},
 	{org = "ballerina", name = "time"}
@@ -82,7 +82,7 @@ dependencies = [
 [[package]]
 org = "ballerina"
 name = "data.jsondata"
-version = "1.1.2"
+version = "1.1.3"
 dependencies = [
 	{org = "ballerina", name = "jballerina.java"},
 	{org = "ballerina", name = "lang.object"}
@@ -91,7 +91,7 @@ dependencies = [
 [[package]]
 org = "ballerina"
 name = "data.xmldata"
-version = "1.5.0"
+version = "1.5.2"
 dependencies = [
 	{org = "ballerina", name = "jballerina.java"},
 	{org = "ballerina", name = "lang.object"}
@@ -262,13 +262,16 @@ dependencies = [
 [[package]]
 org = "ballerina"
 name = "log"
-version = "2.13.0"
+version = "2.14.0"
 dependencies = [
 	{org = "ballerina", name = "io"},
 	{org = "ballerina", name = "jballerina.java"},
 	{org = "ballerina", name = "lang.value"},
 	{org = "ballerina", name = "observe"}
 ]
+modules = [
+	{org = "ballerina", packageName = "log", moduleName = "log"}
+]
 
 [[package]]
 org = "ballerina"
@@ -278,7 +281,7 @@ version = "1.2.0"
 [[package]]
 org = "ballerina"
 name = "mcp"
-version = "1.0.0"
+version = "1.0.1"
 dependencies = [
 	{org = "ballerina", name = "http"},
 	{org = "ballerina", name = "jballerina.java"},
@@ -376,6 +379,9 @@ dependencies = [
 	{org = "ballerina", name = "lang.int"},
 	{org = "ballerina", name = "time"}
 ]
+modules = [
+	{org = "ballerina", packageName = "uuid", moduleName = "uuid"}
+]
 
 [[package]]
 org = "ballerina"
@@ -408,7 +414,11 @@ dependencies = [
 	{org = "ballerina", name = "http"},
 	{org = "ballerina", name = "jballerina.java"},
 	{org = "ballerina", name = "lang.array"},
+	{org = "ballerina", name = "log"},
 	{org = "ballerina", name = "test"},
+	{org = "ballerina", name = "uuid"},
+	{org = "ballerinax", name = "azure.ai.search"},
+	{org = "ballerinax", name = "azure.ai.search.index"},
 	{org = "ballerinax", name = "azure.openai.chat"},
 	{org = "ballerinax", name = "azure.openai.embeddings"}
 ]
@@ -416,6 +426,38 @@ modules = [
 	{org = "ballerinax", packageName = "ai.azure", moduleName = "ai.azure"}
 ]
 
+[[package]]
+org = "ballerinax"
+name = "azure.ai.search"
+version = "1.0.0"
+dependencies = [
+	{org = "ballerina", name = "data.jsondata"},
+	{org = "ballerina", name = "http"},
+	{org = "ballerina", name = "log"},
+	{org = "ballerina", name = "url"},
+	{org = "ballerina", name = "uuid"},
+	{org = "ballerinai", name = "observe"}
+]
+modules = [
+	{org = "ballerinax", packageName = "azure.ai.search", moduleName = "azure.ai.search"}
+]
+
+[[package]]
+org = "ballerinax"
+name = "azure.ai.search.index"
+version = "1.0.0"
+dependencies = [
+	{org = "ballerina", name = "constraint"},
+	{org = "ballerina", name = "data.jsondata"},
+	{org = "ballerina", name = "http"},
+	{org = "ballerina", name = "log"},
+	{org = "ballerina", name = "url"},
+	{org = "ballerinai", name = "observe"}
+]
+modules = [
+	{org = "ballerinax", packageName = "azure.ai.search.index", moduleName = "azure.ai.search.index"}
+]
+
 [[package]]
 org = "ballerinax"
 name = "azure.openai.chat"

From cf93a111d57f637791f7b4338c7a63d458be819b Mon Sep 17 00:00:00 2001
From: Sasindu Alahakoon <dilsharasasindu@gmail.com>
Date: Wed, 22 Oct 2025 15:48:04 +0530
Subject: [PATCH 03/10] [Automated] Update the toml files

---
 ballerina/Dependencies.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ballerina/Dependencies.toml b/ballerina/Dependencies.toml
index 447bb9d..77971fd 100644
--- a/ballerina/Dependencies.toml
+++ b/ballerina/Dependencies.toml
@@ -111,7 +111,7 @@ dependencies = [
 [[package]]
 org = "ballerina"
 name = "http"
-version = "2.14.6"
+version = "2.14.7"
 dependencies = [
 	{org = "ballerina", name = "auth"},
 	{org = "ballerina", name = "cache"},

From a849984c82a175e536387301de2ffc546c97cc28 Mon Sep 17 00:00:00 2001
From: Sasindu Alahakoon <dilsharasasindu@gmail.com>
Date: Thu, 23 Oct 2025 12:51:38 +0530
Subject: [PATCH 04/10] refactor the knowledgebase implementation

---
 ballerina/azure_ai_search_knowledgebase.bal | 533 ++++++++++++--------
 1 file changed, 332 insertions(+), 201 deletions(-)

diff --git a/ballerina/azure_ai_search_knowledgebase.bal b/ballerina/azure_ai_search_knowledgebase.bal
index 0dea4de..fa84980 100644
--- a/ballerina/azure_ai_search_knowledgebase.bal
+++ b/ballerina/azure_ai_search_knowledgebase.bal
@@ -22,9 +22,46 @@ import ballerinax/azure.ai.search.index;
 
 const CONTENT_FIELD_NAME = "content";
 const KEY_FIELD_NAME = "id";
-const API_VERSION = "2025-09-01";
+const AI_AZURE_KNOWLEDGEBASE_API_VERSION = "2025-09-01";
 const API_KEY_HEADER_NAME = "api-key";
 
+// Values assigned to the `@search.action` property of an index action
+const SEARCH_ACTION_MERGE_OR_UPLOAD = "mergeOrUpload";
+const SEARCH_ACTION_DELETE = "delete";
+
+// Value of the `kind` property set on vector queries
+const VECTOR_QUERY_KIND = "vector";
+
+// Chunk type tag, and the MIME types checked when guessing a chunker
+const CONTENT_TYPE_TEXT_CHUNK = "text-chunk";
+const MIME_TYPE_MARKDOWN = "text/markdown";
+const MIME_TYPE_HTML = "text/html";
+
+// File extensions checked as a fallback when guessing a chunker from the file name
+const FILE_EXT_MARKDOWN = ".md";
+const FILE_EXT_HTML = ".html";
+
+// Reserved `@search.*` field names (score and highlights are excluded from extracted metadata)
+const SEARCH_SCORE_FIELD = "@search.score";
+const SEARCH_HIGHLIGHTS_FIELD = "@search.highlights";
+const SEARCH_ACTION_FIELD = "@search.action";
+
+// OData operators; AND/OR carry surrounding spaces so they can be used directly as join separators
+const ODATA_OPERATOR_GT = "gt";
+const ODATA_OPERATOR_LT = "lt";
+const ODATA_OPERATOR_GE = "ge";
+const ODATA_OPERATOR_LE = "le";
+const ODATA_OPERATOR_EQ = "eq";
+const ODATA_OPERATOR_NE = "ne";
+const ODATA_OPERATOR_AND = " and ";
+const ODATA_OPERATOR_OR = " or ";
+
+// Value of the `Prefer` header sent with the index create-or-update request
+const PREFER_HEADER_RETURN_REPRESENTATION = "return=representation";
+
+// Default field names
+const DEFAULT_TYPE_FIELD_NAME = "type";
+
 # Information about the analyzed index schema
 type IndexSchemaInfo record {
     # Name of the key field in the index
@@ -38,7 +75,7 @@ type IndexSchemaInfo record {
 };
 
 # Configuration for the Azure AI Service Clients
-public type ClientConfiguration record {|
+public type AzureAiSearchKnowledgeBaseClientConfiguration record {|
     # Connection configuration for the Azure AI search client that use for create search index
     # This configuration is only required when the `index` parameter
     # is provided as an `search:SearchIndex` (i.e., when the system will create the index).
@@ -71,7 +108,8 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
     # 
     # + serviceUrl - The service URL of the Azure AI Search instance
     # + apiKey - The API key for authenticating with the Azure AI Search service
-    # + index - The name of an existing search index or a `search:SearchIndex` definition to create
+    # + index - The name of an existing search index or a `search:SearchIndex` definition to create.
+    #   When creating a new index, ensure that it contains one key field of type string.
     # + embeddingModel - The embedding model to use for generating embeddings
     # + chunker - The chunker to use for chunking documents before ingestion. Defaults to `ai:AUTO`.
     # + verbose - Whether to enable verbose logging. Defaults to `false`.
@@ -81,8 +119,8 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
     # + return - An instance of `AzureAiSearchKnowledgeBase` or an `ai:Error` if initialization fails
     public isolated function init(string serviceUrl, string apiKey, string|search:SearchIndex index, ai:EmbeddingProvider embeddingModel, 
             ai:Chunker|ai:AUTO|ai:DISABLE chunker = ai:AUTO, boolean verbose = false, 
-            string apiVersion = API_VERSION, string contentFieldName = CONTENT_FIELD_NAME, 
-            *ClientConfiguration clientConfigurations) returns ai:Error? {
+            string apiVersion = AI_AZURE_KNOWLEDGEBASE_API_VERSION, string contentFieldName = CONTENT_FIELD_NAME, 
+            *AzureAiSearchKnowledgeBaseClientConfiguration clientConfigurations) returns ai:Error? {
         self.chunker = chunker;
         self.embeddingModel = embeddingModel;
         self.verbose = verbose;
@@ -115,7 +153,7 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
         } else {
             logIfVerboseEnable(self.verbose, string `Attempting to create search index ${indexName}...`);
             search:SearchIndex|error createdIndex = self.serviceClient->indexesCreateOrUpdate(indexName, {
-                [API_KEY_HEADER_NAME]: self.apiKey, Prefer: "return=representation"}, index, {api\-version: self.apiVersion});
+                [API_KEY_HEADER_NAME]: self.apiKey, Prefer: PREFER_HEADER_RETURN_REPRESENTATION}, index, {api\-version: self.apiVersion});
             if createdIndex is error {
                 logIfVerboseEnable(self.verbose, string `Failed to create search index ${indexName}: ${createdIndex.message()}`);
                 return error ai:Error("Failed to create search index", createdIndex);
@@ -198,7 +236,7 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
         }
 
         lock {
-            ai:TextChunk queryChunk = {content: query, 'type: "text-chunk"};
+            ai:TextChunk queryChunk = {content: query, 'type: CONTENT_TYPE_TEXT_CHUNK};
             ai:Embedding queryEmbedding = check self.embeddingModel->embed(queryChunk);
 
             // Create vector search request using Azure AI Search's integrated vectorization
@@ -206,14 +244,14 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
             index:VectorQuery[]? vectorQuery = ();
 
             if vectorFieldLength != 0 {
-                ai:Vector|ai:Error vectors = self.generateVector(queryEmbedding);
+                ai:Vector|ai:Error vectors = generateVectorFromEmbedding(queryEmbedding);
                 if vectors is ai:Error {
                     return vectors;
                 }
 
                 vectorQuery = [
                     {
-                        kind: "vector",
+                        kind: VECTOR_QUERY_KIND,
                         k: maxLimit == -1 ? () : <int:Signed32>maxLimit,
                         fields: string:'join(",", ...self.vectorFieldNames),
                         "vector": vectors
@@ -252,9 +290,9 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
             ai:QueryMatch[] matches = [];
             foreach index:SearchResult result in searchResult.value {
                 ai:Chunk chunk = {
-                    'type: "text-chunk",
-                    content: self.getFieldValue(result, self.contentFieldName),
-                    metadata: self.extractMetadata(result)
+                    'type: CONTENT_TYPE_TEXT_CHUNK,
+                    content: extractFieldValue(result, self.contentFieldName, self.verbose),
+                    metadata: extractMetadataFromResult(result, self.contentFieldName, self.keyFieldName, self.vectorFieldNames)
                 };
                 
                 ai:QueryMatch queryMatch = {
@@ -297,7 +335,7 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
         // Extract document IDs
         string[] documentIds = [];
         foreach index:SearchResult result in searchResult.value {
-            string? documentId = self.getFieldValue(result, self.keyFieldName);
+            string? documentId = extractFieldValue(result, self.keyFieldName, self.verbose);
             if documentId is string {
                 documentIds.push(documentId);
             }
@@ -311,7 +349,7 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
         index:IndexAction[] deleteActions = [];
         foreach string docId in documentIds {
             index:IndexAction deleteAction = {
-                \@search\.action: "delete"
+                \@search\.action: SEARCH_ACTION_DELETE
             };
             // Set the key field for deletion
             deleteAction[self.keyFieldName] = docId;
@@ -370,7 +408,7 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
         }
         
         // Combine filters with the appropriate logical operator
-        string logicalOperator = node.condition == ai:AND ? " and " : " or ";
+        string logicalOperator = node.condition == ai:AND ? ODATA_OPERATOR_AND : ODATA_OPERATOR_OR;
         return string `(${string:'join(logicalOperator, ...filterExpressions)})`;
     }
     
@@ -381,28 +419,28 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
         
         match operator {
             ai:EQUAL => {
-                return self.buildEqualityFilter(fieldName, value);
+                return buildEqualityFilter(fieldName, value);
             }
             ai:NOT_EQUAL => {
-                return self.buildInequalityFilter(fieldName, value);
+                return buildInequalityFilter(fieldName, value);
             }
             ai:IN => {
-                return self.buildInFilter(fieldName, value);
+                return buildInFilter(fieldName, value);
             }
             ai:NOT_IN => {
-                return self.buildNotInFilter(fieldName, value);
+                return buildNotInFilter(fieldName, value);
             }
             ai:GREATER_THAN => {
-                return self.buildComparisonFilter(fieldName, value, "gt");
+                return buildComparisonFilter(fieldName, value, ODATA_OPERATOR_GT);
             }
             ai:LESS_THAN => {
-                return self.buildComparisonFilter(fieldName, value, "lt");
+                return buildComparisonFilter(fieldName, value, ODATA_OPERATOR_LT);
             }
             ai:GREATER_THAN_OR_EQUAL => {
-                return self.buildComparisonFilter(fieldName, value, "ge");
+                return buildComparisonFilter(fieldName, value, ODATA_OPERATOR_GE);
             }
             ai:LESS_THAN_OR_EQUAL => {
-                return self.buildComparisonFilter(fieldName, value, "le");
+                return buildComparisonFilter(fieldName, value, ODATA_OPERATOR_LE);
             }
             _ => {
                 return (); // Unsupported operator
@@ -410,109 +448,6 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
         }
     }
     
-    private isolated function buildEqualityFilter(string fieldName, json value) returns string? {
-        string? formattedValue = self.formatValueForOData(value);
-        if formattedValue is string {
-            return string `${fieldName} eq ${formattedValue}`;
-        }
-        return ();
-    }
-    
-    private isolated function buildInequalityFilter(string fieldName, json value) returns string? {
-        string? formattedValue = self.formatValueForOData(value);
-        if formattedValue is string {
-            return string `${fieldName} ne ${formattedValue}`;
-        }
-        return ();
-    }
-
-    private isolated function buildInFilter(string fieldName, json value) returns string? {
-        if value is json[] && value.length() > 0 {
-            string[] conditions = [];
-            foreach json item in value {
-                string? formattedValue = self.formatValueForOData(item);
-                if formattedValue is string {
-                    conditions.push(string `${fieldName} eq ${formattedValue}`);
-                }
-            }
-            if conditions.length() > 0 {
-                return "(" + string:'join(" or ", ...conditions) + ")";
-            }
-        }
-        return ();
-    }
-
-    private isolated function buildNotInFilter(string fieldName, json value) returns string? {
-        if value is json[] && value.length() > 0 {
-            string[] conditions = [];
-            foreach json item in value {
-                string? formattedValue = self.formatValueForOData(item);
-                if formattedValue is string {
-                    conditions.push(string `${fieldName} ne ${formattedValue}`);
-                }
-            }
-            if conditions.length() > 0 {
-                return "(" + string:'join(" and ", ...conditions) + ")";
-            }
-        }
-        return ();
-    }
-    
-    private isolated function buildComparisonFilter(string fieldName, json value, string odataOperator) returns string? {
-        string? formattedValue = self.formatValueForOData(value);
-        if formattedValue is string {
-            return string `${fieldName} ${odataOperator} ${formattedValue}`;
-        }
-        return ();
-    }
-    
-    private isolated function formatValueForOData(json value) returns string? {
-        if value is string {
-            // Escape single quotes in strings and wrap in single quotes
-            string escapedValue = re `'`.replaceAll(value, "''");
-            return string `'${escapedValue}'`;
-        } else if value is int|decimal {
-            return value.toString();
-        } else if value is boolean {
-            return value.toString();
-        }
-        // For other types (like null), return null to indicate unsupported
-        return ();
-    }
-    
-    private isolated function getFieldValue(index:SearchResult result, string fieldName) returns string {
-        anydata fieldValue = result[fieldName];
-        if fieldValue is string {
-            return fieldValue;
-        }
-        if fieldValue is () {
-            logIfVerboseEnable(self.verbose, string `Field ${fieldName} is null in search result.`);
-            return "";
-        }
-        // Handle other types if they are possible content
-        return fieldValue.toString();
-    }
-
-    private isolated function extractMetadata(index:SearchResult result) returns ai:Metadata {
-        lock {
-            ai:Metadata metadata = {};
-
-            // Extract all fields except the core content/title fields as metadata
-            map<anydata> clonedResult = result.cloneReadOnly();
-            foreach string k in clonedResult.keys() {
-                anydata value = clonedResult[k];
-                if k != self.contentFieldName && k != self.keyFieldName && self.vectorFieldNames.indexOf(k) == () &&
-                k != "@search.score" && k != "@search.highlights" {
-                    if value is json {
-                        metadata[k] = value;
-                    }
-                }
-            }
-            
-            return metadata.cloneReadOnly();
-        }
-    }
-
     private isolated function chunk(ai:Document|ai:Document[]|ai:Chunk[] input) returns ai:Chunk[]|ai:Error {
         (ai:Document|ai:Chunk)[] inputs = input is ai:Document[]|ai:Chunk[] ? input : [input];
         ai:Chunker|ai:AUTO|ai:DISABLE chunker = self.chunker;
@@ -533,7 +468,7 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
         search:SearchIndex index,
         ai:Embedding[]? embeddings = (),
         index:DocumentsIndexHeaders headers = {},
-        index:DocumentsIndexQueries queries = {api\-version: API_VERSION}
+        index:DocumentsIndexQueries queries = {api\-version: AI_AZURE_KNOWLEDGEBASE_API_VERSION}
     ) returns index:IndexDocumentsResult|error {
         if embeddings is ai:Embedding[] && embeddings.length() != documents.length() {
             return error ai:Error("Embeddings count does not match documents count, Embeddings length: " +
@@ -546,69 +481,26 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
             ai:Embedding[]? embeddingValues = embeddings.cloneReadOnly();
             foreach int i in 0..<docs.length() {
                 (ai:Document|ai:Chunk) doc = docs[i];
+                ai:Embedding? embedding = embeddingValues is ai:Embedding[] ? embeddingValues[i] : ();
                 
-                // Start with the basic action structure
-                index:IndexAction indexAction = {
-                    \@search\.action: "mergeOrUpload"
-                };
-
-                // Set the key field with a UUID
-                // TODO: handle non-string key fields
-                ai:Metadata? metadata = doc.metadata;
-                string keyValue = metadata !is () && metadata.hasKey(self.keyFieldName)
-                    ? doc.metadata[self.keyFieldName].toString() + i.toString()
-                    : uuid:createType1AsString();
-                    
-                indexAction[self.keyFieldName] = keyValue;
-                logIfVerboseEnable(
-                    self.verbose, string `Set key field ${self.keyFieldName} to value ${keyValue} for document index ${i}.`);
-
-                // Add embeddings to vector fields if available
-                if embeddingValues is ai:Embedding[] {
-                    ai:Embedding embedding = embeddingValues[i];
-                    foreach string vectorFieldName in self.vectorFieldNames {
-                        ai:Vector|ai:Error vectors = self.generateVector(embedding);
-                        if vectors is ai:Error {
-                            logIfVerboseEnable(
-                                self.verbose, string `Failed to generate vector for document index ${i} and field ${vectorFieldName}: ${vectors.message()}`);
-                            return vectors;
-                        }
-
-                        indexAction[vectorFieldName] = vectors;
-                        logIfVerboseEnable(
-                            self.verbose, string `Added vector for document index ${i} to field ${vectorFieldName}.`);
-                    }
-                }
+                index:IndexAction|ai:Error indexAction = createIndexAction(
+                    doc,
+                    embedding,
+                    i,
+                    self.keyFieldName,
+                    self.contentFieldName,
+                    self.vectorFieldNames,
+                    self.allFields,
+                    self.verbose
+                );
                 
-                indexAction[self.contentFieldName] = doc.content;
-                logIfVerboseEnable(
-                    self.verbose, string `Added content for document index ${i} to field ${self.contentFieldName}.`);
-
-                // Add document type if there's a field for it (check if "type" field exists)
-                if self.allFields.hasKey("type") {
-                    indexAction["type"] = doc.'type;
-                }
-
-                // Add metadata fields
-                if metadata is ai:Metadata {
-                    foreach [string, json] [key, value] in metadata.entries() {
-                        boolean isPossibleMetadata = key != self.keyFieldName && key != self.contentFieldName 
-                                && self.vectorFieldNames.indexOf(key) == ();
-                        // Only add metadata if the field exists in the index schema
-                        if self.allFields.hasKey(key) && isPossibleMetadata {
-                            indexAction[key] = value;
-                        } else {
-                            if isPossibleMetadata {
-                                logIfVerboseEnable(
-                                    self.verbose, string `Skipping field ${key} as it does not exist in index schema.`);
-                            }
-                        }
-                    }
+                if indexAction is ai:Error {
+                    return indexAction;
                 }
 
                 indexActions.push(indexAction);
-            }
-
+            }            
+            
             index:IndexBatch batch = {
                 value: indexActions
             };
@@ -617,20 +509,13 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
             return 'client->documentsIndex(batch.cloneReadOnly(), headers.cloneReadOnly(), queries.cloneReadOnly());
         }
     }
-
-    private isolated function generateVector(ai:Embedding embedding) returns ai:Vector|ai:Error {
-        if embedding is ai:Vector {
-            return embedding;
-        } else if embedding is ai:HybridVector {
-            // Return the dense part, discard sparse
-            return embedding.dense;
-        } else {
-            // Explicitly fail for sparse-only embeddings
-            return error ai:Error("AzureAiSearchKnowledgeBase only supports dense or hybrid embeddings, but received a SparseVector.");
-        }
-    }
 }
 
+# Logs informational or error messages if verbose mode is enabled
+#
+# + verbose - Whether verbose logging is enabled
+# + value - The message to log
+# + err - Optional error to log with additional details
 isolated function logIfVerboseEnable(boolean verbose, string value, 'error? err = ()) {
     if verbose {
         log:printInfo(string `[AzureAiSearchKnowledgeBase] ${value}`);
@@ -640,28 +525,274 @@ isolated function logIfVerboseEnable(boolean verbose, string value, 'error? err
     }
 }
 
+# Determines the appropriate chunker based on document metadata
+#
+# + doc - The document or chunk to determine chunker for
+# + return - The appropriate chunker for the document type
 isolated function guessChunker(ai:Document|ai:Chunk doc) returns ai:Chunker {
     // Guess the chunker based on the document type or mimeType in metadata
     string? mimeType = doc.metadata?.mimeType;
-    if mimeType == "text/markdown" {
+    if mimeType == MIME_TYPE_MARKDOWN {
         return new ai:MarkdownChunker();
     }
-    if mimeType == "text/html" {
+    if mimeType == MIME_TYPE_HTML {
         return new ai:HtmlChunker();
     }
     // Fallback to file name
     string? fileName = doc.metadata?.fileName;
     if fileName is string {
-        if fileName.endsWith(".md") {
+        if fileName.endsWith(FILE_EXT_MARKDOWN) {
             return new ai:MarkdownChunker();
         }
-        if fileName.endsWith(".html") {
+        if fileName.endsWith(FILE_EXT_HTML) {
             return new ai:HtmlChunker();
         }
     }
     return new ai:GenericRecursiveChunker();
 }
 
+# Resolves the dense vector to send to Azure AI Search for a given embedding
+#
+# + embedding - The embedding to take the vector from
+# + return - The dense vector, or an `ai:Error` when the embedding is neither dense nor hybrid
+isolated function generateVectorFromEmbedding(ai:Embedding embedding) returns ai:Vector|ai:Error {
+    // A dense vector is returned unchanged
+    if embedding is ai:Vector {
+        return embedding;
+    }
+    // A hybrid embedding contributes only its dense component; the sparse part is dropped
+    if embedding is ai:HybridVector {
+        return embedding.dense;
+    }
+    return error ai:Error("AzureAiSearchKnowledgeBase only supports dense or hybrid embeddings, but received a SparseVector.");
+}
+
+# Formats a scalar JSON value as a literal for use in OData filter expressions
+#
+# + value - The JSON value to format (string, boolean, int, decimal or finite float)
+# + return - The OData literal, or `()` for unsupported values (nil, arrays, maps, NaN or infinite floats)
+isolated function formatValueForOData(json value) returns string? {
+    if value is string {
+        // String literals are single-quoted; an embedded single quote is escaped by doubling it
+        string escapedValue = re `'`.replaceAll(value, "''");
+        return string `'${escapedValue}'`;
+    } else if value is float {
+        return value.isFinite() ? value.toString() : (); // NaN/Infinity are reported as unsupported
+    } else if value is int|decimal|boolean {
+        return value.toString();
+    }
+    // Anything else (nil, arrays, maps) cannot be expressed as a scalar literal
+    return ();
+}
+
+# Produces an OData `eq` clause comparing a field against a literal value
+#
+# + fieldName - Name of the field placed on the left-hand side of the comparison
+# + value - The JSON value to compare against, rendered through `formatValueForOData`
+# + return - The `eq` clause, or `()` when the value cannot be rendered as a literal
+isolated function buildEqualityFilter(string fieldName, json value) returns string? {
+    string? literal = formatValueForOData(value);
+    if literal is () {
+        return ();
+    }
+    return string `${fieldName} ${ODATA_OPERATOR_EQ} ${literal}`;
+}
+
+# Produces an OData `ne` clause comparing a field against a literal value
+#
+# + fieldName - Name of the field placed on the left-hand side of the comparison
+# + value - The JSON value to compare against, rendered through `formatValueForOData`
+# + return - The `ne` clause, or `()` when the value cannot be rendered as a literal
+isolated function buildInequalityFilter(string fieldName, json value) returns string? {
+    string? literal = formatValueForOData(value);
+    if literal is () {
+        return ();
+    }
+    return string `${fieldName} ${ODATA_OPERATOR_NE} ${literal}`;
+}
+
+# Builds an OData membership filter as a parenthesised chain of `eq` clauses joined by `or`
+#
+# + fieldName - Name of the field to test
+# + value - Expected to be a JSON array of candidate values; entries that cannot be rendered are skipped
+# + return - The combined filter, or `()` when `value` is not an array or yields no usable clause
+isolated function buildInFilter(string fieldName, json value) returns string? {
+    if value !is json[] {
+        return ();
+    }
+    string[] clauses = [];
+    foreach json candidate in value {
+        string? literal = formatValueForOData(candidate);
+        if literal is string {
+            clauses.push(string `${fieldName} ${ODATA_OPERATOR_EQ} ${literal}`);
+        }
+    }
+    // An empty array collects no clauses, so it is rejected by the same check as an
+    // array whose entries were all unsupported
+    return clauses.length() == 0 ? () : string `(${string:'join(ODATA_OPERATOR_OR, ...clauses)})`;
+}
+
+# Builds an OData exclusion filter as a parenthesised chain of `ne` clauses joined by `and`
+#
+# + fieldName - Name of the field to test
+# + value - Expected to be a JSON array of values to exclude; entries that cannot be rendered are skipped
+# + return - The combined filter, or `()` when `value` is not an array or yields no usable clause
+isolated function buildNotInFilter(string fieldName, json value) returns string? {
+    if value !is json[] {
+        return ();
+    }
+    string[] clauses = [];
+    foreach json excluded in value {
+        string? literal = formatValueForOData(excluded);
+        if literal is string {
+            clauses.push(string `${fieldName} ${ODATA_OPERATOR_NE} ${literal}`);
+        }
+    }
+    // An empty array collects no clauses, so it is rejected by the same check as an
+    // array whose entries were all unsupported
+    return clauses.length() == 0 ? () : string `(${string:'join(ODATA_OPERATOR_AND, ...clauses)})`;
+}
+
+# Produces an OData clause of the form field, operator, literal
+#
+# + fieldName - Name of the field placed on the left-hand side of the comparison
+# + value - The JSON value to compare against, rendered through `formatValueForOData`
+# + odataOperator - The OData operator text inserted between the field and the literal
+# + return - The comparison clause, or `()` when the value cannot be rendered as a literal
+isolated function buildComparisonFilter(string fieldName, json value, string odataOperator) returns string? {
+    string? literal = formatValueForOData(value);
+    if literal is () {
+        return ();
+    }
+    return string `${fieldName} ${odataOperator} ${literal}`;
+}
+
+# Reads one field of a search result and returns it in string form
+#
+# + result - The search result to read from
+# + fieldName - The name of the field to read
+# + verbose - Whether a nil field should be reported through the verbose logger
+# + return - The string value, the `toString()` form of a non-string value, or `""` when the field is nil
+isolated function extractFieldValue(index:SearchResult result, string fieldName, boolean verbose) returns string {
+    anydata rawValue = result[fieldName];
+    if rawValue is () {
+        logIfVerboseEnable(verbose, string `Field ${fieldName} is null in search result.`);
+        return "";
+    }
+    if rawValue is string {
+        return rawValue;
+    }
+    // Any other value type is returned in its string representation
+    return rawValue.toString();
+}
+
+# Collects the fields of a search result that are treated as metadata
+#
+# + result - The search result to collect metadata from
+# + contentFieldName - Name of the content field, which is left out
+# + keyFieldName - Name of the key field, which is left out
+# + vectorFieldNames - Names of the vector fields, which are left out
+# + return - A read-only clone of the collected metadata
+isolated function extractMetadataFromResult(index:SearchResult result, string contentFieldName, string keyFieldName, string[] vectorFieldNames) returns ai:Metadata {
+    // Names that never count as metadata: content, key, and the search score/highlights fields
+    string[] reservedNames = [contentFieldName, keyFieldName, SEARCH_SCORE_FIELD, SEARCH_HIGHLIGHTS_FIELD];
+    map<anydata> snapshot = result.cloneReadOnly();
+    ai:Metadata collected = {};
+    foreach [string, anydata] [name, fieldValue] in snapshot.entries() {
+        if reservedNames.indexOf(name) != () || vectorFieldNames.indexOf(name) != () {
+            continue;
+        }
+        // Only values that are `json` are copied; anything else is skipped
+        if fieldValue is json {
+            collected[name] = fieldValue;
+        }
+    }
+
+    return collected.cloneReadOnly();
+}
+
+# Builds the `SEARCH_ACTION_MERGE_OR_UPLOAD` index action for a single document or chunk
+#
+# + doc - The document or chunk to create action for
+# + embedding - Optional embedding; the vector derived from it is written to every vector field
+# + documentIndex - Index of the document in the batch; also used as the suffix of a metadata-derived key
+# + keyFieldName - Name of the key field
+# + contentFieldName - Name of the content field
+# + vectorFieldNames - Array of vector field names
+# + allFields - Map of all fields in the index schema; metadata entries not present here are skipped
+# + verbose - Whether verbose logging is enabled
+# + return - The created index action, or an error if the embedding cannot be converted to a vector
+isolated function createIndexAction(
+    ai:Document|ai:Chunk doc,
+    ai:Embedding? embedding,
+    int documentIndex,
+    string keyFieldName,
+    string contentFieldName,
+    string[] vectorFieldNames,
+    map<search:SearchField> allFields,
+    boolean verbose
+) returns index:IndexAction|ai:Error {
+    // Start with the basic action structure
+    index:IndexAction indexAction = {
+        \@search\.action: SEARCH_ACTION_MERGE_OR_UPLOAD
+    };
+
+    // Key is "<metadata key>_<documentIndex>" if metadata has the key field, else a type-1 UUID; the "_"
+    // keeps ("1", 11) and ("11", 1) from producing the same key. TODO: handle non-string key fields
+    ai:Metadata? metadata = doc.metadata;
+    string keyValue = metadata !is () && metadata.hasKey(keyFieldName)
+        ? doc.metadata[keyFieldName].toString() + "_" + documentIndex.toString()
+        : uuid:createType1AsString();
+        
+    indexAction[keyFieldName] = keyValue;
+    logIfVerboseEnable(
+        verbose, string `Set key field ${keyFieldName} to value ${keyValue} for document index ${documentIndex}.`);
+
+    // Add embeddings to vector fields if available
+    if embedding is ai:Embedding {
+        foreach string vectorFieldName in vectorFieldNames {
+            ai:Vector|ai:Error vectors = generateVectorFromEmbedding(embedding);
+            if vectors is ai:Error {
+                logIfVerboseEnable(
+                    verbose, string `Failed to generate vector for document index ${documentIndex} and field ${vectorFieldName}: ${vectors.message()}`);
+                return vectors;
+            }
+
+            indexAction[vectorFieldName] = vectors;
+            logIfVerboseEnable(
+                verbose, string `Added vector for document index ${documentIndex} to field ${vectorFieldName}.`);
+        }
+    }
+    
+    indexAction[contentFieldName] = doc.content;
+    logIfVerboseEnable(
+        verbose, string `Added content for document index ${documentIndex} to field ${contentFieldName}.`);
+
+    // Add document type if there's a field for it (check if "type" field exists)
+    if allFields.hasKey(DEFAULT_TYPE_FIELD_NAME) {
+        indexAction[DEFAULT_TYPE_FIELD_NAME] = doc.'type;
+    }
+
+    // Copy metadata entries, never overwriting the key, content or vector fields set above
+    if metadata is ai:Metadata {
+        foreach [string, json] [key, value] in metadata.entries() {
+            boolean isPossibleMetadata = key != keyFieldName && key != contentFieldName 
+                    && vectorFieldNames.indexOf(key) == ();
+            // Only add metadata if the field exists in the index schema
+            if allFields.hasKey(key) && isPossibleMetadata {
+                indexAction[key] = value;
+            } else {
+                if isPossibleMetadata {
+                    logIfVerboseEnable(
+                        verbose, string `Skipping field ${key} as it does not exist in index schema.`);
+                }
+            }
+        }
+    }
+
+    return indexAction;
+}
+
 isolated function analyzeIndexSchema(boolean verbose, search:SearchIndex index, string contentFieldName) returns IndexSchemaInfo|ai:Error {
     string? keyFieldName = ();
     string[] vectorFieldNames = [];

From e91825e0ec574c73bb5da2d9ebaeb26cc70f454a Mon Sep 17 00:00:00 2001
From: Sasindu Alahakoon <dilsharasasindu@gmail.com>
Date: Thu, 23 Oct 2025 13:04:09 +0530
Subject: [PATCH 05/10] Remove local dependencies

---
 ballerina/Ballerina.toml | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/ballerina/Ballerina.toml b/ballerina/Ballerina.toml
index 0bb7100..0fe26e5 100644
--- a/ballerina/Ballerina.toml
+++ b/ballerina/Ballerina.toml
@@ -17,15 +17,3 @@ groupId = "io.ballerina.lib"
 artifactId = "ai.azure-native"
 version = "1.2.0"
 path = "../native/build/libs/ai.azure-native-1.2.0-SNAPSHOT.jar"
-
-[[dependency]]
-org="ballerinax"
-name="azure.ai.search"
-version="1.0.0" 
-repository="local"
-
-[[dependency]]
-org="ballerinax"
-name="azure.ai.search.index"
-version="1.0.0" 
-repository="local"

From c53ceee5ac8e626c2ce2f70db7b2c933601afd9b Mon Sep 17 00:00:00 2001
From: Sasindu Alahakoon <dilsharasasindu@gmail.com>
Date: Thu, 23 Oct 2025 17:05:46 +0530
Subject: [PATCH 06/10] [Automated] Update the toml files

---
 ballerina/Ballerina.toml    | 12 ++++++++++++
 ballerina/Dependencies.toml |  3 +--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/ballerina/Ballerina.toml b/ballerina/Ballerina.toml
index 0fe26e5..0bb7100 100644
--- a/ballerina/Ballerina.toml
+++ b/ballerina/Ballerina.toml
@@ -17,3 +17,15 @@ groupId = "io.ballerina.lib"
 artifactId = "ai.azure-native"
 version = "1.2.0"
 path = "../native/build/libs/ai.azure-native-1.2.0-SNAPSHOT.jar"
+
+[[dependency]]
+org="ballerinax"
+name="azure.ai.search"
+version="1.0.0" 
+repository="local"
+
+[[dependency]]
+org="ballerinax"
+name="azure.ai.search.index"
+version="1.0.0" 
+repository="local"
diff --git a/ballerina/Dependencies.toml b/ballerina/Dependencies.toml
index 77971fd..9d017b4 100644
--- a/ballerina/Dependencies.toml
+++ b/ballerina/Dependencies.toml
@@ -144,7 +144,7 @@ modules = [
 [[package]]
 org = "ballerina"
 name = "io"
-version = "1.8.0"
+version = "1.8.1"
 dependencies = [
 	{org = "ballerina", name = "jballerina.java"},
 	{org = "ballerina", name = "lang.value"}
@@ -435,7 +435,6 @@ dependencies = [
 	{org = "ballerina", name = "http"},
 	{org = "ballerina", name = "log"},
 	{org = "ballerina", name = "url"},
-	{org = "ballerina", name = "uuid"},
 	{org = "ballerinai", name = "observe"}
 ]
 modules = [

From c44d2238c953ba524f565a0b3f9d2dd92a78c36d Mon Sep 17 00:00:00 2001
From: Sasindu Alahakoon <dilsharasasindu@gmail.com>
Date: Thu, 23 Oct 2025 17:11:15 +0530
Subject: [PATCH 07/10] Update the names and config names

---
 ballerina/azure_ai_search_knowledgebase.bal | 16 ++++++++--------
 build-config/resources/Ballerina.toml       | 12 ++++++++++++
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/ballerina/azure_ai_search_knowledgebase.bal b/ballerina/azure_ai_search_knowledgebase.bal
index fa84980..c986b3b 100644
--- a/ballerina/azure_ai_search_knowledgebase.bal
+++ b/ballerina/azure_ai_search_knowledgebase.bal
@@ -75,7 +75,7 @@ type IndexSchemaInfo record {
 };
 
 # Configuration for the Azure AI Service Clients
-public type AzureAiSearchKnowledgeBaseClientConfiguration record {|
+public type AiSearchKnowledgeBaseClientConfiguration record {|
     # Connection configuration for the Azure AI search client that use for create search index
     # This configuration is only required when the `index` parameter
     # is provided as an `search:SearchIndex` (i.e., when the system will create the index).
@@ -88,7 +88,7 @@ public type AzureAiSearchKnowledgeBaseClientConfiguration record {|
 # User should create the required `indexer`, `data source` and `index` beforehand using 
 # the util functions provided in this module. 
 # Currently search fields only supported with `id`, `content` and `type` field names.
-public distinct isolated class AzureAiSearchKnowledgeBase {
+public distinct isolated class AiSearchKnowledgeBase {
     *ai:KnowledgeBase;
     
     private final search:SearchIndex index;
@@ -104,7 +104,7 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
     private final string[] vectorFieldNames;
     private final map<search:SearchField> allFields;
 
-    # Initializes a new `AzureAiSearchKnowledgeBase` instance.
+    # Initializes a new `AiSearchKnowledgeBase` instance.
     # 
     # + serviceUrl - The service URL of the Azure AI Search instance
     # + apiKey - The API key for authenticating with the Azure AI Search service
@@ -116,11 +116,11 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
     # + apiVersion - The API version to use for requests.
     # + clientConfigurations - Additional client configurations for Azure AI Search clients
     # + contentFieldName - The name of the field in the index that contains the main content. Defaults to "content".
-    # + return - An instance of `AzureAiSearchKnowledgeBase` or an `ai:Error` if initialization fails
+    # + return - An instance of `AiSearchKnowledgeBase` or an `ai:Error` if initialization fails
     public isolated function init(string serviceUrl, string apiKey, string|search:SearchIndex index, ai:EmbeddingProvider embeddingModel, 
             ai:Chunker|ai:AUTO|ai:DISABLE chunker = ai:AUTO, boolean verbose = false, 
             string apiVersion = AI_AZURE_KNOWLEDGEBASE_API_VERSION, string contentFieldName = CONTENT_FIELD_NAME, 
-            *AzureAiSearchKnowledgeBaseClientConfiguration clientConfigurations) returns ai:Error? {
+            *AiSearchKnowledgeBaseClientConfiguration clientConfigurations) returns ai:Error? {
         self.chunker = chunker;
         self.embeddingModel = embeddingModel;
         self.verbose = verbose;
@@ -518,9 +518,9 @@ public distinct isolated class AzureAiSearchKnowledgeBase {
 # + err - Optional error to log with additional details
 isolated function logIfVerboseEnable(boolean verbose, string value, 'error? err = ()) {
     if verbose {
-        log:printInfo(string `[AzureAiSearchKnowledgeBase] ${value}`);
+        log:printInfo(string `[AiSearchKnowledgeBase] ${value}`);
         if err is error {
-            log:printError(string `[AzureAiSearchKnowledgeBase] Error Details: ${err.message()}`, err);
+            log:printError(string `[AiSearchKnowledgeBase] Error Details: ${err.message()}`, err);
         }
     }
 }
@@ -563,7 +563,7 @@ isolated function generateVectorFromEmbedding(ai:Embedding embedding) returns ai
         return embedding.dense;
     } else {
         // Explicitly fail for sparse-only embeddings
-        return error ai:Error("AzureAiSearchKnowledgeBase only supports dense or hybrid embeddings, but received a SparseVector.");
+        return error ai:Error("AiSearchKnowledgeBase only supports dense or hybrid embeddings, but received a SparseVector.");
     }
 }
 
diff --git a/build-config/resources/Ballerina.toml b/build-config/resources/Ballerina.toml
index 3d4f497..56a0388 100644
--- a/build-config/resources/Ballerina.toml
+++ b/build-config/resources/Ballerina.toml
@@ -17,3 +17,15 @@ groupId = "io.ballerina.lib"
 artifactId = "ai.azure-native"
 version = "@toml.version@"
 path = "../native/build/libs/ai.azure-native-@project.version@.jar"
+
+[[dependency]]
+org="ballerinax"
+name="azure.ai.search"
+version="1.0.0" 
+repository="local"
+
+[[dependency]]
+org="ballerinax"
+name="azure.ai.search.index"
+version="1.0.0" 
+repository="local"

From 7044bed851ca0ba6b53aaee1d9a3cf29711f1168 Mon Sep 17 00:00:00 2001
From: Sasindu Alahakoon <dilsharasasindu@gmail.com>
Date: Thu, 23 Oct 2025 17:18:57 +0530
Subject: [PATCH 08/10] Remove local dependencies

---
 build-config/resources/Ballerina.toml | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/build-config/resources/Ballerina.toml b/build-config/resources/Ballerina.toml
index 56a0388..3d4f497 100644
--- a/build-config/resources/Ballerina.toml
+++ b/build-config/resources/Ballerina.toml
@@ -17,15 +17,3 @@ groupId = "io.ballerina.lib"
 artifactId = "ai.azure-native"
 version = "@toml.version@"
 path = "../native/build/libs/ai.azure-native-@project.version@.jar"
-
-[[dependency]]
-org="ballerinax"
-name="azure.ai.search"
-version="1.0.0" 
-repository="local"
-
-[[dependency]]
-org="ballerinax"
-name="azure.ai.search.index"
-version="1.0.0" 
-repository="local"

From 4e92e3cb478716891c9bd1109d91c73f0f23e7cd Mon Sep 17 00:00:00 2001
From: Sasindu Alahakoon <dilsharasasindu@gmail.com>
Date: Thu, 23 Oct 2025 17:20:19 +0530
Subject: [PATCH 09/10] Remove local dependencies

---
 ballerina/Ballerina.toml | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/ballerina/Ballerina.toml b/ballerina/Ballerina.toml
index 0bb7100..0fe26e5 100644
--- a/ballerina/Ballerina.toml
+++ b/ballerina/Ballerina.toml
@@ -17,15 +17,3 @@ groupId = "io.ballerina.lib"
 artifactId = "ai.azure-native"
 version = "1.2.0"
 path = "../native/build/libs/ai.azure-native-1.2.0-SNAPSHOT.jar"
-
-[[dependency]]
-org="ballerinax"
-name="azure.ai.search"
-version="1.0.0" 
-repository="local"
-
-[[dependency]]
-org="ballerinax"
-name="azure.ai.search.index"
-version="1.0.0" 
-repository="local"

From ccf8150780ac29e2e184ad6be2845a0163937b10 Mon Sep 17 00:00:00 2001
From: Sasindu Alahakoon <dilsharasasindu@gmail.com>
Date: Thu, 23 Oct 2025 20:58:00 +0530
Subject: [PATCH 10/10] Update knowledgebase

---
 ballerina/azure_ai_search_knowledgebase.bal | 141 ++++++++++----------
 1 file changed, 72 insertions(+), 69 deletions(-)

diff --git a/ballerina/azure_ai_search_knowledgebase.bal b/ballerina/azure_ai_search_knowledgebase.bal
index c986b3b..29f2677 100644
--- a/ballerina/azure_ai_search_knowledgebase.bal
+++ b/ballerina/azure_ai_search_knowledgebase.bal
@@ -17,12 +17,12 @@
 import ballerina/ai;
 import ballerina/log;
 import ballerina/uuid;
-import ballerinax/azure.ai.search as search;
+import ballerinax/azure.ai.search;
 import ballerinax/azure.ai.search.index;
 
 const CONTENT_FIELD_NAME = "content";
 const KEY_FIELD_NAME = "id";
-const AI_AZURE_KNOWLEDGEBASE_API_VERSION = "2025-09-01";
+const AI_AZURE_KNOWLEDGE_BASE_API_VERSION = "2025-09-01";
 const API_KEY_HEADER_NAME = "api-key";
 
 // Search action constants
@@ -74,20 +74,7 @@ type IndexSchemaInfo record {
     map<search:SearchField> allFields;
 };
 
-# Configuration for the Azure AI Service Clients
-public type AiSearchKnowledgeBaseClientConfiguration record {|
-    # Connection configuration for the Azure AI search client that use for create search index
-    # This configuration is only required when the `index` parameter
-    # is provided as an `search:SearchIndex` (i.e., when the system will create the index).
-    search:ConnectionConfig searchClientConnectionConfig = {};
-    # Connection configuration for the Azure AI index client that use for index operations
-    index:ConnectionConfig indexClientConnectionConfig = {};
-|};
-
 # Represents the Azure Search Knowledge Base implementation.
-# User should create the required `indexer`, `data source` and `index` beforehand using 
-# the util functions provided in this module. 
-# Currently search fields only supported with `id`, `content` and `type` field names.
 public distinct isolated class AiSearchKnowledgeBase {
     *ai:KnowledgeBase;
     
@@ -116,22 +103,27 @@ public distinct isolated class AiSearchKnowledgeBase {
     # + apiVersion - The API version to use for requests.
     # + clientConfigurations - Additional client configurations for Azure AI Search clients
     # + contentFieldName - The name of the field in the index that contains the main content. Defaults to "content".
+    # + searchClientConnectionConfig - Connection configuration for the Azure AI search client.
+    #                                  This configuration is only required when the `index` parameter is 
+    #                                  provided as an `search:SearchIndex`
+    # + indexClientConnectionConfig - Connection configuration for the Azure AI index client.
     # + return - An instance of `AiSearchKnowledgeBase` or an `ai:Error` if initialization fails
-    public isolated function init(string serviceUrl, string apiKey, string|search:SearchIndex index, ai:EmbeddingProvider embeddingModel, 
+    public isolated function init(string serviceUrl, string apiKey, 
+            string|search:SearchIndex index, ai:EmbeddingProvider embeddingModel, 
             ai:Chunker|ai:AUTO|ai:DISABLE chunker = ai:AUTO, boolean verbose = false, 
-            string apiVersion = AI_AZURE_KNOWLEDGEBASE_API_VERSION, string contentFieldName = CONTENT_FIELD_NAME, 
-            *AiSearchKnowledgeBaseClientConfiguration clientConfigurations) returns ai:Error? {
+            string apiVersion = AI_AZURE_KNOWLEDGE_BASE_API_VERSION, string contentFieldName = CONTENT_FIELD_NAME, 
+            search:ConnectionConfig searchClientConnectionConfig = {},
+            index:ConnectionConfig indexClientConnectionConfig = {}) returns ai:Error? {
         self.chunker = chunker;
         self.embeddingModel = embeddingModel;
         self.verbose = verbose;
         self.contentFieldName = contentFieldName;
         
         // Initialize service client for management operations
-        search:ConnectionConfig searchClientConfig = clientConfigurations.searchClientConnectionConfig;
         self.apiKey = apiKey;
         self.apiVersion = apiVersion;
 
-        search:Client|error serviceClient = new search:Client(serviceUrl, searchClientConfig);
+        search:Client|error serviceClient = new search:Client(serviceUrl, searchClientConnectionConfig);
         if serviceClient is error {
             return error ai:Error("Failed to initialize Azure AI Service Client", serviceClient);
         }
@@ -144,29 +136,34 @@ public distinct isolated class AiSearchKnowledgeBase {
             search:SearchIndex|error searchIndex = self.serviceClient->indexesGet(indexName, {
                 [API_KEY_HEADER_NAME]: self.apiKey}, {api\-version: self.apiVersion});
             if searchIndex is error {
-                logIfVerboseEnable(self.verbose, string `Search index ${indexName} does not exist: ${searchIndex.message()}`);
+                logIfVerboseEnabled(self.verbose, 
+                    string `Search index ${indexName} does not exist: ${searchIndex.message()}`);
                 return error ai:Error("Failed to verify existence of index", searchIndex);
             }
 
             self.index = searchIndex.cloneReadOnly();
-            logIfVerboseEnable(self.verbose, string `Search index ${indexName} exists. Details: ${searchIndex.toJsonString()}`);
+            logIfVerboseEnabled(self.verbose, 
+                string `Search index ${indexName} exists. Details: ${searchIndex.toJsonString()}`);
         } else {
-            logIfVerboseEnable(self.verbose, string `Attempting to create search index ${indexName}...`);
+            logIfVerboseEnabled(self.verbose, string `Attempting to create search index ${indexName}...`);
             search:SearchIndex|error createdIndex = self.serviceClient->indexesCreateOrUpdate(indexName, {
-                [API_KEY_HEADER_NAME]: self.apiKey, Prefer: PREFER_HEADER_RETURN_REPRESENTATION}, index, {api\-version: self.apiVersion});
+                [API_KEY_HEADER_NAME]: self.apiKey, Prefer: PREFER_HEADER_RETURN_REPRESENTATION}, 
+                    index, {api\-version: self.apiVersion});
             if createdIndex is error {
-                logIfVerboseEnable(self.verbose, string `Failed to create search index ${indexName}: ${createdIndex.message()}`);
+                logIfVerboseEnabled(self.verbose, 
+                    string `Failed to create search index ${indexName}: ${createdIndex.message()}`);
                 return error ai:Error("Failed to create search index", createdIndex);
             }
             self.index = createdIndex.cloneReadOnly();
-            logIfVerboseEnable(self.verbose, string `Search index ${indexName} created successfully.`);
+            logIfVerboseEnabled(self.verbose, string `Search index ${indexName} created successfully.`);
         }
 
         string indexServiceUrl = string `${serviceUrl}/indexes('${indexName}')`;
-        logIfVerboseEnable(self.verbose, string `Initializing Azure Index Client for index URL: ${indexServiceUrl}`);
-        index:Client|error indexClient = new (indexServiceUrl, clientConfigurations.indexClientConnectionConfig);
+        logIfVerboseEnabled(self.verbose, string `Initializing Azure Index Client for index URL: ${indexServiceUrl}`);
+        index:Client|error indexClient = new (indexServiceUrl, indexClientConnectionConfig);
         if indexClient is error {
-            logIfVerboseEnable(self.verbose, string `Failed to initialize Azure Index Client: ${indexClient.message()}`);
+            logIfVerboseEnabled(self.verbose, 
+                string `Failed to initialize Azure Index Client: ${indexClient.message()}`);
             return error ai:Error("Failed to initialize Azure Index Client", indexClient);
         }
         self.indexClient = indexClient;
@@ -187,28 +184,33 @@ public distinct isolated class AiSearchKnowledgeBase {
         lock {
             ai:Chunk[]|ai:Error chunks = self.chunk(documents.clone());
             if chunks is ai:Error {
-                logIfVerboseEnable(self.verbose, string `Failed to chunk documents: ${chunks.message()}}`, chunks);
+                logIfVerboseEnabled(self.verbose, 
+                    string `Failed to chunk documents: ${chunks.message()}}`, chunks);
                 return error ai:Error("Failed to chunk documents before ingestion", chunks);
             }
 
             ai:Embedding[]|error embeddings = self.embeddingModel->batchEmbed(chunks);
             if embeddings is error {
-                logIfVerboseEnable(self.verbose, string `Failed to generate embeddings for documents: ${embeddings.message()}}`, embeddings);
+                logIfVerboseEnabled(self.verbose, 
+                    string `Failed to generate embeddings for documents: ${embeddings.message()}}`, embeddings);
                 return error ai:Error("Failed to generate embeddings for documents", embeddings);
             }
-            logIfVerboseEnable(self.verbose, string `Generated embeddings for ${embeddings.length().toString()} chunks.`);
+            logIfVerboseEnabled(self.verbose, 
+                string `Generated embeddings for ${embeddings.length().toString()} chunks.`);
 
             index:IndexDocumentsResult|error uploadResult = self.uploadDocuments(self.indexClient, chunks, self.index, 
                     embeddings, {[API_KEY_HEADER_NAME]: self.apiKey}, {api\-version: self.apiVersion});
             if uploadResult is error {
-                logIfVerboseEnable(self.verbose, string `Failed to upload documents to search index: ${uploadResult.message()}}`, uploadResult);
+                logIfVerboseEnabled(self.verbose, 
+                    string `Failed to upload documents to search index: ${uploadResult.message()}}`, uploadResult);
                 return error ai:Error("Failed to upload documents to search index", uploadResult);
             }
             
             // Validate that all documents were successfully indexed
             foreach index:IndexingResult result in uploadResult.value {
                 if !result.status {
-                    return error ai:Error(string `Failed to index document with key ${result.'key}: ${result.errorMessage ?: "Unknown error"}`);
+                    return error ai:Error(
+                        string `Failed to index document with key ${result.'key}: ${result.errorMessage ?: "Unknown error"}`);
                 }
             }
             
@@ -222,11 +224,8 @@ public distinct isolated class AiSearchKnowledgeBase {
     # + maxLimit - The maximum number of items to return
     # + filters - Optional metadata filters to apply during retrieval
     # + return - An array of matching chunks with similarity scores, or an `ai:Error` if retrieval fails
-    public isolated function retrieve(string query, int maxLimit = 10, ai:MetadataFilters? filters = ()) returns ai:QueryMatch[]|ai:Error {
-        if query is "" {
-            return error ai:Error("Query cannot be empty for retrieval");
-        }
-
+    public isolated function retrieve(string query, int maxLimit = 10, 
+                                ai:MetadataFilters? filters = ()) returns ai:QueryMatch[]|ai:Error {
         if maxLimit != -1 && maxLimit <= 0 {
             return error ai:Error("maxLimit must be a positive integer");
         }
@@ -282,7 +281,8 @@ public distinct isolated class AiSearchKnowledgeBase {
             );
 
             if searchResult is error {
-                logIfVerboseEnable(self.verbose, string `Failed to retrieve documents from Azure AI Search: ${searchResult.message()}}`, searchResult);
+                logIfVerboseEnabled(self.verbose, 
+                    string `Failed to retrieve documents from Azure AI Search: ${searchResult.message()}}`, searchResult);
                 return error ai:Error("Failed to retrieve documents from Azure AI Search", searchResult);
             }
 
@@ -328,18 +328,15 @@ public distinct isolated class AiSearchKnowledgeBase {
         );
 
         if searchResult is error {
-            logIfVerboseEnable(self.verbose, string `Failed to search for documents to delete: ${searchResult.message()}}`, searchResult);
+            logIfVerboseEnabled(self.verbose, 
+                string `Failed to search for documents to delete: ${searchResult.message()}}`, searchResult);
             return error ai:Error("Failed to search for documents to delete", searchResult);
         }
 
-        // Extract document IDs
-        string[] documentIds = [];
-        foreach index:SearchResult result in searchResult.value {
-            string? documentId = extractFieldValue(result, self.keyFieldName, self.verbose);
-            if documentId is string {
-                documentIds.push(documentId);
-            }
-        }
+        string[] documentIds = from index:SearchResult result in searchResult.value
+            let string? documentId = extractFieldValue(result, self.keyFieldName, self.verbose)
+            where documentId is string
+            select documentId;
 
         if documentIds.length() == 0 {
             return; // No documents found matching the filters
@@ -374,7 +371,8 @@ public distinct isolated class AiSearchKnowledgeBase {
         // Check for any failures in the delete operation
         foreach index:IndexingResult result in deleteResult.value {
             if !result.status {
-                return error ai:Error(string `Failed to delete document with key ${result.'key}: ${result.errorMessage ?: "Unknown error"}`);
+                return error ai:Error(string 
+                    `Failed to delete document with key ${result.'key}: ${result.errorMessage ?: "Unknown error"}`);
             }
         }
 
@@ -468,7 +466,7 @@ public distinct isolated class AiSearchKnowledgeBase {
         search:SearchIndex index,
         ai:Embedding[]? embeddings = (),
         index:DocumentsIndexHeaders headers = {},
-        index:DocumentsIndexQueries queries = {api\-version: AI_AZURE_KNOWLEDGEBASE_API_VERSION}
+        index:DocumentsIndexQueries queries = {api\-version: AI_AZURE_KNOWLEDGE_BASE_API_VERSION}
     ) returns index:IndexDocumentsResult|error {
         if embeddings is ai:Embedding[] && embeddings.length() != documents.length() {
             return error ai:Error("Embeddings count does not match documents count, Embeddings length: " +
@@ -505,7 +503,8 @@ public distinct isolated class AiSearchKnowledgeBase {
                 value: indexActions
             };
 
-            logIfVerboseEnable(self.verbose, string `Uploading ${indexActions.length().toString()} documents to Azure AI Search index ${index.name}.`);
+            logIfVerboseEnabled(self.verbose, string 
+                `Uploading ${indexActions.length().toString()} documents to Azure AI Search index ${index.name}.`);
             return 'client->documentsIndex(batch.cloneReadOnly(), headers.cloneReadOnly(), queries.cloneReadOnly());
         }
     }
@@ -516,7 +515,7 @@ public distinct isolated class AiSearchKnowledgeBase {
 # + verbose - Whether verbose logging is enabled
 # + value - The message to log
 # + err - Optional error to log with additional details
-isolated function logIfVerboseEnable(boolean verbose, string value, 'error? err = ()) {
+isolated function logIfVerboseEnabled(boolean verbose, string value, 'error? err = ()) {
     if verbose {
         log:printInfo(string `[AiSearchKnowledgeBase] ${value}`);
         if err is error {
@@ -558,13 +557,13 @@ isolated function guessChunker(ai:Document|ai:Chunk doc) returns ai:Chunker {
 isolated function generateVectorFromEmbedding(ai:Embedding embedding) returns ai:Vector|ai:Error {
     if embedding is ai:Vector {
         return embedding;
-    } else if embedding is ai:HybridVector {
+    } 
+    if embedding is ai:HybridVector {
         // Return the dense part, discard sparse
         return embedding.dense;
-    } else {
-        // Explicitly fail for sparse-only embeddings
-        return error ai:Error("AiSearchKnowledgeBase only supports dense or hybrid embeddings, but received a SparseVector.");
     }
+    // Explicitly fail for sparse-only embeddings
+    return error("AiSearchKnowledgeBase only supports dense or hybrid embeddings, but received a SparseVector.");
 }
 
 # Formats a JSON value for use in OData expressions
@@ -679,7 +678,7 @@ isolated function extractFieldValue(index:SearchResult result, string fieldName,
         return fieldValue;
     }
     if fieldValue is () {
-        logIfVerboseEnable(verbose, string `Field ${fieldName} is null in search result.`);
+        logIfVerboseEnabled(verbose, string `Field ${fieldName} is null in search result.`);
         return "";
     }
     // Handle other types if they are possible content
@@ -693,7 +692,8 @@ isolated function extractFieldValue(index:SearchResult result, string fieldName,
 # + keyFieldName - The name of the key field to exclude
 # + vectorFieldNames - Array of vector field names to exclude
 # + return - The extracted metadata
-isolated function extractMetadataFromResult(index:SearchResult result, string contentFieldName, string keyFieldName, string[] vectorFieldNames) returns ai:Metadata {
+isolated function extractMetadataFromResult(index:SearchResult result, string contentFieldName, 
+        string keyFieldName, string[] vectorFieldNames) returns ai:Metadata {
     ai:Metadata metadata = {};
 
     // Extract all fields except the core content/title fields as metadata
@@ -745,7 +745,7 @@ isolated function createIndexAction(
         : uuid:createType1AsString();
         
     indexAction[keyFieldName] = keyValue;
-    logIfVerboseEnable(
+    logIfVerboseEnabled(
         verbose, string `Set key field ${keyFieldName} to value ${keyValue} for document index ${documentIndex}.`);
 
     // Add embeddings to vector fields if available
@@ -753,19 +753,20 @@ isolated function createIndexAction(
         foreach string vectorFieldName in vectorFieldNames {
             ai:Vector|ai:Error vectors = generateVectorFromEmbedding(embedding);
             if vectors is ai:Error {
-                logIfVerboseEnable(
-                    verbose, string `Failed to generate vector for document index ${documentIndex} and field ${vectorFieldName}: ${vectors.message()}`);
+                logIfVerboseEnabled(
+                    verbose, string 
+                        `Failed to generate vector for document index ${documentIndex} and field ${vectorFieldName}: ${vectors.message()}`);
                 return vectors;
             }
 
             indexAction[vectorFieldName] = vectors;
-            logIfVerboseEnable(
+            logIfVerboseEnabled(
                 verbose, string `Added vector for document index ${documentIndex} to field ${vectorFieldName}.`);
         }
     }
     
     indexAction[contentFieldName] = doc.content;
-    logIfVerboseEnable(
+    logIfVerboseEnabled(
         verbose, string `Added content for document index ${documentIndex} to field ${contentFieldName}.`);
 
     // Add document type if there's a field for it (check if "type" field exists)
@@ -783,7 +784,7 @@ isolated function createIndexAction(
                 indexAction[key] = value;
             } else {
                 if isPossibleMetadata {
-                    logIfVerboseEnable(
+                    logIfVerboseEnabled(
                         verbose, string `Skipping field ${key} as it does not exist in index schema.`);
                 }
             }
@@ -793,7 +794,8 @@ isolated function createIndexAction(
     return indexAction;
 }
 
-isolated function analyzeIndexSchema(boolean verbose, search:SearchIndex index, string contentFieldName) returns IndexSchemaInfo|ai:Error {
+isolated function analyzeIndexSchema(
+        boolean verbose, search:SearchIndex index, string contentFieldName) returns IndexSchemaInfo|ai:Error {
     string? keyFieldName = ();
     string[] vectorFieldNames = [];
     string[] contentFieldNames = [];
@@ -819,7 +821,7 @@ isolated function analyzeIndexSchema(boolean verbose, search:SearchIndex index,
     }
 
     if vectorFieldNames.length() == 0 {
-        logIfVerboseEnable(verbose, "No vector fields found in index schema.");
+        logIfVerboseEnabled(verbose, "No vector fields found in index schema.");
     }
 
     if contentFieldNames.length() == 0 {
@@ -827,11 +829,12 @@ isolated function analyzeIndexSchema(boolean verbose, search:SearchIndex index,
     }
 
     if keyFieldName is () {
-        logIfVerboseEnable(verbose, string `No key field defined in index schema. Using default key field name as '${KEY_FIELD_NAME}'.`);
+        logIfVerboseEnabled(verbose, string `No key field defined in index schema. Using default key field name as '${KEY_FIELD_NAME}'.`);
     }
 
     if vectorFieldNames.length() > 1 {
-        logIfVerboseEnable(verbose, string `Multiple vector fields found in index schema: ${string:'join(", ", ...vectorFieldNames)}. Currently one vecotr field is prefered. So for now, there is more than one, all the vector fileds will share the same vectors.`);
+        logIfVerboseEnabled(verbose, string 
+            `Multiple vector fields found in index schema: ${string:'join(", ", ...vectorFieldNames)}. Currently one vecotr field is prefered. So for now, there is more than one, all the vector fileds will share the same vectors.`);
     }
     
     return {