Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 40 additions & 11 deletions ballerina/azure_ai_search_knowledgebase.bal
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ type IndexSchemaInfo record {
map<search:SearchField> allFields;
};

# Details for semantic configuration in Azure AI Search.
public type SemanticConfigurationDetails record {|
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public type SemanticConfigurationDetails record {|
public type SemanticConfigurations record {|

# Name of the semantic configuration
string name;
|};

# Represents the Azure Search Knowledge Base implementation.
public distinct isolated class AiSearchKnowledgeBase {
*ai:KnowledgeBase;
Expand All @@ -85,11 +91,12 @@ public distinct isolated class AiSearchKnowledgeBase {
private final string apiKey;
private final boolean verbose;
private final ai:Chunker|ai:AUTO|ai:DISABLE chunker;
private final ai:EmbeddingProvider embeddingModel;
private final ai:EmbeddingProvider? embeddingModel;
private final string contentFieldName;
private final string keyFieldName;
private final string[] vectorFieldNames;
private final map<search:SearchField> allFields;
private final SemanticConfigurationDetails? semanticConfigurationDetails;

# Initializes a new `AiSearchKnowledgeBase` instance.
#
Expand All @@ -107,17 +114,20 @@ public distinct isolated class AiSearchKnowledgeBase {
# This configuration is only required when the `index` parameter is
# provided as an `search:SearchIndex`
# + indexClientConnectionConfig - Connection configuration for the Azure AI index client.
# + semanticConfigurationDetails - Optional semantic configuration details for semantic search.
# + return - An instance of `AiSearchKnowledgeBase` or an `ai:Error` if initialization fails
public isolated function init(string serviceUrl, string apiKey,
string|search:SearchIndex index, ai:EmbeddingProvider embeddingModel,
string|search:SearchIndex index, ai:EmbeddingProvider? embeddingModel = (),
ai:Chunker|ai:AUTO|ai:DISABLE chunker = ai:AUTO, boolean verbose = false,
string apiVersion = AI_AZURE_KNOWLEDGE_BASE_API_VERSION, string contentFieldName = CONTENT_FIELD_NAME,
search:ConnectionConfig searchClientConnectionConfig = {},
index:ConnectionConfig indexClientConnectionConfig = {}) returns ai:Error? {
index:ConnectionConfig indexClientConnectionConfig = {},
SemanticConfigurationDetails? semanticConfigurationDetails = ()) returns ai:Error? {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if the SemanticConfigurationDetails just have the name can't we introduce a string value instead of record?
semanticConfigurationName ?

self.chunker = chunker;
self.embeddingModel = embeddingModel;
self.verbose = verbose;
self.contentFieldName = contentFieldName;
self.semanticConfigurationDetails = semanticConfigurationDetails.cloneReadOnly();

// Initialize service client for management operations
self.apiKey = apiKey;
Expand Down Expand Up @@ -189,14 +199,22 @@ public distinct isolated class AiSearchKnowledgeBase {
return error ai:Error("Failed to chunk documents before ingestion", chunks);
}

ai:Embedding[]|error embeddings = self.embeddingModel->batchEmbed(chunks);
if embeddings is error {
ai:Embedding[]? embeddings = ();
ai:EmbeddingProvider? embeddingProvider = self.embeddingModel;
if embeddingProvider is ai:EmbeddingProvider {
logIfVerboseEnabled(self.verbose,
string `Generating embeddings for ${chunks.length().toString()} chunks using embedding model.`);
ai:Embedding[]|error? embeddingResults = embeddingProvider->batchEmbed(chunks);
if embeddingResults is error {
logIfVerboseEnabled(self.verbose,
string `Failed to generate embeddings for documents: ${embeddingResults.message()}}`, embeddingResults);
return error ai:Error("Failed to generate embeddings for documents", embeddingResults);
}

embeddings = embeddingResults;
logIfVerboseEnabled(self.verbose,
string `Failed to generate embeddings for documents: ${embeddings.message()}}`, embeddings);
return error ai:Error("Failed to generate embeddings for documents", embeddings);
string `Generated embeddings for ${embeddings == () ? 0: embeddings.length().toString()} chunks.`);
}
logIfVerboseEnabled(self.verbose,
string `Generated embeddings for ${embeddings.length().toString()} chunks.`);

index:IndexDocumentsResult|error uploadResult = self.uploadDocuments(self.indexClient, chunks, self.index,
embeddings, {[API_KEY_HEADER_NAME]: self.apiKey}, {api\-version: self.apiVersion});
Expand Down Expand Up @@ -236,13 +254,17 @@ public distinct isolated class AiSearchKnowledgeBase {

lock {
ai:TextChunk queryChunk = {content: query, 'type: CONTENT_TYPE_TEXT_CHUNK};
ai:Embedding queryEmbedding = check self.embeddingModel->embed(queryChunk);
ai:Embedding? queryEmbedding = ();
ai:EmbeddingProvider? embeddingProvider = self.embeddingModel;
if embeddingProvider is ai:EmbeddingProvider {
queryEmbedding = check embeddingProvider->embed(queryChunk);
}

// Create vector search request using Azure AI Search's integrated vectorization
int vectorFieldLength = self.vectorFieldNames.length();
index:VectorQuery[]? vectorQuery = ();

if vectorFieldLength != 0 {
if vectorFieldLength != 0 && queryEmbedding is ai:Embedding {
ai:Vector|ai:Error vectors = generateVectorFromEmbedding(queryEmbedding);
if vectors is ai:Error {
return vectors;
Expand All @@ -258,9 +280,16 @@ public distinct isolated class AiSearchKnowledgeBase {
];
}

SemanticConfigurationDetails? semanticConfig = self.semanticConfigurationDetails is SemanticConfigurationDetails
? self.semanticConfigurationDetails : ();
index:QueryType queryType = semanticConfig is SemanticConfigurationDetails
? "semantic" : "simple";

index:SearchRequest searchRequest = {
search: query,
'select: "*",
queryType: queryType,
semanticConfiguration: semanticConfig is SemanticConfigurationDetails ? semanticConfig.name : (),
vectorQueries: vectorQuery ?: [],
top: maxLimit == -1 ? () : <int:Signed32>maxLimit
};
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
org.gradle.caching=true
group=io.ballerina.lib
version=1.2.1-SNAPSHOT
version=1.3.0-SNAPSHOT
ballerinaLangVersion=2201.12.0

shadowJarPluginVersion=8.1.1
Expand Down
Loading