From ebf69393eea4630f96dc540bef9843babdca622b Mon Sep 17 00:00:00 2001 From: Nuvindu Date: Thu, 11 Sep 2025 14:59:17 +0530 Subject: [PATCH 1/7] Add `v1` at the end of the service url path --- ballerina/vector_store.bal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ballerina/vector_store.bal b/ballerina/vector_store.bal index a93fade..e259b0d 100644 --- a/ballerina/vector_store.bal +++ b/ballerina/vector_store.bal @@ -47,7 +47,7 @@ public isolated class VectorStore { token: apiKey }; do { - self.weaviateClient = check new (check httpConfig.cloneWithType(), serviceUrl); + self.weaviateClient = check new (check httpConfig.cloneWithType(), string `${serviceUrl}/v1` ); } on fail error err { return error("Failed to initialize weaviate vector store", err); } From f7adc1e7143e4e8baed3a37fc6c433721a7ac0a6 Mon Sep 17 00:00:00 2001 From: Nuvindu Date: Thu, 11 Sep 2025 14:59:43 +0530 Subject: [PATCH 2/7] Use `TextChunk` type for chunks --- ballerina/vector_store.bal | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ballerina/vector_store.bal b/ballerina/vector_store.bal index e259b0d..3a98c09 100644 --- a/ballerina/vector_store.bal +++ b/ballerina/vector_store.bal @@ -175,13 +175,14 @@ public isolated class VectorStore { QueryResult[] value = check data.cloneWithType(); ai:VectorMatch[] matches = []; foreach weaviate:JsonObject element in value { + ai:TextChunk chunk = { + content: element.content.toString(), + metadata: check metadata.cloneWithType() + }; matches.push({ id: element._additional.id, embedding: element._additional.vector, - chunk: { - 'type: element.'type is () ? "" : check element.'type.cloneWithType(), - content: element.content - }, + chunk, similarityScore: element._additional.certainty !is () ? 
check element._additional.certainty.cloneWithType() : 0.0 }); From 2db0f63ed5338918a2b91cc2f1861db89419add3 Mon Sep 17 00:00:00 2001 From: Nuvindu Date: Thu, 11 Sep 2025 15:00:27 +0530 Subject: [PATCH 3/7] Handle metadata values in the query API --- ballerina/utils.bal | 17 +++++++++++++---- ballerina/vector_store.bal | 12 +++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/ballerina/utils.bal b/ballerina/utils.bal index dd3f2c9..4f2c5b2 100644 --- a/ballerina/utils.bal +++ b/ballerina/utils.bal @@ -15,12 +15,13 @@ // under the License. import ballerina/ai; +import ballerina/time; # Converts metadata filters to Weaviate compatible filter format # # + filters - The metadata filters containing filter conditions and logical operators # + return - A map representing the converted filter structure or an error if conversion fails -isolated function convertWeaviateFilters(ai:MetadataFilters filters) returns map|ai:Error { +isolated function convertWeaviateFilters(ai:MetadataFilters filters, string[] metadataFields) returns map|ai:Error { (ai:MetadataFilters|ai:MetadataFilter)[]? 
rawFilters = filters.filters; if rawFilters == () || rawFilters.length() == 0 { return {}; @@ -28,15 +29,23 @@ isolated function convertWeaviateFilters(ai:MetadataFilters filters) returns map map[] filterList = []; foreach (ai:MetadataFilters|ai:MetadataFilter) filter in rawFilters { if filter is ai:MetadataFilter { + metadataFields.push(filter.key); map filterMap = {}; string weaviateOp = check mapWeaviateOperator(filter.operator); filterMap["path"] = [filter.key]; filterMap["operator"] = weaviateOp; - filterMap["valueText"] = filter.value; + anydata value = filter.value; + if value is string { + filterMap["valueText"] = value; + } else if value is time:Utc { + filterMap["valueNumber"] = value[0]; + } else { + filterMap["valueText"] = string `${value.toString()}`; + } filterList.push(filterMap); continue; } - map nestedFilter = check convertWeaviateFilters(filter); + map nestedFilter = check convertWeaviateFilters(filter, metadataFields); if nestedFilter.length() > 0 { filterList.push(nestedFilter); } @@ -138,7 +147,7 @@ isolated function mapToGraphQLObjectString(map filter) returns string { } result += "[" + resultArr + "]"; } else if value is string { - result += 'key == "operator" ? value : string `"${value}"`; + result += 'key == "operator" ? value : string `${value}`; } else { result += value.toString(); } diff --git a/ballerina/vector_store.bal b/ballerina/vector_store.bal index 3a98c09..be85428 100644 --- a/ballerina/vector_store.bal +++ b/ballerina/vector_store.bal @@ -129,13 +129,18 @@ public isolated class VectorStore { return error("Invalid value for topK. The value cannot be 0 or less than -1."); } string filterSection = ""; + string[] metadataFields = []; if query.hasKey("filters") && query.filters is ai:MetadataFilters { ai:MetadataFilters? 
filters = query.cloneReadOnly().filters; if filters !is () { - map weaviateFilter = check convertWeaviateFilters(filters); + map weaviateFilter = check convertWeaviateFilters(filters, metadataFields); filterSection = "where: " + mapToGraphQLObjectString(weaviateFilter); } } + string metadataFieldsString = ""; + foreach string fieldName in metadataFields { + metadataFieldsString += fieldName + "\n "; + } string gqlQuery = string `{ Get { ${self.config.collectionName}( @@ -148,6 +153,7 @@ public isolated class VectorStore { } ) { content + ${metadataFieldsString} _additional { certainty id @@ -175,6 +181,10 @@ public isolated class VectorStore { QueryResult[] value = check data.cloneWithType(); ai:VectorMatch[] matches = []; foreach weaviate:JsonObject element in value { + map metadata = {}; + foreach string fieldName in metadataFields { + metadata[fieldName] = element.get(fieldName); + } ai:TextChunk chunk = { content: element.content.toString(), metadata: check metadata.cloneWithType() From f79d40a5e8d374f87bfa76df05da55bcc6ba5967 Mon Sep 17 00:00:00 2001 From: Nuvindu Date: Thu, 11 Sep 2025 15:02:06 +0530 Subject: [PATCH 4/7] Update test cases to validate metadata values in query results --- ballerina/tests/test.bal | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/ballerina/tests/test.bal b/ballerina/tests/test.bal index 1051dbf..40bdc64 100644 --- a/ballerina/tests/test.bal +++ b/ballerina/tests/test.bal @@ -16,10 +16,11 @@ import ballerina/ai; import ballerina/test; +import ballerina/time; import ballerina/uuid; final VectorStore mockVectorStore = check new ( - serviceUrl = "http://localhost:8080/v1", + serviceUrl = "http://localhost:8080", config = { collectionName: "Chunk" }, @@ -27,6 +28,7 @@ final VectorStore mockVectorStore = check new ( ); string id = uuid:createRandomUuid(); +time:Utc createdAt = time:utcNow(); @test:Config {} function testAddingValuesToVectorStore() returns error? 
{ @@ -35,8 +37,11 @@ function testAddingValuesToVectorStore() returns error? { id, embedding: [1.0, 2.0, 3.0], chunk: { - 'type: "text", - content: "This is a test chunk" + 'type: "text", + content: "This is a test chunk", + metadata: { + createdAt + } } } ]; @@ -58,7 +63,7 @@ function testDeleteMultipleValuesFromVectorStore() returns error? { id: index, embedding: [1.0, 2.0, 3.0], chunk: { - 'type: "text", + 'type: "text", content: "This is a test chunk" } } @@ -68,21 +73,24 @@ function testDeleteMultipleValuesFromVectorStore() returns error? { test:assertTrue(result !is error); } -@test:Config {} +@test:Config { + dependsOn: [testAddingValuesToVectorStore] +} function testQueryValuesFromVectorStore() returns error? { ai:VectorStoreQuery query = { filters: { filters: [ { + 'key: "createdAt", operator: ai:EQUAL, - 'key: "content", - value: "This is a test chunk" + value: createdAt } ] } }; - ai:VectorMatch[]|ai:Error result = mockVectorStore.query(query); - test:assertTrue(result !is error); + ai:VectorMatch[] result = check mockVectorStore.query(query); + test:assertTrue(result.length() > 0); + test:assertEquals(result[0].chunk.metadata?.createdAt, createdAt); } @test:Config {} From 01856e04335f6cb529469614af34a41b26e69cac Mon Sep 17 00:00:00 2001 From: Nuvindu Date: Thu, 11 Sep 2025 17:13:20 +0530 Subject: [PATCH 5/7] Convert datetime types to string in metadata filters --- ballerina/utils.bal | 11 +++++------ ballerina/vector_store.bal | 22 +++++++++++++++++----- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/ballerina/utils.bal b/ballerina/utils.bal index 4f2c5b2..a8cbf9c 100644 --- a/ballerina/utils.bal +++ b/ballerina/utils.bal @@ -20,6 +20,7 @@ import ballerina/time; # Converts metadata filters to Weaviate compatible filter format # # + filters - The metadata filters containing filter conditions and logical operators +# + metadataFields - The fields of the metadata to be filtered # + return - A map representing the converted filter structure 
or an error if conversion fails isolated function convertWeaviateFilters(ai:MetadataFilters filters, string[] metadataFields) returns map|ai:Error { (ai:MetadataFilters|ai:MetadataFilter)[]? rawFilters = filters.filters; @@ -34,13 +35,11 @@ isolated function convertWeaviateFilters(ai:MetadataFilters filters, string[] me string weaviateOp = check mapWeaviateOperator(filter.operator); filterMap["path"] = [filter.key]; filterMap["operator"] = weaviateOp; - anydata value = filter.value; - if value is string { - filterMap["valueText"] = value; - } else if value is time:Utc { - filterMap["valueNumber"] = value[0]; + json value = filter.value; + if value is time:Utc { + filterMap["valueDate"] = string `"${time:utcToString(value)}"`; } else { - filterMap["valueText"] = string `${value.toString()}`; + filterMap["valueText"] = value; } filterList.push(filterMap); continue; diff --git a/ballerina/vector_store.bal b/ballerina/vector_store.bal index be85428..339badc 100644 --- a/ballerina/vector_store.bal +++ b/ballerina/vector_store.bal @@ -17,6 +17,7 @@ import ballerina/ai; import ballerina/http; import ballerinax/weaviate; +import ballerina/time; # Weaviate Vector Store implementation with support for Dense, Sparse, and Hybrid vector search modes. # @@ -70,11 +71,21 @@ public isolated class VectorStore { weaviate:Object[] objects = []; foreach ai:VectorEntry entry in entries.cloneReadOnly() { ai:Embedding embedding = entry.embedding; - weaviate:PropertySchema properties = entry.chunk.metadata !is () ? - check entry.chunk.metadata.cloneWithType() : {}; + weaviate:PropertySchema properties = {}; properties[self.chunkFieldName] = entry.chunk.content; properties["type"] = entry.chunk.'type; - + ai:Metadata? 
metadata = entry.chunk.metadata; + if metadata !is () { + foreach string item in metadata.keys() { + anydata metadataValue = metadata.get(item); + if metadataValue is time:Utc { + string utcToString = time:utcToString(metadataValue); + properties[item] = utcToString; + } else { + properties[item] = metadataValue; + } + } + } if embedding is ai:Vector { objects.push({ 'class: self.config.collectionName, @@ -181,9 +192,10 @@ public isolated class VectorStore { QueryResult[] value = check data.cloneWithType(); ai:VectorMatch[] matches = []; foreach weaviate:JsonObject element in value { - map metadata = {}; + ai:Metadata metadata = {}; foreach string fieldName in metadataFields { - metadata[fieldName] = element.get(fieldName); + time:Utc|error metadataValue = time:utcFromString(element.get(fieldName).toString()); + metadata[fieldName] = metadataValue is error ? element.get(fieldName).toString() : metadataValue; } ai:TextChunk chunk = { content: element.content.toString(), From 69c2efbc86889cac9057a507fff16050b90dd88a Mon Sep 17 00:00:00 2001 From: Nuvindu Date: Thu, 11 Sep 2025 17:40:39 +0530 Subject: [PATCH 6/7] Fix test cases to add schema for the collection --- ballerina/tests/test.bal | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/ballerina/tests/test.bal b/ballerina/tests/test.bal index 40bdc64..989ecc1 100644 --- a/ballerina/tests/test.bal +++ b/ballerina/tests/test.bal @@ -18,15 +18,32 @@ import ballerina/ai; import ballerina/test; import ballerina/time; import ballerina/uuid; +import ballerina/http; final VectorStore mockVectorStore = check new ( serviceUrl = "http://localhost:8080", config = { - collectionName: "Chunk" + collectionName: "Test" }, apiKey = "mock-token" ); +@test:BeforeSuite +function beforeSuite() returns error? 
{ + http:Client httpClient = check new ("http://localhost:8080"); + http:Response _ = check httpClient->post(path = "/v1/schema", headers = { + "Content-Type": "application/json" + }, message = { + "class": "Test", + "properties": [ + { "name": "content", "dataType": ["text"] }, + { "name": "type", "dataType": ["string"] }, + { "name": "createdAt", "dataType": ["date"] } + ] + }); +} + + string id = uuid:createRandomUuid(); time:Utc createdAt = time:utcNow(); From 8943b31c40eeba707aab940829724aec2e3ca85c Mon Sep 17 00:00:00 2001 From: Nuvindu Date: Thu, 11 Sep 2025 17:40:55 +0530 Subject: [PATCH 7/7] Enable docker server for anonymous access --- resources/server/compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/server/compose.yml b/resources/server/compose.yml index 815eab3..02614db 100644 --- a/resources/server/compose.yml +++ b/resources/server/compose.yml @@ -16,7 +16,7 @@ services: restart: on-failure:0 environment: QUERY_DEFAULTS_LIMIT: 25 - AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'false' + AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' AUTHENTICATION_APIKEY_ENABLED: 'true' AUTHENTICATION_APIKEY_ALLOWED_KEYS: 'mock-token' AUTHENTICATION_APIKEY_USERS: 'test-user'