Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions .github/workflows/test-cassandra.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
name: Test Cassandra

on:
workflow_run:
workflows:
- Test
types:
- completed
push:
branches: [master]
pull_request:
branches: [master]

jobs:
build:
Expand Down
9 changes: 4 additions & 5 deletions .github/workflows/test-mongo.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
name: Test Mongo

on:
workflow_run:
workflows:
- Test
types:
- completed
push:
branches: [master]
pull_request:
branches: [master]

jobs:
build:
Expand Down
9 changes: 4 additions & 5 deletions .github/workflows/test-redis.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
name: Test Redis

on:
workflow_run:
workflows:
- Test
types:
- completed
push:
branches: [master]
pull_request:
branches: [master]

jobs:
build:
Expand Down
6 changes: 6 additions & 0 deletions datasketch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,15 @@
WeightedMinHashLSH = MinHashLSH
WeightedMinHashLSHForest = MinHashLSHForest

# Optional async export (requires motor or redis.asyncio)
try:
from datasketch.aio import AsyncMinHashLSH
except ImportError:
AsyncMinHashLSH = None # type: ignore[misc,assignment]

__all__ = [
"HNSW",
"AsyncMinHashLSH",
"HyperLogLog",
"HyperLogLogPlusPlus",
"LeanMinHash",
Expand Down
36 changes: 36 additions & 0 deletions datasketch/aio/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Async MinHash LSH module.

This module provides asynchronous implementations of MinHash LSH for use with
async storage backends like MongoDB (via motor) and Redis (via redis.asyncio).

Example:
    .. code-block:: python

        from datasketch.aio import AsyncMinHashLSH
        from datasketch import MinHash

        async def main():
            async with AsyncMinHashLSH(
                storage_config={"type": "aiomongo", "mongo": {"host": "localhost", "port": 27017}},
                threshold=0.5,
                num_perm=128,
                prepickle=True,  # Enable string keys
            ) as lsh:
                m = MinHash(num_perm=128)
                m.update(b"data")
                await lsh.insert("key", m)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The example code will raise a TypeError because you are inserting a string key "key" when using aiomongo storage. By default (prepickle=False), aiomongo storage requires keys to be bytes.

Suggested change
await lsh.insert("key", m)
await lsh.insert(b"key", m)

                result = await lsh.query(m)

"""

from datasketch.aio.lsh import (
AsyncMinHashLSH,
AsyncMinHashLSHDeleteSession,
AsyncMinHashLSHInsertionSession,
)

__all__ = [
"AsyncMinHashLSH",
"AsyncMinHashLSHDeleteSession",
"AsyncMinHashLSHInsertionSession",
]
Loading
Loading