Files
lancedb/python/python/tests/docs/test_distance_range.py
Will Jones ecdee4d2b1 feat(python): add search() method to async API (#2049)
Reviving #1966.

Closes #1938

The `search()` method can apply embeddings for the user. This simplifies
hybrid search, so instead of writing:

```python
vector_query = embeddings.compute_query_embeddings("flower moon")[0]
await (
    async_tbl.query()
    .nearest_to(vector_query)
    .nearest_to_text("flower moon")
    .to_pandas()
)
```

You can write:

```python
await (await async_tbl.search("flower moon", query_type="hybrid")).to_pandas()
```

Unfortunately, we had to do a double-await here because `search()` needs
to be async. This is because it often needs to do IO to retrieve and run
an embedding function.
2025-02-24 14:19:25 -08:00

66 lines
1.9 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import shutil
import pytest
# --8<-- [start:imports]
import lancedb
import numpy as np
# --8<-- [end:imports]
# Delete the demo database directory left over from a previous run so the
# tests below start from an empty database. `ignore_errors=True` makes this a
# no-op when the directory does not exist yet.
shutil.rmtree("data/distance_range_demo", ignore_errors=True)
def test_binary_vector():
    """Run the synchronous ``distance_range`` example end to end.

    The code between the ``--8<--`` section markers is the example itself and
    is kept verbatim; setup and teardown stay outside the markers.
    """
    # This test and test_binary_vector_async use the same database path and
    # table name. Wipe the directory first so a table left behind by an
    # earlier failed test cannot make `create_table` fail here and hide the
    # original error.
    shutil.rmtree("data/distance_range_demo", ignore_errors=True)
    # --8<-- [start:sync_distance_range]
    db = lancedb.connect("data/distance_range_demo")
    data = [
        {
            "id": i,
            "vector": np.random.random(256),
        }
        for i in range(1024)
    ]
    tbl = db.create_table("my_table", data=data)
    query = np.random.random(256)

    # Search for the vectors within the range of [0.1, 0.5)
    tbl.search(query).distance_range(0.1, 0.5).to_arrow()

    # Search for the vectors with the distance less than 0.5
    tbl.search(query).distance_range(upper_bound=0.5).to_arrow()

    # Search for the vectors with the distance greater or equal to 0.1
    tbl.search(query).distance_range(lower_bound=0.1).to_arrow()
    # --8<-- [end:sync_distance_range]
    # Drop the table so a later test can create "my_table" again.
    db.drop_table("my_table")
@pytest.mark.asyncio
async def test_binary_vector_async():
    """Run the asynchronous ``distance_range`` example end to end.

    The code between the ``--8<--`` section markers is the example itself and
    is kept verbatim; setup and teardown stay outside the markers.
    """
    # This test and test_binary_vector use the same database path and table
    # name. Wipe the directory first so a table left behind by an earlier
    # failed test cannot make `create_table` fail here and hide the original
    # error.
    shutil.rmtree("data/distance_range_demo", ignore_errors=True)
    # --8<-- [start:async_distance_range]
    db = await lancedb.connect_async("data/distance_range_demo")
    data = [
        {
            "id": i,
            "vector": np.random.random(256),
        }
        for i in range(1024)
    ]
    tbl = await db.create_table("my_table", data=data)
    query = np.random.random(256)

    # Search for the vectors within the range of [0.1, 0.5)
    # `search()` is itself a coroutine, hence the inner await before chaining.
    await (await tbl.search(query)).distance_range(0.1, 0.5).to_arrow()

    # Search for the vectors with the distance less than 0.5
    await (await tbl.search(query)).distance_range(upper_bound=0.5).to_arrow()

    # Search for the vectors with the distance greater or equal to 0.1
    await (await tbl.search(query)).distance_range(lower_bound=0.1).to_arrow()
    # --8<-- [end:async_distance_range]
    # Drop the table so a later test can create "my_table" again.
    await db.drop_table("my_table")