mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 06:39:57 +00:00
Reviving #1966. Closes #1938

The `search()` method can apply embeddings for the user. This simplifies hybrid search, so instead of writing:

```python
vector_query = embeddings.compute_query_embeddings("flower moon")[0]
await (
    async_tbl.query()
    .nearest_to(vector_query)
    .nearest_to_text("flower moon")
    .to_pandas()
)
```

You can write:

```python
await (await async_tbl.search("flower moon", query_type="hybrid")).to_pandas()
```

Unfortunately, we had to do a double-await here because `search()` needs to be async. This is because it often needs to do IO to retrieve and run an embedding function.
66 lines
1.9 KiB
Python
66 lines
1.9 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
|
|
import shutil
|
|
import pytest
|
|
|
|
# --8<-- [start:imports]
|
|
import lancedb
|
|
import numpy as np
|
|
# --8<-- [end:imports]
|
|
|
|
# Runs at import time: delete the demo database directory that both tests
# below connect to, so leftovers from an earlier run are gone before any table
# is created. ignore_errors=True keeps a missing directory from raising.
shutil.rmtree("data/distance_range_demo", ignore_errors=True)
|
|
|
|
|
|
def test_binary_vector():
    """Smoke-test ``distance_range`` on a synchronous vector search.

    Connects to the local database at ``data/distance_range_demo``, creates
    "my_table" with 1024 rows (an ``id`` plus a 256-element random vector),
    and runs three searches: both bounds given, only ``upper_bound``, and
    only ``lower_bound``.  Nothing is asserted about the results; the test
    passes as long as none of the calls raises.

    NOTE(review): the name says "binary vector", but the vectors here come
    from ``np.random.random`` (floats) -- the name looks like a leftover
    from another test; confirm before renaming.

    NOTE(review): the ``--8<--`` start/end comments look like documentation
    snippet markers, so the code between them is presumably published
    verbatim -- keep it copy-paste runnable and free of test-only noise.
    """
    # --8<-- [start:sync_distance_range]
    db = lancedb.connect("data/distance_range_demo")
    data = [
        {
            "id": i,
            "vector": np.random.random(256),
        }
        for i in range(1024)
    ]
    tbl = db.create_table("my_table", data=data)
    query = np.random.random(256)

    # Search for the vectors within the range of [0.1, 0.5)
    tbl.search(query).distance_range(0.1, 0.5).to_arrow()

    # Search for the vectors with the distance less than 0.5
    tbl.search(query).distance_range(upper_bound=0.5).to_arrow()

    # Search for the vectors with the distance greater or equal to 0.1
    tbl.search(query).distance_range(lower_bound=0.1).to_arrow()

    # --8<-- [end:sync_distance_range]
    # Cleanup stays outside the snippet markers. test_binary_vector_async
    # creates "my_table" in this same database directory, so the table is
    # dropped here rather than left behind.
    db.drop_table("my_table")
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_binary_vector_async():
    """Smoke-test ``distance_range`` on a vector search via the async API.

    Connects with ``lancedb.connect_async`` to ``data/distance_range_demo``,
    creates "my_table" with 1024 rows (an ``id`` plus a 256-element random
    vector), and runs three searches: both bounds given, only
    ``upper_bound``, and only ``lower_bound``.  Nothing is asserted about the
    results; the test passes as long as none of the calls raises.

    Each search uses two awaits: ``tbl.search(query)`` is awaited first to
    obtain the query object, and the chained ``to_arrow()`` call is awaited
    second.

    NOTE(review): the name says "binary vector", but the vectors here come
    from ``np.random.random`` (floats) -- the name looks like a leftover
    from another test; confirm before renaming.

    NOTE(review): the ``--8<--`` start/end comments look like documentation
    snippet markers, so the code between them is presumably published
    verbatim -- keep it copy-paste runnable and free of test-only noise.
    """
    # --8<-- [start:async_distance_range]
    db = await lancedb.connect_async("data/distance_range_demo")
    data = [
        {
            "id": i,
            "vector": np.random.random(256),
        }
        for i in range(1024)
    ]
    tbl = await db.create_table("my_table", data=data)
    query = np.random.random(256)

    # Search for the vectors within the range of [0.1, 0.5)
    await (await tbl.search(query)).distance_range(0.1, 0.5).to_arrow()

    # Search for the vectors with the distance less than 0.5
    await (await tbl.search(query)).distance_range(upper_bound=0.5).to_arrow()

    # Search for the vectors with the distance greater or equal to 0.1
    await (await tbl.search(query)).distance_range(lower_bound=0.1).to_arrow()

    # --8<-- [end:async_distance_range]
    # Cleanup stays outside the snippet markers. test_binary_vector creates
    # "my_table" in this same database directory, so the table is dropped
    # here rather than left behind.
    await db.drop_table("my_table")
|