feat: support remote empty query (#1828)

Support sending empty query types to remote lancedb. also include offset
and limit, where were previously omitted.
This commit is contained in:
Rob Meng
2024-11-13 23:04:52 -05:00
committed by GitHub
parent abd75e0ead
commit b724b1a01f
4 changed files with 31 additions and 9 deletions

View File

@@ -943,12 +943,16 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
class LanceEmptyQueryBuilder(LanceQueryBuilder):
def to_arrow(self) -> pa.Table:
ds = self._table.to_lance()
return ds.to_table(
query = Query(
columns=self._columns,
filter=self._where,
limit=self._limit,
k=self._limit or 10,
with_row_id=self._with_row_id,
vector=[],
# not actually respected in remote query
offset=self._offset or 0,
)
return self._table._execute_query(query).read_all()
def rerank(self, reranker: Reranker) -> LanceEmptyQueryBuilder:
"""Rerank the results using the specified reranker.

View File

@@ -327,10 +327,6 @@ class RemoteTable(Table):
- and also the "_distance" column which is the distance between the query
vector and the returned vector.
"""
# empty query builder is not supported in saas, raise error
if query is None and query_type != "hybrid":
raise ValueError("Empty query is not supported")
return LanceQueryBuilder.create(
self,
query,

View File

@@ -197,6 +197,23 @@ def test_query_sync_minimal():
assert data == expected
def test_query_sync_empty_query():
def handler(body):
assert body == {
"k": 10,
"filter": "true",
"vector": [],
"columns": ["id"],
}
return pa.table({"id": [1, 2, 3]})
with query_test_table(handler) as table:
data = table.search(None).where("true").select(["id"]).limit(10).to_list()
expected = [{"id": 1}, {"id": 2}, {"id": 3}]
assert data == expected
def test_query_sync_maximal():
def handler(body):
assert body == {

View File

@@ -892,10 +892,15 @@ def test_empty_query(db):
table = LanceTable.create(db, "my_table2", data=[{"id": i} for i in range(100)])
df = table.search().select(["id"]).to_pandas()
assert len(df) == 10
# None is the same as default
df = table.search().select(["id"]).limit(None).to_pandas()
assert len(df) == 100
assert len(df) == 10
# invalid limist is the same as None, wihch is the same as default
df = table.search().select(["id"]).limit(-1).to_pandas()
assert len(df) == 100
assert len(df) == 10
# valid limit should work
df = table.search().select(["id"]).limit(42).to_pandas()
assert len(df) == 42
def test_search_with_schema_inf_single_vector(db):