feat(python,node): support with_row_id in Python and remote (#1784)

Needed to support hybrid search in Remote SDK.
This commit is contained in:
Will Jones
2024-11-04 11:25:45 -08:00
committed by GitHub
parent 9708d829a9
commit 3604d20ad3
7 changed files with 60 additions and 0 deletions

View File

@@ -425,6 +425,17 @@ describe("When creating an index", () => {
expect(plan2).not.toMatch("LanceScan");
});
it("should be able to query with row id", async () => {
  // Build a nearest-neighbour query for a single hit that also asks for the
  // row id column, then materialise it.
  const rowIdQuery = tbl.query().nearestTo(queryVec).withRowId().limit(1);
  const rows = await rowIdQuery.toArray();

  // Exactly one row comes back and it carries the `_rowid` property.
  expect(rows.length).toBe(1);
  expect(rows[0]).toHaveProperty("_rowid");
});
it("should allow parameters to be specified", async () => {
await tbl.createIndex("vec", {
config: Index.ivfPq({

View File

@@ -250,6 +250,18 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
return this;
}
/**
* Whether to return the row id in the results.
*
* This column can be used to match results between different queries. For
* example, to match results from a full text search and a vector search in
* order to perform hybrid search.
*
* @returns This query object, so that further builder calls can be chained.
*/
withRowId(): this {
// Forward the request to the underlying native query object.
this.doCall((inner: NativeQueryType) => inner.withRowId());
return this;
}
protected nativeExecute(
options?: Partial<QueryExecutionOptions>,
): Promise<NativeBatchIterator> {

View File

@@ -85,6 +85,11 @@ impl Query {
self.inner = self.inner.clone().fast_search();
}
/// Requests that the row id be included in the results of this query.
///
/// Replaces `self.inner` with a clone of itself that has had `with_row_id()`
/// applied.
#[napi]
pub fn with_row_id(&mut self) {
// NOTE(review): the clone is presumably needed because the inner builder
// method takes `self` by value — confirm against the inner query type.
self.inner = self.inner.clone().with_row_id();
}
#[napi(catch_unwind)]
pub async fn execute(
&self,
@@ -193,6 +198,11 @@ impl VectorQuery {
self.inner = self.inner.clone().fast_search();
}
/// Requests that the row id be included in the results of this vector query.
///
/// Replaces `self.inner` with a clone of itself that has had `with_row_id()`
/// applied.
#[napi]
pub fn with_row_id(&mut self) {
// NOTE(review): the clone is presumably needed because the inner builder
// method takes `self` by value — confirm against the inner query type.
self.inner = self.inner.clone().with_row_id();
}
#[napi(catch_unwind)]
pub async fn execute(
&self,

View File

@@ -1339,6 +1339,13 @@ class AsyncQueryBase(object):
self._inner.fast_search()
return self
def with_row_id(self) -> AsyncQuery:
"""
Include the _rowid column in the results.

The request is forwarded to the wrapped ``_inner`` query object, and this
query is returned so that further builder calls can be chained.
"""
self._inner.with_row_id()
return self
def postfilter(self) -> AsyncQuery:
"""
If this is called then filtering will happen after the search instead of

View File

@@ -331,6 +331,12 @@ async def test_query_async(table_async: AsyncTable):
# Also check an empty query
await check_query(table_async.query().where("id < 0"), expected_num_rows=0)
# with row id
await check_query(
table_async.query().select(["id", "vector"]).with_row_id(),
expected_columns=["id", "vector", "_rowid"],
)
@pytest.mark.asyncio
async def test_query_to_arrow_async(table_async: AsyncTable):

View File

@@ -72,6 +72,10 @@ impl Query {
self.inner = self.inner.clone().fast_search();
}
/// Applies `with_row_id()` to the wrapped query.
///
/// Replaces `self.inner` with a clone of itself that has had the option
/// applied; nothing is returned.
pub fn with_row_id(&mut self) {
// NOTE(review): the clone is presumably needed because the inner builder
// method takes `self` by value — confirm against the inner query type.
self.inner = self.inner.clone().with_row_id();
}
/// Applies `postfilter()` to the wrapped query.
///
/// Replaces `self.inner` with a clone of itself that has had the option
/// applied; nothing is returned.
///
/// NOTE(review): presumably this makes filtering happen after the search
/// rather than before it — confirm against the inner query's documentation.
pub fn postfilter(&mut self) {
self.inner = self.inner.clone().postfilter();
}
@@ -158,6 +162,10 @@ impl VectorQuery {
self.inner = self.inner.clone().fast_search();
}
/// Applies `with_row_id()` to the wrapped vector query.
///
/// Replaces `self.inner` with a clone of itself that has had the option
/// applied; nothing is returned.
pub fn with_row_id(&mut self) {
// NOTE(review): the clone is presumably needed because the inner builder
// method takes `self` by value — confirm against the inner query type.
self.inner = self.inner.clone().with_row_id();
}
/// Applies `column(..)` to the wrapped vector query.
///
/// Takes ownership of `column` and passes it by reference to the inner
/// builder; `self.inner` is replaced with the updated clone.
///
/// NOTE(review): presumably this selects which vector column the search runs
/// against — confirm against the inner query's documentation.
pub fn column(&mut self, column: String) {
self.inner = self.inner.clone().column(&column);
}

View File

@@ -167,6 +167,10 @@ impl<S: HttpSend> RemoteTable<S> {
body["fast_search"] = serde_json::Value::Bool(true);
}
if params.with_row_id {
body["with_row_id"] = serde_json::Value::Bool(true);
}
if let Some(full_text_search) = &params.full_text_search {
if full_text_search.wand_factor.is_some() {
return Err(Error::NotSupported {
@@ -1173,6 +1177,7 @@ mod tests {
},
"k": 10,
"vector": [],
"with_row_id": true,
});
assert_eq!(body, expected_body);
@@ -1195,6 +1200,7 @@ mod tests {
FullTextSearchQuery::new("hello world".into())
.columns(Some(vec!["a".into(), "b".into()])),
)
.with_row_id()
.limit(10)
.execute()
.await