From 3604d20ad3d5441e689132edec7b6b2c199b886d Mon Sep 17 00:00:00 2001 From: Will Jones Date: Mon, 4 Nov 2024 11:25:45 -0800 Subject: [PATCH] feat(python,node): support with_row_id in Python and remote (#1784) Needed to support hybrid search in Remote SDK. --- nodejs/__test__/table.test.ts | 11 +++++++++++ nodejs/lancedb/query.ts | 12 ++++++++++++ nodejs/src/query.rs | 10 ++++++++++ python/python/lancedb/query.py | 7 +++++++ python/python/tests/test_query.py | 6 ++++++ python/src/query.rs | 8 ++++++++ rust/lancedb/src/remote/table.rs | 6 ++++++ 7 files changed, 60 insertions(+) diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index 9c6a6d06..33d01858 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -425,6 +425,17 @@ describe("When creating an index", () => { expect(plan2).not.toMatch("LanceScan"); }); + it("should be able to query with row id", async () => { + const results = await tbl + .query() + .nearestTo(queryVec) + .withRowId() + .limit(1) + .toArray(); + expect(results.length).toBe(1); + expect(results[0]).toHaveProperty("_rowid"); + }); + it("should allow parameters to be specified", async () => { await tbl.createIndex("vec", { config: Index.ivfPq({ diff --git a/nodejs/lancedb/query.ts b/nodejs/lancedb/query.ts index 00e0b5fb..32d58e05 100644 --- a/nodejs/lancedb/query.ts +++ b/nodejs/lancedb/query.ts @@ -250,6 +250,18 @@ export class QueryBase return this; } + /** + * Whether to return the row id in the results. + * + * This column can be used to match results between different queries. For + * example, to match results from a full text search and a vector search in + * order to perform hybrid search. 
+ */ + withRowId(): this { + this.doCall((inner: NativeQueryType) => inner.withRowId()); + return this; + } + protected nativeExecute( options?: Partial, ): Promise { diff --git a/nodejs/src/query.rs b/nodejs/src/query.rs index 6ae95142..448ca134 100644 --- a/nodejs/src/query.rs +++ b/nodejs/src/query.rs @@ -85,6 +85,11 @@ impl Query { self.inner = self.inner.clone().fast_search(); } + #[napi] + pub fn with_row_id(&mut self) { + self.inner = self.inner.clone().with_row_id(); + } + #[napi(catch_unwind)] pub async fn execute( &self, @@ -193,6 +198,11 @@ impl VectorQuery { self.inner = self.inner.clone().fast_search(); } + #[napi] + pub fn with_row_id(&mut self) { + self.inner = self.inner.clone().with_row_id(); + } + #[napi(catch_unwind)] pub async fn execute( &self, diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index 75d30270..09eaa414 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -1339,6 +1339,13 @@ class AsyncQueryBase(object): self._inner.fast_search() return self + def with_row_id(self) -> AsyncQuery: + """ + Include the _rowid column in the results. 
+ """ + self._inner.with_row_id() + return self + def postfilter(self) -> AsyncQuery: """ If this is called then filtering will happen after the search instead of diff --git a/python/python/tests/test_query.py b/python/python/tests/test_query.py index 75733bf6..b3f0d26a 100644 --- a/python/python/tests/test_query.py +++ b/python/python/tests/test_query.py @@ -331,6 +331,12 @@ async def test_query_async(table_async: AsyncTable): # Also check an empty query await check_query(table_async.query().where("id < 0"), expected_num_rows=0) + # with row id + await check_query( + table_async.query().select(["id", "vector"]).with_row_id(), + expected_columns=["id", "vector", "_rowid"], + ) + @pytest.mark.asyncio async def test_query_to_arrow_async(table_async: AsyncTable): diff --git a/python/src/query.rs b/python/src/query.rs index e3b127ba..0f93f9ce 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -72,6 +72,10 @@ impl Query { self.inner = self.inner.clone().fast_search(); } + pub fn with_row_id(&mut self) { + self.inner = self.inner.clone().with_row_id(); + } + pub fn postfilter(&mut self) { self.inner = self.inner.clone().postfilter(); } @@ -158,6 +162,10 @@ impl VectorQuery { self.inner = self.inner.clone().fast_search(); } + pub fn with_row_id(&mut self) { + self.inner = self.inner.clone().with_row_id(); + } + pub fn column(&mut self, column: String) { self.inner = self.inner.clone().column(&column); } diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index 8aba5d3e..a8754cc3 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -167,6 +167,10 @@ impl RemoteTable { body["fast_search"] = serde_json::Value::Bool(true); } + if params.with_row_id { + body["with_row_id"] = serde_json::Value::Bool(true); + } + if let Some(full_text_search) = &params.full_text_search { if full_text_search.wand_factor.is_some() { return Err(Error::NotSupported { @@ -1173,6 +1177,7 @@ mod tests { }, "k": 10, "vector": [], + 
"with_row_id": true, }); assert_eq!(body, expected_body); @@ -1195,6 +1200,7 @@ mod tests { FullTextSearchQuery::new("hello world".into()) .columns(Some(vec!["a".into(), "b".into()])), ) + .with_row_id() .limit(10) .execute() .await