mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
Support hybrid search in both the Rust and Node.js SDKs. - Adds a new rerankers package to Rust LanceDB, with the implementation of the default RRF reranker - Adds a new hybrid package to lancedb, with some helper methods related to hybrid search, such as normalizing scores and converting score columns to rank columns - Adds the capability for a LanceDB VectorQuery to perform hybrid search if it has both nearest-vector and full-text-search parameters. - Adds wrappers for reranker implementations to the Node.js SDK. Additional rerankers will be added in follow-up PRs https://github.com/lancedb/lancedb/issues/1921 --- Notes about how the Rust rerankers are wrapped for calling from JS: I wanted to keep the core reranker logic, and the invocation of the reranker by the query code, in Rust. This aligns with the philosophy of the new Node SDK, where it's just a thin wrapper around Rust. However, I also wanted to support users who want to add custom rerankers written in JavaScript. When we add a reranker to the query from JavaScript, it adds a special Rust reranker that has a callback to the JavaScript code (which could then turn around and call an underlying Rust reranker implementation if desired). This adds a bit of complexity, but overall I think it moves us in the right direction of having the majority of the query logic in the underlying Rust SDK while keeping the option open to support custom JavaScript rerankers.
80 lines
2.2 KiB
TypeScript
80 lines
2.2 KiB
TypeScript
// SPDX-License-Identifier: Apache-2.0
|
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
|
|
import { RecordBatch } from "apache-arrow";
|
|
import * as tmp from "tmp";
|
|
import { Connection, Index, Table, connect, makeArrowTable } from "../lancedb";
|
|
import { RRFReranker } from "../lancedb/rerankers";
|
|
|
|
/**
 * Tests for reranking the results of a hybrid (vector + full-text) query.
 *
 * Every test runs against a fresh two-row table stored in its own temporary
 * directory, with a full-text-search index on the `text` column.
 */
describe("rerankers", function () {
  let tmpDir: tmp.DirResult;
  let conn: Connection;
  let table: Table;

  beforeEach(async () => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
    conn = await connect(tmpDir.name);
    table = await conn.createTable("mytable", [
      { vector: [0.1, 0.1], text: "dog" },
      { vector: [0.2, 0.2], text: "cat" },
    ]);
    await table.createIndex("text", {
      config: Index.fts(),
      replace: true,
    });
  });

  // A new temporary directory is created before every test; remove it again
  // so repeated runs do not accumulate database files on disk.
  afterEach(() => {
    tmpDir.removeCallback();
  });

  it("will query with the custom reranker", async function () {
    // Single source of truth: the reranker returns these rows and the
    // assertion below checks that the query hands back exactly the same rows.
    const expectedResult = [
      {
        text: "albert",
        // biome-ignore lint/style/useNamingConvention: this is the lance field name
        _relevance_score: 0.99,
      },
    ];

    /** Reranker written in TypeScript that ignores its inputs. */
    class MyCustomReranker {
      async rerankHybrid(
        _query: string,
        _vecResults: RecordBatch,
        _ftsResults: RecordBatch,
      ): Promise<RecordBatch> {
        // no reranker logic, just return some static data
        const staticTable = makeArrowTable(expectedResult);
        return staticTable.batches[0];
      }
    }

    const rows = await table
      .query()
      .nearestTo([0.1, 0.1])
      .fullTextSearch("dog")
      .rerank(new MyCustomReranker())
      .select(["text"])
      .limit(5)
      .toArray();

    // convert StructRow to Object
    const plainRows: unknown = JSON.parse(JSON.stringify(rows));
    expect(plainRows).toEqual(expectedResult);
  });

  it("will query with RRFReranker", async function () {
    // smoke test to see if the Rust wrapping Typescript is wired up correctly
    const result = await table
      .query()
      .nearestTo([0.1, 0.1])
      .fullTextSearch("dog")
      .rerank(await RRFReranker.create())
      .select(["text"])
      .limit(5)
      .toArray();

    expect(result).toHaveLength(2);
  });
});
|