mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-07 20:32:59 +00:00
Support hybrid search in both Rust and Node SDKs. - Adds a new rerankers package to Rust LanceDB, with the implementation of the default RRF reranker - Adds a new hybrid package to lancedb, with some helper methods related to hybrid search such as normalizing scores and converting score columns to rank columns - Adds capability to LanceDB VectorQuery to perform hybrid search if it has both a nearest vector and full text search parameters. - Adds wrappers for reranker implementations to the Node.js SDK. Additional rerankers will be added in follow-up PRs https://github.com/lancedb/lancedb/issues/1921 --- Notes about how the Rust rerankers are wrapped for calling from JS: I wanted to keep the core reranker logic, and the invocation of the reranker by the query code, in Rust. This aligns with the philosophy of the new Node SDK where it's just a thin wrapper around Rust. However, I also wanted to have support for users who want to add custom rerankers written in JavaScript. When we add a reranker to the query from JavaScript, it adds a special Rust reranker that has a callback to the JavaScript code (which could then turn around and call an underlying Rust reranker implementation if desired). This adds a bit of complexity, but overall I think it moves us in the right direction of having the majority of the query logic in the underlying Rust SDK while keeping the option open to support custom JavaScript rerankers.
41 lines
1.1 KiB
TypeScript
41 lines
1.1 KiB
TypeScript
// SPDX-License-Identifier: Apache-2.0
|
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
|
|
import { RecordBatch } from "apache-arrow";
|
|
import { fromBufferToRecordBatch, fromRecordBatchToBuffer } from "../arrow";
|
|
import { RrfReranker as NativeRRFReranker } from "../native";
|
|
|
|
/**
 * Hybrid-search reranker based on the Reciprocal Rank Fusion (RRF) algorithm.
 *
 * This class is a thin wrapper: it holds a native reranker handle and
 * delegates the actual reranking to it, exchanging record batches with the
 * native side as serialized buffers.
 */
export class RRFReranker {
  private inner: NativeRRFReranker;

  /**
   * Wraps an existing native reranker handle.
   *
   * @param inner - Native reranker that performs the reranking.
   */
  constructor(inner: NativeRRFReranker) {
    this.inner = inner;
  }

  /**
   * Builds a reranker backed by a freshly constructed native instance.
   *
   * @param k - Value forwarded to the native constructor inside a
   *   one-element `Float32Array`; defaults to 60. Presumably the RRF rank
   *   constant — confirm against the native implementation.
   * @returns A promise that resolves to the new wrapper.
   */
  public static async create(k: number = 60) {
    const kParam = new Float32Array([k]);
    const native = await NativeRRFReranker.tryNew(kParam);
    return new RRFReranker(native);
  }

  /**
   * Merges vector-search and full-text-search results into one reranked batch.
   *
   * @param query - Query string passed through to the native reranker.
   * @param vecResults - Results of the vector search.
   * @param ftsResults - Results of the full-text search.
   * @returns The batch decoded from the native reranker's output buffer.
   */
  async rerankHybrid(
    query: string,
    vecResults: RecordBatch,
    ftsResults: RecordBatch,
  ): Promise<RecordBatch> {
    // Serialize the inputs one after the other, vector results first.
    const vecBytes = await fromRecordBatchToBuffer(vecResults);
    const ftsBytes = await fromRecordBatchToBuffer(ftsResults);

    const rerankedBytes = await this.inner.rerankHybrid(
      query,
      vecBytes,
      ftsBytes,
    );

    // NOTE(review): the assertion assumes decoding always yields a batch;
    // confirm against the return type of fromBufferToRecordBatch.
    return (await fromBufferToRecordBatch(rerankedBytes)) as RecordBatch;
  }
}
|