feat: support specifying the ef search param (#1844)

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
This commit is contained in:
BubbleCal
2024-11-19 23:12:25 +08:00
committed by GitHub
parent f2e3989831
commit b2f88f0b29
10 changed files with 165 additions and 0 deletions

View File

@@ -477,6 +477,54 @@ describe("When creating an index", () => {
expect(rst.numRows).toBe(1);
});
it("should create and search IVF_HNSW indices", async () => {
  // Build an HNSW index (scalar-quantized variant) on the vector column.
  await tbl.createIndex("vec", { config: Index.hnswSq() });

  // The index directory on disk should contain exactly one entry.
  const indicesPath = path.join(tmpDir.name, "test.lance", "_indices");
  const indexEntries = fs.readdirSync(indicesPath);
  expect(indexEntries).toHaveLength(1);

  // The table should report that same single index.
  const listedIndices = await tbl.listIndices();
  expect(listedIndices.length).toBe(1);
  expect(listedIndices[0]).toEqual({
    name: "vec_idx",
    indexType: "IvfHnswSq",
    columns: ["vec"],
  });

  // Search without specifying the column
  const dotSearch = tbl
    .query()
    .limit(2)
    .nearestTo(queryVec)
    .distanceType("dot");
  const dotResult = await dotSearch.toArrow();
  expect(dotResult.numRows).toBe(2);

  // Search using `vectorSearch`
  const vectorSearchResult = await tbl
    .vectorSearch(queryVec)
    .limit(2)
    .toArrow();
  expect(vectorSearchResult.numRows).toBe(2);

  // Search with specifying the column; the output must match the
  // `vectorSearch` result above, which did not name a column.
  const explicitColumnResult = await tbl
    .query()
    .limit(2)
    .nearestTo(queryVec)
    .column("vec")
    .toArrow();
  expect(explicitColumnResult.numRows).toBe(2);
  expect(vectorSearchResult.toString()).toEqual(
    explicitColumnResult.toString(),
  );

  // test offset: with limit 2 and offset 1 a single row is expected back
  const offsetResult = await tbl
    .query()
    .limit(2)
    .offset(1)
    .nearestTo(queryVec)
    .toArrow();
  expect(offsetResult.numRows).toBe(1);

  // test ef: setting the HNSW `ef` parameter must still return `limit` rows
  const efResult = await tbl
    .query()
    .limit(2)
    .nearestTo(queryVec)
    .ef(100)
    .toArrow();
  expect(efResult.numRows).toBe(2);
});
it("should be able to query unindexed data", async () => {
await tbl.createIndex("vec");
await tbl.add([

View File

@@ -385,6 +385,20 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
return this;
}
/**
 * Set how many candidates are considered during the search.
 *
 * Only takes effect when the vector column has an HNSW index; if there is
 * no index the value is ignored.
 *
 * A larger value raises the recall of the query at the cost of higher
 * latency. The default value is 1.5*limit.
 *
 * @param ef - The number of candidates to consider.
 * @returns This query, so that further calls can be chained.
 */
ef(ef: number): VectorQuery {
  // Forward the setting to the underlying native query object.
  super.doCall((nativeQuery) => nativeQuery.ef(ef));
  return this;
}
/**
* Set the vector column to query
*

View File

@@ -167,6 +167,11 @@ impl VectorQuery {
self.inner = self.inner.clone().nprobes(nprobe as usize);
}
#[napi]
pub fn ef(&mut self, ef: u32) {
    // The binding receives a u32; the inner builder is called with a usize.
    let candidates = ef as usize;
    // The inner builder's `ef` is applied to a clone and the result stored back.
    let updated = self.inner.clone().ef(candidates);
    self.inner = updated;
}
#[napi]
pub fn bypass_vector_index(&mut self) {
self.inner = self.inner.clone().bypass_vector_index()