feat: enable prefilter in node js (#675)

Enable prefiltering in Node.js for both native and remote queries.
This commit is contained in:
Rob Meng
2023-12-01 16:49:10 -05:00
committed by Weston Pace
parent c1c3083b74
commit 59c25574f0
5 changed files with 30 additions and 3 deletions

View File

@@ -32,6 +32,7 @@ export class Query<T = number[]> {
private _select?: string[]
private _filter?: string
private _metricType?: MetricType
private _prefilter: boolean
protected readonly _embeddings?: EmbeddingFunction<T>
constructor (query: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
@@ -44,6 +45,7 @@ export class Query<T = number[]> {
this._filter = undefined
this._metricType = undefined
this._embeddings = embeddings
this._prefilter = false
}
/***
@@ -102,6 +104,11 @@ export class Query<T = number[]> {
return this
}
/**
 * Choose between prefiltering and postfiltering for this query.
 *
 * This method only records the flag on the query object; it performs no
 * filtering itself. The flag is consumed later, when the query is executed.
 *
 * NOTE(review): presumably `true` applies the filter before the vector
 * search and `false` applies it to the search results afterwards; confirm
 * against the query backend.
 *
 * @param value - `true` to request prefiltering, `false` to request postfiltering.
 * @returns This query instance, so calls can be chained.
 */
prefilter (value: boolean): Query<T> {
this._prefilter = value
return this
}
/**
* Execute the query and return the results as an Array of Objects
*/

View File

@@ -38,6 +38,7 @@ export class HttpLancedbClient {
vector: number[],
k: number,
nprobes: number,
prefilter: boolean,
refineFactor?: number,
columns?: string[],
filter?: string
@@ -50,7 +51,8 @@ export class HttpLancedbClient {
nprobes,
refineFactor,
columns,
filter
filter,
prefilter
},
{
headers: {

View File

@@ -156,7 +156,8 @@ export class RemoteQuery<T = number[]> extends Query<T> {
(this as any)._nprobes,
(this as any)._refineFactor,
(this as any)._select,
(this as any)._filter
(this as any)._filter,
(this as any)._prefilter
)
return data.toArray().map((entry: Record<string, unknown>) => {

View File

@@ -102,6 +102,20 @@ describe('LanceDB client', function () {
assertResults(results)
})
// Checks that the prefilter flag changes how many rows a filtered, limited
// vector search returns on an indexed table.
it('should correctly process prefilter/postfilter', async function () {
// NOTE(review): presumably 16 is the vector dimension (the query vectors below
// have 16 elements) and 300 is the row count; confirm against createTestDB.
const uri = await createTestDB(16, 300)
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
// Build an IVF_PQ index on the vector column before running the searches.
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })
// Postfilter (prefilter = false): expect fewer rows than the limit of 10.
// NOTE(review): presumably rows failing 'id >= 10' are dropped after the
// nearest neighbours have already been limited; confirm.
let results = await table.search(new Array(16).fill(0.1)).limit(10).filter('id >= 10').prefilter(false).execute()
assert.isTrue(results.length < 10)
// Prefilter (prefilter = true): expect exactly the limit of 10 rows.
results = await table.search(new Array(16).fill(0.1)).limit(10).filter('id >= 10').prefilter(true).execute()
assert.isTrue(results.length === 10)
})
it('select only a subset of columns', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)

View File

@@ -48,6 +48,8 @@ impl JsQuery {
.map(|s| s.value(&mut cx))
.map(|s| MetricType::try_from(s.as_str()).unwrap());
let prefilter = query_obj.get::<JsBoolean, _, _>(&mut cx, "_prefilter")?.value(&mut cx);
let is_electron = cx
.argument::<JsBoolean>(1)
.or_throw(&mut cx)?
@@ -69,7 +71,8 @@ impl JsQuery {
.nprobes(nprobes)
.filter(filter)
.metric_type(metric_type)
.select(select);
.select(select)
.prefilter(prefilter);
let record_batch_stream = builder.execute();
let results = record_batch_stream
.and_then(|stream| {