feat: enable prefilter in node js (#675)

Enable prefiltering in the Node.js client for both native and remote queries.
This commit is contained in:
Rob Meng
2023-12-01 16:49:10 -05:00
committed by GitHub
parent a2a8f9615e
commit 72765d8e1a
5 changed files with 30 additions and 3 deletions

View File

@@ -32,6 +32,7 @@ export class Query<T = number[]> {
private _select?: string[] private _select?: string[]
private _filter?: string private _filter?: string
private _metricType?: MetricType private _metricType?: MetricType
private _prefilter: boolean
protected readonly _embeddings?: EmbeddingFunction<T> protected readonly _embeddings?: EmbeddingFunction<T>
constructor (query: T, tbl?: any, embeddings?: EmbeddingFunction<T>) { constructor (query: T, tbl?: any, embeddings?: EmbeddingFunction<T>) {
@@ -44,6 +45,7 @@ export class Query<T = number[]> {
this._filter = undefined this._filter = undefined
this._metricType = undefined this._metricType = undefined
this._embeddings = embeddings this._embeddings = embeddings
this._prefilter = false
} }
/*** /***
@@ -102,6 +104,11 @@ export class Query<T = number[]> {
return this return this
} }
/**
 * Set the prefilter flag for this query.
 *
 * The flag is stored on the query and is `false` unless this method is called.
 * NOTE(review): presumably `true` applies the filter before the vector search
 * and `false` applies it to the search results afterwards — confirm against
 * the query engine documentation.
 *
 * @param value the new prefilter setting
 * @returns this query, so calls can be chained
 */
prefilter (value: boolean): Query<T> {
this._prefilter = value
return this
}
/** /**
* Execute the query and return the results as an Array of Objects * Execute the query and return the results as an Array of Objects
*/ */

View File

@@ -38,6 +38,7 @@ export class HttpLancedbClient {
vector: number[], vector: number[],
k: number, k: number,
nprobes: number, nprobes: number,
prefilter: boolean,
refineFactor?: number, refineFactor?: number,
columns?: string[], columns?: string[],
filter?: string filter?: string
@@ -50,7 +51,8 @@ export class HttpLancedbClient {
nprobes, nprobes,
refineFactor, refineFactor,
columns, columns,
filter filter,
prefilter
}, },
{ {
headers: { headers: {

View File

@@ -156,7 +156,8 @@ export class RemoteQuery<T = number[]> extends Query<T> {
(this as any)._nprobes, (this as any)._nprobes,
(this as any)._refineFactor, (this as any)._refineFactor,
(this as any)._select, (this as any)._select,
(this as any)._filter (this as any)._filter,
(this as any)._prefilter
) )
return data.toArray().map((entry: Record<string, unknown>) => { return data.toArray().map((entry: Record<string, unknown>) => {

View File

@@ -102,6 +102,20 @@ describe('LanceDB client', function () {
assertResults(results) assertResults(results)
}) })
it('should correctly process prefilter/postfilter', async function () {
  // Build an indexed table so the search goes through the IVF_PQ index.
  // NOTE(review): createTestDB(16, 300) presumably means 16 dimensions and 300 rows — confirm.
  const uri = await createTestDB(16, 300)
  const con = await lancedb.connect(uri)
  const table = await con.openTable('vectors')
  await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 })

  // post filter should return less than the limit
  const postFiltered = await table.search(new Array(16).fill(0.1)).limit(10).filter('id >= 10').prefilter(false).execute()
  // isBelow reports the actual length on failure, unlike isTrue(a < b)
  assert.isBelow(postFiltered.length, 10)

  // pre filter should return exactly the limit
  const preFiltered = await table.search(new Array(16).fill(0.1)).limit(10).filter('id >= 10').prefilter(true).execute()
  // lengthOf reports both the expected and the actual length on failure
  assert.lengthOf(preFiltered, 10)
})
it('select only a subset of columns', async function () { it('select only a subset of columns', async function () {
const uri = await createTestDB() const uri = await createTestDB()
const con = await lancedb.connect(uri) const con = await lancedb.connect(uri)

View File

@@ -48,6 +48,8 @@ impl JsQuery {
.map(|s| s.value(&mut cx)) .map(|s| s.value(&mut cx))
.map(|s| MetricType::try_from(s.as_str()).unwrap()); .map(|s| MetricType::try_from(s.as_str()).unwrap());
let prefilter = query_obj.get::<JsBoolean, _, _>(&mut cx, "_prefilter")?.value(&mut cx);
let is_electron = cx let is_electron = cx
.argument::<JsBoolean>(1) .argument::<JsBoolean>(1)
.or_throw(&mut cx)? .or_throw(&mut cx)?
@@ -69,7 +71,8 @@ impl JsQuery {
.nprobes(nprobes) .nprobes(nprobes)
.filter(filter) .filter(filter)
.metric_type(metric_type) .metric_type(metric_type)
.select(select); .select(select)
.prefilter(prefilter);
let record_batch_stream = builder.execute(); let record_batch_stream = builder.execute();
let results = record_batch_stream let results = record_batch_stream
.and_then(|stream| { .and_then(|stream| {