add create index to nodejs client (#89)

This commit is contained in:
gsilvestrin
2023-05-24 16:45:58 -06:00
committed by GitHub
parent 06cb7b6458
commit f923cfe47f
11 changed files with 505 additions and 8 deletions

View File

@@ -21,7 +21,7 @@ import {
import { fromRecordsToBuffer } from './arrow'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd } = require('../native.js')
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd, tableCreateVectorIndex } = require('../native.js')
/**
* Connect to a LanceDB instance at the given URI
@@ -118,8 +118,62 @@ export class Table {
async overwrite (data: Array<Record<string, unknown>>): Promise<number> {
return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Overwrite.toString())
}
async create_index (indexParams: VectorIndexParams): Promise<any> {
return tableCreateVectorIndex.call(this._tbl, indexParams)
}
}
interface IvfPQIndexConfig {
/**
* The column to be indexed
*/
column?: string
/**
* A unique name for the index
*/
index_name?: string
/**
* Metric type, L2 or Cosine
*/
metric_type?: MetricType
/**
* The number of partitions this index
*/
num_partitions?: number
/**
* The max number of iterations for kmeans training.
*/
max_iters?: number
/**
* Train as optimized product quantization.
*/
use_opq?: boolean
/**
* Number of subvectors to build PQ code
*/
num_sub_vectors?: number
/**
* The number of bits to present one PQ centroid.
*/
num_bits?: number
/**
* Max number of iterations to train OPQ, if `use_opq` is true.
*/
max_opq_iters?: number
type: 'ivf_pq'
}
export type VectorIndexParams = IvfPQIndexConfig
/**
* A builder for nearest neighbor queries for LanceDB.
*/

View File

@@ -68,7 +68,7 @@ describe('LanceDB client', function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
const results = await table.search([0.1, 0.3]).filter('id == 2').execute()
const results = await table.search([0.1, 0.1]).filter('id == 2').execute()
assert.equal(results.length, 1)
assert.equal(results[0].id, 2)
})
@@ -131,6 +131,15 @@ describe('LanceDB client', function () {
assert.equal(resultsAdd.length, 2)
})
})
describe('when creating a vector index', function () {
it('overwrite all records in a table', async function () {
const uri = await createTestDB(32, 300)
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
await table.create_index({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2 })
}).timeout(10_000) // Timeout is high partially because GH macos runner is pretty slow
})
})
describe('Query object', function () {
@@ -147,14 +156,18 @@ describe('Query object', function () {
})
})
async function createTestDB (): Promise<string> {
async function createTestDB (numDimensions: number = 2, numRows: number = 2): Promise<string> {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
const data = [
{ id: 1, vector: [0.1, 0.2], name: 'foo', price: 10, is_active: true },
{ id: 2, vector: [1.1, 1.2], name: 'bar', price: 50, is_active: false }
]
const data = []
for (let i = 0; i < numRows; i++) {
const vector = []
for (let j = 0; j < numDimensions; j++) {
vector.push(i + (j * 0.1))
}
data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector })
}
await con.createTable('vectors', data)
return dir