mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-08 21:02:58 +00:00
feat(napi): Provide a new createIndex API in the napi SDK. (#857)
This commit is contained in:
@@ -18,5 +18,5 @@ module.exports = {
|
||||
"@typescript-eslint/method-signature-style": "off",
|
||||
"@typescript-eslint/no-explicit-any": "off",
|
||||
},
|
||||
ignorePatterns: ["node_modules/", "dist/", "build/"],
|
||||
ignorePatterns: ["node_modules/", "dist/", "build/", "vectordb/native.*"],
|
||||
};
|
||||
|
||||
@@ -17,6 +17,7 @@ napi = { version = "2.14", default-features = false, features = [
|
||||
napi-derive = "2.14"
|
||||
vectordb = { path = "../rust/vectordb" }
|
||||
lance.workspace = true
|
||||
lance-linalg.workspace = true
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = "2.1"
|
||||
|
||||
@@ -1,3 +1,24 @@
|
||||
# (New) LanceDB NodeJS SDK
|
||||
|
||||
It will replace the NodeJS SDK when it is ready.
|
||||
|
||||
|
||||
## Development
|
||||
|
||||
```sh
|
||||
npm run build
|
||||
npm t
|
||||
```
|
||||
|
||||
Generating docs
|
||||
|
||||
```
|
||||
npm run docs
|
||||
|
||||
cd ../docs
|
||||
# Assume the virtual environment was already created
|
||||
# python3 -m venv venv
|
||||
# pip install -r requirements.txt
|
||||
. ./venv/bin/activate
|
||||
mkdocs build
|
||||
```
|
||||
|
||||
99
nodejs/__test__/table.test.ts
Normal file
99
nodejs/__test__/table.test.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import * as os from "os";
|
||||
import * as path from "path";
|
||||
import * as fs from "fs";
|
||||
|
||||
import { connect } from "../dist";
|
||||
import { Schema, Field, Float32, Int32, FixedSizeList } from "apache-arrow";
|
||||
import { makeArrowTable } from "../dist/arrow";
|
||||
|
||||
describe("Test creating index", () => {
  // Fresh scratch directory per test; created in beforeEach, removed in afterEach.
  let tmpDir: string;

  const schema = new Schema([
    new Field("id", new Int32(), true),
    new Field("vec", new FixedSizeList(32, new Field("item", new Float32()))),
  ]);

  /**
   * Build the shared fixture: 300 rows with a sequential `id` and a
   * 32-element random `vec`, materialized with the schema above.
   */
  const makeVectorData = () =>
    makeArrowTable(
      Array.from({ length: 300 }, (_, i) => ({
        id: i,
        vec: Array.from({ length: 32 }, () => Math.random()),
      })),
      { schema }
    );

  beforeEach(() => {
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "index-"));
  });

  afterEach(() => {
    // Remove the per-test database so repeated runs do not pile up temp dirs.
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  test("create vector index with no column", async () => {
    const db = await connect(tmpDir);
    const tbl = await db.createTable("test", makeVectorData());
    await tbl.createIndex().build();

    // check index directory
    const indexDir = path.join(tmpDir, "test.lance", "_indices");
    expect(fs.readdirSync(indexDir)).toHaveLength(1);
    // TODO: check index type.
  });

  test("no vector column available", async () => {
    const db = await connect(tmpDir);
    const tbl = await db.createTable(
      "no_vec",
      makeArrowTable([
        { id: 1, val: 2 },
        { id: 2, val: 3 },
      ])
    );

    // Without an explicit column the builder has nothing to index.
    await expect(tbl.createIndex().build()).rejects.toThrow(
      "No vector column found"
    );

    // Naming a column explicitly still works on the same table.
    await tbl.createIndex("val").build();
    const indexDir = path.join(tmpDir, "no_vec.lance", "_indices");
    expect(fs.readdirSync(indexDir)).toHaveLength(1);
  });

  test("create scalar index", async () => {
    const db = await connect(tmpDir);
    const tbl = await db.createTable("test", makeVectorData());
    await tbl.createIndex("id").build();

    // check index directory
    const indexDir = path.join(tmpDir, "test.lance", "_indices");
    expect(fs.readdirSync(indexDir)).toHaveLength(1);
    // TODO: check index type.
  });
});
|
||||
101
nodejs/src/index.rs
Normal file
101
nodejs/src/index.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use lance_linalg::distance::MetricType as LanceMetricType;
|
||||
use napi_derive::napi;
|
||||
|
||||
/// Index kinds exported to JavaScript through `#[napi]`.
#[napi]
pub enum IndexType {
    /// Scalar index.
    Scalar,
    /// IVF_PQ index.
    IvfPq,
}
|
||||
|
||||
/// Distance metric exported to JavaScript through `#[napi]`.
///
/// Each variant maps one-to-one onto the same-named variant of
/// `lance_linalg::distance::MetricType` (see the `From` impl in this file).
#[napi]
pub enum MetricType {
    L2,
    Cosine,
    Dot,
}
|
||||
|
||||
impl From<MetricType> for LanceMetricType {
|
||||
fn from(metric: MetricType) -> Self {
|
||||
match metric {
|
||||
MetricType::L2 => Self::L2,
|
||||
MetricType::Cosine => Self::Cosine,
|
||||
MetricType::Dot => Self::Dot,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// JavaScript-visible wrapper around `vectordb::index::IndexBuilder`.
#[napi]
pub struct IndexBuilder {
    // The wrapped builder; every exported method forwards to it.
    inner: vectordb::index::IndexBuilder,
}
|
||||
|
||||
#[napi]
|
||||
impl IndexBuilder {
|
||||
pub fn new(tbl: &dyn vectordb::Table) -> Self {
|
||||
let inner = tbl.create_index(&[]);
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn replace(&mut self, v: bool) {
|
||||
self.inner.replace(v);
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn column(&mut self, c: String) {
|
||||
self.inner.columns(&[c.as_str()]);
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn name(&mut self, name: String) {
|
||||
self.inner.name(name.as_str());
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn ivf_pq(
|
||||
&mut self,
|
||||
metric_type: Option<MetricType>,
|
||||
num_partitions: Option<u32>,
|
||||
num_sub_vectors: Option<u32>,
|
||||
num_bits: Option<u32>,
|
||||
max_iterations: Option<u32>,
|
||||
sample_rate: Option<u32>,
|
||||
) {
|
||||
self.inner.ivf_pq();
|
||||
metric_type.map(|m| self.inner.metric_type(m.into()));
|
||||
num_partitions.map(|p| self.inner.num_partitions(p));
|
||||
num_sub_vectors.map(|s| self.inner.num_sub_vectors(s));
|
||||
num_bits.map(|b| self.inner.num_bits(b));
|
||||
max_iterations.map(|i| self.inner.max_iterations(i));
|
||||
sample_rate.map(|s| self.inner.sample_rate(s));
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub unsafe fn scalar(&mut self) {
|
||||
self.inner.scalar();
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn build(&self) -> napi::Result<()> {
|
||||
println!("nodejs::index.rs : build");
|
||||
self.inner
|
||||
.build()
|
||||
.await
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to build index: {}", e)))?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -16,6 +16,7 @@ use connection::Connection;
|
||||
use napi_derive::*;
|
||||
|
||||
mod connection;
|
||||
mod index;
|
||||
mod query;
|
||||
mod table;
|
||||
|
||||
|
||||
@@ -12,12 +12,14 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::query::Query;
|
||||
use arrow_ipc::writer::FileWriter;
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
use vectordb::{ipc::ipc_file_to_batches, table::TableRef};
|
||||
|
||||
use crate::index::IndexBuilder;
|
||||
use crate::query::Query;
|
||||
|
||||
#[napi]
|
||||
pub struct Table {
|
||||
pub(crate) table: TableRef,
|
||||
@@ -43,7 +45,7 @@ impl Table {
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async unsafe fn add(&mut self, buf: Buffer) -> napi::Result<()> {
|
||||
pub async fn add(&self, buf: Buffer) -> napi::Result<()> {
|
||||
let batches = ipc_file_to_batches(buf.to_vec())
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
|
||||
self.table.add(Box::new(batches), None).await.map_err(|e| {
|
||||
@@ -65,7 +67,7 @@ impl Table {
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async unsafe fn delete(&mut self, predicate: String) -> napi::Result<()> {
|
||||
pub async fn delete(&self, predicate: String) -> napi::Result<()> {
|
||||
self.table.delete(&predicate).await.map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to delete rows in table {}: predicate={}",
|
||||
@@ -74,6 +76,11 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a new [`IndexBuilder`] created from this table's underlying `table` reference.
#[napi]
pub fn create_index(&self) -> IndexBuilder {
    IndexBuilder::new(self.table.as_ref())
}
|
||||
|
||||
#[napi]
|
||||
pub fn query(&self) -> Query {
|
||||
Query::new(self)
|
||||
|
||||
@@ -179,5 +179,5 @@ export function toBuffer(data: Data, schema?: Schema): Buffer {
|
||||
} else {
|
||||
tbl = makeArrowTable(data, { schema });
|
||||
}
|
||||
return Buffer.from(tableToIPC(tbl, "file"));
|
||||
return Buffer.from(tableToIPC(tbl));
|
||||
}
|
||||
|
||||
@@ -15,10 +15,16 @@
|
||||
import { Connection } from "./connection";
|
||||
import { Connection as NativeConnection, ConnectionOptions } from "./native.js";
|
||||
|
||||
export { ConnectionOptions, WriteOptions, Query } from "./native.js";
|
||||
export {
|
||||
ConnectionOptions,
|
||||
WriteOptions,
|
||||
Query,
|
||||
MetricType,
|
||||
} from "./native.js";
|
||||
export { Connection } from "./connection";
|
||||
export { Table } from "./table";
|
||||
export { Data } from "./arrow";
|
||||
export { IvfPQOptions, IndexBuilder } from "./indexer";
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB instance at the given URI.
|
||||
|
||||
102
nodejs/vectordb/indexer.ts
Normal file
102
nodejs/vectordb/indexer.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
// Copyright 2024 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import {
|
||||
MetricType,
|
||||
IndexBuilder as NativeBuilder,
|
||||
Table as NativeTable,
|
||||
} from "./native";
|
||||
|
||||
/** Options to create `IVF_PQ` index */
export interface IvfPQOptions {
  /** Number of IVF partitions. */
  num_partitions?: number;

  /** Number of sub-vectors in PQ coding. */
  num_sub_vectors?: number;

  /** Number of bits used for each PQ code. */
  num_bits?: number;

  /**
   * Metric type to calculate the distance between vectors.
   *
   * Supported metrics: `L2`, `Cosine` and `Dot`.
   */
  metric_type?: MetricType;

  /**
   * Number of iterations to train K-means.
   *
   * Default is 50. More iterations usually yield better results,
   * but take longer to train.
   */
  max_iterations?: number;

  /**
   * Forwarded as `sampleRate` to the native `ivfPq` call.
   *
   * NOTE(review): semantics and default are not documented in this file; confirm.
   */
  sample_rate?: number;
}
|
||||
|
||||
/**
 * Fluent builder for creating an index on a LanceDB {@link Table}.
 *
 * Every configuration method forwards to the native builder and returns
 * `this`, so calls can be chained and finished with {@link IndexBuilder.build}.
 *
 * @see {@link Table.createIndex} for detailed usage.
 */
export class IndexBuilder {
  /** Native (napi) builder that holds the actual configuration. */
  private readonly inner: NativeBuilder;

  /**
   * @param tbl Native table to index; its `createIndex()` supplies the
   *   underlying native builder.
   */
  constructor(tbl: NativeTable) {
    this.inner = tbl.createIndex();
  }

  /**
   * Instruct the builder to build an `IVF_PQ` index.
   *
   * @param options Optional tuning parameters; omitted fields are passed to
   *   the native builder as `undefined`.
   * @returns This builder, for chaining.
   */
  ivf_pq(options?: IvfPQOptions): IndexBuilder {
    this.inner.ivfPq(
      options?.metric_type,
      options?.num_partitions,
      options?.num_sub_vectors,
      options?.num_bits,
      options?.max_iterations,
      options?.sample_rate
    );
    return this;
  }

  /**
   * Instruct the builder to build a Scalar index.
   *
   * @returns This builder, for chaining.
   */
  scalar(): IndexBuilder {
    // Must delegate to the native builder: calling `this.scalar()` here
    // would recurse until the stack overflows.
    this.inner.scalar();
    return this;
  }

  /**
   * Set the column to create the index on.
   *
   * @returns This builder, for chaining.
   */
  column(col: string): IndexBuilder {
    this.inner.column(col);
    return this;
  }

  /**
   * Set to true to replace existing index.
   *
   * @returns This builder, for chaining.
   */
  replace(val: boolean): IndexBuilder {
    this.inner.replace(val);
    return this;
  }

  /**
   * Specify the name of the index. Optional.
   *
   * @returns This builder, for chaining.
   */
  name(n: string): IndexBuilder {
    this.inner.name(n);
    return this;
  }

  /** Build the index; resolves once the native build has finished. */
  async build(): Promise<void> {
    await this.inner.build();
  }
}
|
||||
19
nodejs/vectordb/native.d.ts
vendored
19
nodejs/vectordb/native.d.ts
vendored
@@ -1,7 +1,17 @@
|
||||
/* tslint:disable */
|
||||
/* eslint-disable */
|
||||
|
||||
/* auto-generated by NAPI-RS */
|
||||
|
||||
export const enum IndexType {
|
||||
Scalar = 0,
|
||||
IvfPq = 1
|
||||
}
|
||||
export const enum MetricType {
|
||||
L2 = 0,
|
||||
Cosine = 1,
|
||||
Dot = 2
|
||||
}
|
||||
export interface ConnectionOptions {
|
||||
uri: string
|
||||
apiKey?: string
|
||||
@@ -36,6 +46,14 @@ export class Connection {
|
||||
/** Drop table with the name. Or raise an error if the table does not exist. */
|
||||
dropTable(name: string): Promise<void>
|
||||
}
|
||||
export class IndexBuilder {
|
||||
replace(v: boolean): void
|
||||
column(c: string): void
|
||||
name(name: string): void
|
||||
ivfPq(metricType?: MetricType | undefined | null, numPartitions?: number | undefined | null, numSubVectors?: number | undefined | null, numBits?: number | undefined | null, maxIterations?: number | undefined | null, sampleRate?: number | undefined | null): void
|
||||
scalar(): void
|
||||
build(): Promise<void>
|
||||
}
|
||||
export class Query {
|
||||
vector(vector: Float32Array): void
|
||||
toArrow(): void
|
||||
@@ -46,5 +64,6 @@ export class Table {
|
||||
add(buf: Buffer): Promise<void>
|
||||
countRows(): Promise<bigint>
|
||||
delete(predicate: string): Promise<void>
|
||||
createIndex(): IndexBuilder
|
||||
query(): Query
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
/* tslint:disable */
|
||||
/* eslint-disable */
|
||||
/* prettier-ignore */
|
||||
|
||||
@@ -294,9 +295,12 @@ if (!nativeBinding) {
|
||||
throw new Error(`Failed to load native binding`)
|
||||
}
|
||||
|
||||
const { Connection, Query, Table, WriteMode, connect } = nativeBinding
|
||||
const { Connection, IndexType, MetricType, IndexBuilder, Query, Table, WriteMode, connect } = nativeBinding
|
||||
|
||||
module.exports.Connection = Connection
|
||||
module.exports.IndexType = IndexType
|
||||
module.exports.MetricType = MetricType
|
||||
module.exports.IndexBuilder = IndexBuilder
|
||||
module.exports.Query = Query
|
||||
module.exports.Table = Table
|
||||
module.exports.WriteMode = WriteMode
|
||||
|
||||
@@ -16,6 +16,7 @@ import { Schema, tableFromIPC } from "apache-arrow";
|
||||
import { Table as _NativeTable } from "./native";
|
||||
import { toBuffer, Data } from "./arrow";
|
||||
import { Query } from "./query";
|
||||
import { IndexBuilder } from "./indexer";
|
||||
|
||||
/**
|
||||
* A LanceDB Table is the collection of Records.
|
||||
@@ -58,6 +59,42 @@ export class Table {
|
||||
await this.inner.delete(predicate);
|
||||
}
|
||||
|
||||
/** Create an index over the columns.
|
||||
*
|
||||
* @param {string} column The column to create the index on. If not specified,
|
||||
* it will create an index on vector field.
|
||||
*
|
||||
* @example
|
||||
*
|
||||
* By default, it creates a vector index on one vector column.
|
||||
*
|
||||
* ```typescript
|
||||
* const table = await conn.openTable("my_table");
|
||||
* await table.createIndex().build();
|
||||
* ```
|
||||
*
|
||||
* You can specify `IVF_PQ` parameters via `ivf_pq({})` call.
|
||||
* ```typescript
|
||||
* const table = await conn.openTable("my_table");
|
||||
* await table.createIndex("my_vec_col")
|
||||
* .ivf_pq({ num_partitions: 128, num_sub_vectors: 16 })
|
||||
* .build();
|
||||
* ```
|
||||
*
|
||||
* Or create a Scalar index
|
||||
*
|
||||
* ```typescript
|
||||
* await table.createIndex("my_float_col").build();
|
||||
* ```
|
||||
*/
|
||||
createIndex(column?: string): IndexBuilder {
  // Start from a builder bound to the native table, then narrow it to the
  // requested column only when the caller named one.
  const builder = new IndexBuilder(this.inner);
  return column === undefined ? builder : builder.column(column);
}
|
||||
|
||||
search(vector?: number[]): Query {
|
||||
const q = new Query(this);
|
||||
if (vector !== undefined) {
|
||||
|
||||
Reference in New Issue
Block a user