feat: rework NodeJS SDK using napi (#847)

Use Napi to write a Node.js SDK that follows Polars for better
maintainability, while keeping most of the logic in Rust.
This commit is contained in:
Lei Xu
2024-01-23 15:14:45 -08:00
committed by Weston Pace
parent 7b8188bcd5
commit efcaa433fe
38 changed files with 8005 additions and 7 deletions

183
nodejs/vectordb/arrow.ts Normal file
View File

@@ -0,0 +1,183 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import {
Field,
FixedSizeList,
Float,
Float32,
Schema,
Table as ArrowTable,
Table,
Vector,
vectorFromArray,
tableToIPC,
} from "apache-arrow";
/** Data type accepted by NodeJS SDK */
export type Data = Record<string, unknown>[] | ArrowTable;
export class VectorColumnOptions {
/** Vector column type. */
type: Float = new Float32();
constructor(values?: Partial<VectorColumnOptions>) {
Object.assign(this, values);
}
}
/** Options to control the makeArrowTable call. */
export class MakeArrowTableOptions {
/** Provided schema. */
schema?: Schema;
/** Vector columns */
vectorColumns: Record<string, VectorColumnOptions> = {
vector: new VectorColumnOptions(),
};
constructor(values?: Partial<MakeArrowTableOptions>) {
Object.assign(this, values);
}
}
/**
* An enhanced version of the {@link makeTable} function from Apache Arrow
* that supports nested fields and embeddings columns.
*
* Note that it currently does not support nulls.
*
* @param data input data
* @param options options to control the makeArrowTable call.
*
* @example
*
* ```ts
*
* import { fromTableToBuffer, makeArrowTable } from "../arrow";
* import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
*
* const schema = new Schema([
* new Field("a", new Int32()),
* new Field("b", new Float32()),
* new Field("c", new FixedSizeList(3, new Field("item", new Float16()))),
* ]);
* const table = makeArrowTable([
* { a: 1, b: 2, c: [1, 2, 3] },
* { a: 4, b: 5, c: [4, 5, 6] },
* { a: 7, b: 8, c: [7, 8, 9] },
* ], { schema });
* ```
*
* It guesses the vector columns if the schema is not provided. For example,
* by default it assumes that the column named `vector` is a vector column.
*
* ```ts
*
* const schema = new Schema([
new Field("a", new Float64()),
new Field("b", new Float64()),
new Field(
"vector",
new FixedSizeList(3, new Field("item", new Float32()))
),
]);
const table = makeArrowTable([
{ a: 1, b: 2, vector: [1, 2, 3] },
{ a: 4, b: 5, vector: [4, 5, 6] },
{ a: 7, b: 8, vector: [7, 8, 9] },
]);
assert.deepEqual(table.schema, schema);
* ```
*
* You can specify the vector column types and names using the options as well
*
* ```typescript
*
* const schema = new Schema([
new Field('a', new Float64()),
new Field('b', new Float64()),
new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
]);
* const table = makeArrowTable([
{ a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
{ a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
{ a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
], {
vectorColumns: {
vec1: { type: new Float16() },
vec2: { type: new Float16() }
}
}
* assert.deepEqual(table.schema, schema)
* ```
*/
export function makeArrowTable(
data: Record<string, any>[],
options?: Partial<MakeArrowTableOptions>
): Table {
if (data.length === 0) {
throw new Error("At least one record needs to be provided");
}
const opt = new MakeArrowTableOptions(options ?? {});
const columns: Record<string, Vector> = {};
// TODO: sample dataset to find missing columns
const columnNames = Object.keys(data[0]);
for (const colName of columnNames) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
const values = data.map((datum) => datum[colName]);
let vector: Vector;
if (opt.schema !== undefined) {
// Explicit schema is provided, highest priority
vector = vectorFromArray(
values,
opt.schema?.fields.filter((f) => f.name === colName)[0]?.type
);
} else {
const vectorColumnOptions = opt.vectorColumns[colName];
if (vectorColumnOptions !== undefined) {
const fslType = new FixedSizeList(
(values[0] as any[]).length,
new Field("item", vectorColumnOptions.type, false)
);
vector = vectorFromArray(values, fslType);
} else {
// Normal case
vector = vectorFromArray(values);
}
}
columns[colName] = vector;
}
return new Table(columns);
}
/**
* Convert an Arrow Table to a Buffer.
*
* @param data Arrow Table
* @param schema Arrow Schema, optional
* @returns Buffer node
*/
export function toBuffer(data: Data, schema?: Schema): Buffer {
let tbl: Table;
if (data instanceof Table) {
tbl = data;
} else {
tbl = makeArrowTable(data, { schema });
}
return Buffer.from(tableToIPC(tbl, "file"));
}

View File

@@ -0,0 +1,70 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { toBuffer } from "./arrow";
import { Connection as _NativeConnection } from "./native";
import { Table } from "./table";
import { Table as ArrowTable } from "apache-arrow";
/**
* A LanceDB Connection that allows you to open tables and create new ones.
*
* Connection could be local against filesystem or remote against a server.
*/
export class Connection {
readonly inner: _NativeConnection;
constructor(inner: _NativeConnection) {
this.inner = inner;
}
/** List all the table names in this database. */
async tableNames(): Promise<string[]> {
return this.inner.tableNames();
}
/**
* Open a table in the database.
*
* @param name The name of the table.
* @param embeddings An embedding function to use on this table
*/
async openTable(name: string): Promise<Table> {
const innerTable = await this.inner.openTable(name);
return new Table(innerTable);
}
/**
* Creates a new Table and initialize it with new data.
*
* @param {string} name - The name of the table.
* @param data - Non-empty Array of Records to be inserted into the table
*/
async createTable(
name: string,
data: Record<string, unknown>[] | ArrowTable
): Promise<Table> {
const buf = toBuffer(data);
const innerTable = await this.inner.createTable(name, buf);
return new Table(innerTable);
}
/**
* Drop an existing table.
* @param name The name of the table to drop.
*/
async dropTable(name: string): Promise<void> {
return this.inner.dropTable(name);
}
}

58
nodejs/vectordb/index.ts Normal file
View File

@@ -0,0 +1,58 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { Connection } from "./connection";
import { Connection as NativeConnection, ConnectionOptions } from "./native.js";
export { ConnectionOptions, WriteOptions, Query } from "./native.js";
export { Connection } from "./connection";
export { Table } from "./table";
export { Data } from "./arrow";
/**
* Connect to a LanceDB instance at the given URI.
*
* Accpeted formats:
*
* - `/path/to/database` - local database
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
* - `db://host:port` - remote database (LanceDB cloud)
*
* @param uri The uri of the database. If the database uri starts with `db://` then it connects to a remote database.
*
* @see {@link ConnectionOptions} for more details on the URI format.
*/
export async function connect(uri: string): Promise<Connection>;
export async function connect(
opts: Partial<ConnectionOptions>
): Promise<Connection>;
export async function connect(
args: string | Partial<ConnectionOptions>
): Promise<Connection> {
let opts: ConnectionOptions;
if (typeof args === "string") {
opts = { uri: args };
} else {
opts = Object.assign(
{
uri: "",
apiKey: "",
hostOverride: "",
},
args
);
}
const nativeConn = await NativeConnection.new(opts.uri);
return new Connection(nativeConn);
}

50
nodejs/vectordb/native.d.ts vendored Normal file
View File

@@ -0,0 +1,50 @@
/* eslint-disable */
/* auto-generated by NAPI-RS */
export interface ConnectionOptions {
uri: string
apiKey?: string
hostOverride?: string
}
/** Write mode for writing a table. */
export const enum WriteMode {
Create = 'Create',
Append = 'Append',
Overwrite = 'Overwrite'
}
/** Write options when creating a Table. */
export interface WriteOptions {
mode?: WriteMode
}
export function connect(options: ConnectionOptions): Promise<Connection>
export class Connection {
/** Create a new Connection instance from the given URI. */
static new(uri: string): Promise<Connection>
/** List all tables in the dataset. */
tableNames(): Promise<Array<string>>
/**
* Create table from a Apache Arrow IPC (file) buffer.
*
* Parameters:
* - name: The name of the table.
* - buf: The buffer containing the IPC file.
*
*/
createTable(name: string, buf: Buffer): Promise<Table>
openTable(name: string): Promise<Table>
/** Drop table with the name. Or raise an error if the table does not exist. */
dropTable(name: string): Promise<void>
}
export class Query {
vector(vector: Float32Array): void
toArrow(): void
}
export class Table {
/** Return Schema as empty Arrow IPC file. */
schema(): Buffer
add(buf: Buffer): Promise<void>
countRows(): Promise<bigint>
delete(predicate: string): Promise<void>
query(): Query
}

303
nodejs/vectordb/native.js Normal file
View File

@@ -0,0 +1,303 @@
/* eslint-disable */
/* prettier-ignore */
/* auto-generated by NAPI-RS */
const { existsSync, readFileSync } = require('fs')
const { join } = require('path')
const { platform, arch } = process
let nativeBinding = null
let localFileExisted = false
let loadError = null
function isMusl() {
// For Node 10
if (!process.report || typeof process.report.getReport !== 'function') {
try {
const lddPath = require('child_process').execSync('which ldd').toString().trim()
return readFileSync(lddPath, 'utf8').includes('musl')
} catch (e) {
return true
}
} else {
const { glibcVersionRuntime } = process.report.getReport().header
return !glibcVersionRuntime
}
}
switch (platform) {
case 'android':
switch (arch) {
case 'arm64':
localFileExisted = existsSync(join(__dirname, 'vectordb-nodejs.android-arm64.node'))
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.android-arm64.node')
} else {
nativeBinding = require('vectordb-android-arm64')
}
} catch (e) {
loadError = e
}
break
case 'arm':
localFileExisted = existsSync(join(__dirname, 'vectordb-nodejs.android-arm-eabi.node'))
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.android-arm-eabi.node')
} else {
nativeBinding = require('vectordb-android-arm-eabi')
}
} catch (e) {
loadError = e
}
break
default:
throw new Error(`Unsupported architecture on Android ${arch}`)
}
break
case 'win32':
switch (arch) {
case 'x64':
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.win32-x64-msvc.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.win32-x64-msvc.node')
} else {
nativeBinding = require('vectordb-win32-x64-msvc')
}
} catch (e) {
loadError = e
}
break
case 'ia32':
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.win32-ia32-msvc.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.win32-ia32-msvc.node')
} else {
nativeBinding = require('vectordb-win32-ia32-msvc')
}
} catch (e) {
loadError = e
}
break
case 'arm64':
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.win32-arm64-msvc.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.win32-arm64-msvc.node')
} else {
nativeBinding = require('vectordb-win32-arm64-msvc')
}
} catch (e) {
loadError = e
}
break
default:
throw new Error(`Unsupported architecture on Windows: ${arch}`)
}
break
case 'darwin':
localFileExisted = existsSync(join(__dirname, 'vectordb-nodejs.darwin-universal.node'))
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.darwin-universal.node')
} else {
nativeBinding = require('vectordb-darwin-universal')
}
break
} catch {}
switch (arch) {
case 'x64':
localFileExisted = existsSync(join(__dirname, 'vectordb-nodejs.darwin-x64.node'))
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.darwin-x64.node')
} else {
nativeBinding = require('vectordb-darwin-x64')
}
} catch (e) {
loadError = e
}
break
case 'arm64':
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.darwin-arm64.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.darwin-arm64.node')
} else {
nativeBinding = require('vectordb-darwin-arm64')
}
} catch (e) {
loadError = e
}
break
default:
throw new Error(`Unsupported architecture on macOS: ${arch}`)
}
break
case 'freebsd':
if (arch !== 'x64') {
throw new Error(`Unsupported architecture on FreeBSD: ${arch}`)
}
localFileExisted = existsSync(join(__dirname, 'vectordb-nodejs.freebsd-x64.node'))
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.freebsd-x64.node')
} else {
nativeBinding = require('vectordb-freebsd-x64')
}
} catch (e) {
loadError = e
}
break
case 'linux':
switch (arch) {
case 'x64':
if (isMusl()) {
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.linux-x64-musl.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.linux-x64-musl.node')
} else {
nativeBinding = require('vectordb-linux-x64-musl')
}
} catch (e) {
loadError = e
}
} else {
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.linux-x64-gnu.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.linux-x64-gnu.node')
} else {
nativeBinding = require('vectordb-linux-x64-gnu')
}
} catch (e) {
loadError = e
}
}
break
case 'arm64':
if (isMusl()) {
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.linux-arm64-musl.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.linux-arm64-musl.node')
} else {
nativeBinding = require('vectordb-linux-arm64-musl')
}
} catch (e) {
loadError = e
}
} else {
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.linux-arm64-gnu.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.linux-arm64-gnu.node')
} else {
nativeBinding = require('vectordb-linux-arm64-gnu')
}
} catch (e) {
loadError = e
}
}
break
case 'arm':
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.linux-arm-gnueabihf.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.linux-arm-gnueabihf.node')
} else {
nativeBinding = require('vectordb-linux-arm-gnueabihf')
}
} catch (e) {
loadError = e
}
break
case 'riscv64':
if (isMusl()) {
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.linux-riscv64-musl.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.linux-riscv64-musl.node')
} else {
nativeBinding = require('vectordb-linux-riscv64-musl')
}
} catch (e) {
loadError = e
}
} else {
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.linux-riscv64-gnu.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.linux-riscv64-gnu.node')
} else {
nativeBinding = require('vectordb-linux-riscv64-gnu')
}
} catch (e) {
loadError = e
}
}
break
case 's390x':
localFileExisted = existsSync(
join(__dirname, 'vectordb-nodejs.linux-s390x-gnu.node')
)
try {
if (localFileExisted) {
nativeBinding = require('./vectordb-nodejs.linux-s390x-gnu.node')
} else {
nativeBinding = require('vectordb-linux-s390x-gnu')
}
} catch (e) {
loadError = e
}
break
default:
throw new Error(`Unsupported architecture on Linux: ${arch}`)
}
break
default:
throw new Error(`Unsupported OS: ${platform}, architecture: ${arch}`)
}
if (!nativeBinding) {
if (loadError) {
throw loadError
}
throw new Error(`Failed to load native binding`)
}
const { Connection, Query, Table, WriteMode, connect } = nativeBinding
module.exports.Connection = Connection
module.exports.Query = Query
module.exports.Table = Table
module.exports.WriteMode = WriteMode
module.exports.connect = connect

93
nodejs/vectordb/query.ts Normal file
View File

@@ -0,0 +1,93 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { RecordBatch } from "apache-arrow";
import { Table } from "./table";
// TODO: re-eanble eslint once we have a real implementation
/* eslint-disable */
class RecordBatchIterator implements AsyncIterator<RecordBatch> {
next(
...args: [] | [undefined]
): Promise<IteratorResult<RecordBatch<any>, any>> {
throw new Error("Method not implemented.");
}
return?(value?: any): Promise<IteratorResult<RecordBatch<any>, any>> {
throw new Error("Method not implemented.");
}
throw?(e?: any): Promise<IteratorResult<RecordBatch<any>, any>> {
throw new Error("Method not implemented.");
}
}
/* eslint-enable */
/** Query executor */
export class Query implements AsyncIterable<RecordBatch> {
private readonly tbl: Table;
private _filter?: string;
private _limit?: number;
// Vector search
private _vector?: Float32Array;
private _nprobes?: number;
private _refine_factor?: number = 1;
constructor(tbl: Table) {
this.tbl = tbl;
}
/** Set the filter predicate, only returns the results that satisfy the filter.
*
*/
filter(predicate: string): Query {
this._filter = predicate;
return this;
}
/**
* Set the limit of rows to return.
*/
limit(limit: number): Query {
this._limit = limit;
return this;
}
/**
* Set the query vector.
*/
vector(vector: number[]): Query {
this._vector = Float32Array.from(vector);
return this;
}
/**
* Set the number of probes to use for the query.
*/
nprobes(nprobes: number): Query {
this._nprobes = nprobes;
return this;
}
/**
* Set the refine factor for the query.
*/
refine_factor(refine_factor: number): Query {
this._refine_factor = refine_factor;
return this;
}
[Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined> {
throw new RecordBatchIterator();
}
}

68
nodejs/vectordb/table.ts Normal file
View File

@@ -0,0 +1,68 @@
// Copyright 2024 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import { Schema, tableFromIPC } from "apache-arrow";
import { Table as _NativeTable } from "./native";
import { toBuffer, Data } from "./arrow";
import { Query } from "./query";
/**
* A LanceDB Table is the collection of Records.
*
* Each Record has one or more vector fields.
*/
export class Table {
private readonly inner: _NativeTable;
/** Construct a Table. Internal use only. */
constructor(inner: _NativeTable) {
this.inner = inner;
}
/** Get the schema of the table. */
get schema(): Schema {
const schemaBuf = this.inner.schema();
const tbl = tableFromIPC(schemaBuf);
return tbl.schema;
}
/**
* Insert records into this Table.
*
* @param {Data} data Records to be inserted into the Table
* @return The number of rows added to the table
*/
async add(data: Data): Promise<void> {
const buffer = toBuffer(data);
await this.inner.add(buffer);
}
/** Count the total number of rows in the dataset. */
async countRows(): Promise<bigint> {
return await this.inner.countRows();
}
/** Delete the rows that satisfy the predicate. */
async delete(predicate: string): Promise<void> {
await this.inner.delete(predicate);
}
search(vector?: number[]): Query {
const q = new Query(this);
if (vector !== undefined) {
q.vector(vector);
}
return q;
}
}