From f3b6a1f55b5d8670d7996c083e8e1cc1d1cca4c2 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 9 Oct 2024 10:46:27 -0700 Subject: [PATCH] feat(node): bind remote SDK to rust implementation (#1730) Closes [#2509](https://github.com/lancedb/sophon/issues/2509) This is the Node.js analogue of #1700 --- .bumpversion.toml | 5 + nodejs/Cargo.toml | 4 +- nodejs/__test__/remote.test.ts | 93 ++++++++++++ nodejs/lancedb/index.ts | 18 +-- nodejs/lancedb/remote/client.ts | 218 --------------------------- nodejs/lancedb/remote/connection.ts | 193 ------------------------ nodejs/lancedb/remote/index.ts | 3 - nodejs/lancedb/remote/table.ts | 226 ---------------------------- nodejs/package-lock.json | 97 ++++++++++-- nodejs/package.json | 2 +- nodejs/src/connection.rs | 18 +++ nodejs/src/lib.rs | 14 ++ nodejs/src/remote.rs | 120 +++++++++++++++ 13 files changed, 347 insertions(+), 664 deletions(-) create mode 100644 nodejs/__test__/remote.test.ts delete mode 100644 nodejs/lancedb/remote/client.ts delete mode 100644 nodejs/lancedb/remote/connection.ts delete mode 100644 nodejs/lancedb/remote/index.ts delete mode 100644 nodejs/lancedb/remote/table.ts create mode 100644 nodejs/src/remote.rs diff --git a/.bumpversion.toml b/.bumpversion.toml index 7e0caac8..aaf46f1c 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -103,3 +103,8 @@ search = "\nversion = \"{current_version}\"" filename = "rust/lancedb/Cargo.toml" replace = "\nversion = \"{new_version}\"" search = "\nversion = \"{current_version}\"" + +[[tool.bumpversion.files]] +filename = "nodejs/Cargo.toml" +replace = "\nversion = \"{new_version}\"" +search = "\nversion = \"{current_version}\"" diff --git a/nodejs/Cargo.toml b/nodejs/Cargo.toml index b4226e9f..d6d36523 100644 --- a/nodejs/Cargo.toml +++ b/nodejs/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "lancedb-nodejs" edition.workspace = true -version = "0.0.0" +version = "0.11.0-beta.1" license.workspace = true description.workspace = true repository.workspace = true @@ -14,7 +14,7 @@ crate-type = ["cdylib"] [dependencies] arrow-ipc.workspace = true futures.workspace = true -lancedb = { path = "../rust/lancedb" } +lancedb = { path = "../rust/lancedb", features = ["remote"] } napi = { version = "2.16.8", default-features = false, features = [ "napi9", "async", diff --git a/nodejs/__test__/remote.test.ts b/nodejs/__test__/remote.test.ts new file mode 100644 index 00000000..3e693197 --- /dev/null +++ b/nodejs/__test__/remote.test.ts @@ -0,0 +1,93 @@ +// Copyright 2024 Lance Developers. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import * as http from "http"; +import { RequestListener } from "http"; +import { Connection, ConnectionOptions, connect } from "../lancedb"; + +async function withMockDatabase( + listener: RequestListener, + callback: (db: Connection) => void, + connectionOptions?: ConnectionOptions, +) { + const server = http.createServer(listener); + server.listen(8000); + + const db = await connect( + "db://dev", + Object.assign( + { + apiKey: "fake", + hostOverride: "http://localhost:8000", + }, + connectionOptions, + ), + ); + + try { + await callback(db); + } finally { + server.close(); + } +} + +describe("remote connection", () => { + it("should accept partial connection options", async () => { + await connect("db://test", { + apiKey: "fake", + clientConfig: { + timeoutConfig: { readTimeout: 5 }, + retryConfig: { retries: 2 }, + }, + }); + }); + + it("should pass down apiKey and userAgent", async () => { + await withMockDatabase( + (req, res) => { + expect(req.headers["x-api-key"]).toEqual("fake"); + expect(req.headers["user-agent"]).toEqual( + `LanceDB-Node-Client/${process.env.npm_package_version}`, + ); + + const body = JSON.stringify({ tables: [] }); + res.writeHead(200, { "Content-Type": "application/json" }).end(body); + }, + async (db) => { + const tableNames = await db.tableNames(); + expect(tableNames).toEqual([]); + }, + ); + }); + + it("allows customizing user agent", async () => { + await withMockDatabase( + (req, res) => { + expect(req.headers["user-agent"]).toEqual("MyApp/1.0"); + + const body = JSON.stringify({ tables: [] }); + res.writeHead(200, { "Content-Type": "application/json" }).end(body); + }, + async (db) => { + const tableNames = await db.tableNames(); + expect(tableNames).toEqual([]); + }, + { + clientConfig: { + userAgent: "MyApp/1.0", + }, + }, + ); + }); +}); diff --git a/nodejs/lancedb/index.ts b/nodejs/lancedb/index.ts index 1e66bb8c..74da915f 100644 --- a/nodejs/lancedb/index.ts +++ b/nodejs/lancedb/index.ts @@ -23,8 +23,6 @@ import { Connection as LanceDbConnection, } from "./native.js"; -import { RemoteConnection, RemoteConnectionOptions } from "./remote"; - export { WriteOptions, WriteMode, @@ -33,6 +31,9 @@ export { ConnectionOptions, IndexStatistics, IndexConfig, + ClientConfig, + TimeoutConfig, + RetryConfig, } from "./native.js"; export { @@ -87,7 +88,7 @@ export * as embedding from "./embedding"; */ export async function connect( uri: string, - opts?: Partial, + opts?: Partial, ): Promise; /** * Connect to a LanceDB instance at the given URI. @@ -108,13 +109,11 @@ export async function connect( * ``` */ export async function connect( - opts: Partial & { uri: string }, + opts: Partial & { uri: string }, ): Promise; export async function connect( - uriOrOptions: - | string - | (Partial & { uri: string }), - opts: Partial = {}, + uriOrOptions: string | (Partial & { uri: string }), + opts: Partial = {}, ): Promise { let uri: string | undefined; if (typeof uriOrOptions !== "string") { @@ -129,9 +128,6 @@ export async function connect( throw new Error("uri is required"); } - if (uri?.startsWith("db://")) { - return new RemoteConnection(uri, opts as RemoteConnectionOptions); - } opts = (opts as ConnectionOptions) ?? {}; (opts).storageOptions = cleanseStorageOptions( (opts).storageOptions, diff --git a/nodejs/lancedb/remote/client.ts b/nodejs/lancedb/remote/client.ts deleted file mode 100644 index 4e4a92a3..00000000 --- a/nodejs/lancedb/remote/client.ts +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright 2023 LanceDB Developers. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import axios, { - AxiosError, - type AxiosResponse, - type ResponseType, -} from "axios"; -import { Table as ArrowTable } from "../arrow"; -import { tableFromIPC } from "../arrow"; -import { VectorQuery } from "../query"; - -export class RestfulLanceDBClient { - #dbName: string; - #region: string; - #apiKey: string; - #hostOverride?: string; - #closed: boolean = false; - #timeout: number = 12 * 1000; // 12 seconds; - #session?: import("axios").AxiosInstance; - - constructor( - dbName: string, - apiKey: string, - region: string, - hostOverride?: string, - timeout?: number, - ) { - this.#dbName = dbName; - this.#apiKey = apiKey; - this.#region = region; - this.#hostOverride = hostOverride ?? this.#hostOverride; - this.#timeout = timeout ?? this.#timeout; - } - - // todo: cache the session. - get session(): import("axios").AxiosInstance { - if (this.#session !== undefined) { - return this.#session; - } else { - return axios.create({ - baseURL: this.url, - headers: { - // biome-ignore lint: external API - Authorization: `Bearer ${this.#apiKey}`, - }, - transformResponse: decodeErrorData, - timeout: this.#timeout, - }); - } - } - - get url(): string { - return ( - this.#hostOverride ?? - `https://${this.#dbName}.${this.#region}.api.lancedb.com` - ); - } - - get headers(): { [key: string]: string } { - const headers: { [key: string]: string } = { - "x-api-key": this.#apiKey, - "x-request-id": "na", - }; - if (this.#region == "local") { - headers["Host"] = `${this.#dbName}.${this.#region}.api.lancedb.com`; - } - if (this.#hostOverride) { - headers["x-lancedb-database"] = this.#dbName; - } - return headers; - } - - isOpen(): boolean { - return !this.#closed; - } - - private checkNotClosed(): void { - if (this.#closed) { - throw new Error("Connection is closed"); - } - } - - close(): void { - this.#session = undefined; - this.#closed = true; - } - - // biome-ignore lint/suspicious/noExplicitAny: - async get(uri: string, params?: Record): Promise { - this.checkNotClosed(); - uri = new URL(uri, this.url).toString(); - let response; - try { - response = await this.session.get(uri, { - headers: this.headers, - params, - }); - } catch (e) { - if (e instanceof AxiosError && e.response) { - response = e.response; - } else { - throw e; - } - } - - RestfulLanceDBClient.checkStatus(response!); - return response!.data; - } - - // biome-ignore lint/suspicious/noExplicitAny: api response - async post(uri: string, body?: any): Promise; - async post( - uri: string, - // biome-ignore lint/suspicious/noExplicitAny: api request - body: any, - additional: { - config?: { responseType: "arraybuffer" }; - headers?: Record; - params?: Record; - }, - ): Promise; - async post( - uri: string, - // biome-ignore lint/suspicious/noExplicitAny: api request - body?: any, - additional?: { - config?: { responseType: ResponseType }; - headers?: Record; - params?: Record; - }, - // biome-ignore lint/suspicious/noExplicitAny: api response - ): Promise { - this.checkNotClosed(); - uri = new URL(uri, this.url).toString(); - additional = Object.assign( - { config: { responseType: "json" } }, - additional, - ); - - const headers = { ...this.headers, ...additional.headers }; - - if (!headers["Content-Type"]) { - headers["Content-Type"] = "application/json"; - } - let response; - try { - response = await this.session.post(uri, body, { - headers, - responseType: additional!.config!.responseType, - params: new Map(Object.entries(additional.params ?? {})), - }); - } catch (e) { - if (e instanceof AxiosError && e.response) { - response = e.response; - } else { - throw e; - } - } - RestfulLanceDBClient.checkStatus(response!); - if (additional!.config!.responseType === "arraybuffer") { - return response!.data; - } else { - return JSON.parse(response!.data); - } - } - - async listTables(limit = 10, pageToken = ""): Promise { - const json = await this.get("/v1/table", { limit, pageToken }); - return json.tables; - } - - async query(tableName: string, query: VectorQuery): Promise { - const tbl = await this.post(`/v1/table/${tableName}/query`, query, { - config: { - responseType: "arraybuffer", - }, - }); - return tableFromIPC(tbl); - } - - static checkStatus(response: AxiosResponse): void { - if (response.status === 404) { - throw new Error(`Not found: ${response.data}`); - } else if (response.status >= 400 && response.status < 500) { - throw new Error( - `Bad Request: ${response.status}, error: ${response.data}`, - ); - } else if (response.status >= 500 && response.status < 600) { - throw new Error( - `Internal Server Error: ${response.status}, error: ${response.data}`, - ); - } else if (response.status !== 200) { - throw new Error( - `Unknown Error: ${response.status}, error: ${response.data}`, - ); - } - } -} - -function decodeErrorData(data: unknown) { - if (Buffer.isBuffer(data)) { - const decoded = data.toString("utf-8"); - return decoded; - } - return data; -} diff --git a/nodejs/lancedb/remote/connection.ts b/nodejs/lancedb/remote/connection.ts deleted file mode 100644 index 4d914c54..00000000 --- a/nodejs/lancedb/remote/connection.ts +++ /dev/null @@ -1,193 +0,0 @@ -import { Schema } from "apache-arrow"; -import { - Data, - SchemaLike, - fromTableToStreamBuffer, - makeEmptyTable, -} from "../arrow"; -import { - Connection, - CreateTableOptions, - OpenTableOptions, - TableNamesOptions, -} from "../connection"; -import { Table } from "../table"; -import { TTLCache } from "../util"; -import { RestfulLanceDBClient } from "./client"; -import { RemoteTable } from "./table"; - -export interface RemoteConnectionOptions { - apiKey?: string; - region?: string; - hostOverride?: string; - timeout?: number; -} - -export class RemoteConnection extends Connection { - #dbName: string; - #apiKey: string; - #region: string; - #client: RestfulLanceDBClient; - #tableCache = new TTLCache(300_000); - - constructor( - url: string, - { apiKey, region, hostOverride, timeout }: RemoteConnectionOptions, - ) { - super(); - apiKey = apiKey ?? process.env.LANCEDB_API_KEY; - region = region ?? process.env.LANCEDB_REGION; - - if (!apiKey) { - throw new Error("apiKey is required when connecting to LanceDB Cloud"); - } - - if (!region) { - throw new Error("region is required when connecting to LanceDB Cloud"); - } - - const parsed = new URL(url); - if (parsed.protocol !== "db:") { - throw new Error( - `invalid protocol: ${parsed.protocol}, only accepts db://`, - ); - } - - this.#dbName = parsed.hostname; - this.#apiKey = apiKey; - this.#region = region; - this.#client = new RestfulLanceDBClient( - this.#dbName, - this.#apiKey, - this.#region, - hostOverride, - timeout, - ); - } - - isOpen(): boolean { - return this.#client.isOpen(); - } - close(): void { - return this.#client.close(); - } - - display(): string { - return `RemoteConnection(${this.#dbName})`; - } - - async tableNames(options?: Partial): Promise { - const response = await this.#client.get("/v1/table/", { - limit: options?.limit ?? 10, - // biome-ignore lint/style/useNamingConvention: - page_token: options?.startAfter ?? "", - }); - const body = await response.body(); - for (const table of body.tables) { - this.#tableCache.set(table, true); - } - return body.tables; - } - - async openTable( - name: string, - _options?: Partial | undefined, - ): Promise { - if (this.#tableCache.get(name) === undefined) { - await this.#client.post( - `/v1/table/${encodeURIComponent(name)}/describe/`, - ); - this.#tableCache.set(name, true); - } - return new RemoteTable(this.#client, name, this.#dbName); - } - - async createTable( - nameOrOptions: - | string - | ({ name: string; data: Data } & Partial), - data?: Data, - options?: Partial | undefined, - ): Promise
{ - if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) { - const { name, data, ...options } = nameOrOptions; - return this.createTable(name, data, options); - } - if (data === undefined) { - throw new Error("data is required"); - } - if (options?.mode) { - console.warn( - "option 'mode' is not supported in LanceDB Cloud", - "LanceDB Cloud only supports the default 'create' mode.", - "If the table already exists, an error will be thrown.", - ); - } - if (options?.embeddingFunction) { - console.warn( - "embedding_functions is not yet supported on LanceDB Cloud.", - "Please vote https://github.com/lancedb/lancedb/issues/626 ", - "for this feature.", - ); - } - - const { buf } = await Table.parseTableData( - data, - options, - true /** streaming */, - ); - - await this.#client.post( - `/v1/table/${encodeURIComponent(nameOrOptions)}/create/`, - buf, - { - config: { - responseType: "arraybuffer", - }, - headers: { "Content-Type": "application/vnd.apache.arrow.stream" }, - }, - ); - this.#tableCache.set(nameOrOptions, true); - return new RemoteTable(this.#client, nameOrOptions, this.#dbName); - } - - async createEmptyTable( - name: string, - schema: SchemaLike, - options?: Partial | undefined, - ): Promise
{ - if (options?.mode) { - console.warn(`mode is not supported on LanceDB Cloud`); - } - - if (options?.embeddingFunction) { - console.warn( - "embeddingFunction is not yet supported on LanceDB Cloud.", - "Please vote https://github.com/lancedb/lancedb/issues/626 ", - "for this feature.", - ); - } - const emptyTable = makeEmptyTable(schema); - const buf = await fromTableToStreamBuffer(emptyTable); - - await this.#client.post( - `/v1/table/${encodeURIComponent(name)}/create/`, - buf, - { - config: { - responseType: "arraybuffer", - }, - headers: { "Content-Type": "application/vnd.apache.arrow.stream" }, - }, - ); - - this.#tableCache.set(name, true); - return new RemoteTable(this.#client, name, this.#dbName); - } - - async dropTable(name: string): Promise { - await this.#client.post(`/v1/table/${encodeURIComponent(name)}/drop/`); - - this.#tableCache.delete(name); - } -} diff --git a/nodejs/lancedb/remote/index.ts b/nodejs/lancedb/remote/index.ts deleted file mode 100644 index d1faaae9..00000000 --- a/nodejs/lancedb/remote/index.ts +++ /dev/null @@ -1,3 +0,0 @@ -export { RestfulLanceDBClient } from "./client"; -export { type RemoteConnectionOptions, RemoteConnection } from "./connection"; -export { RemoteTable } from "./table"; diff --git a/nodejs/lancedb/remote/table.ts b/nodejs/lancedb/remote/table.ts deleted file mode 100644 index c1712415..00000000 --- a/nodejs/lancedb/remote/table.ts +++ /dev/null @@ -1,226 +0,0 @@ -// Copyright 2023 LanceDB Developers. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import { Table as ArrowTable } from "apache-arrow"; - -import { Data, IntoVector } from "../arrow"; - -import { IndexStatistics } from ".."; -import { CreateTableOptions } from "../connection"; -import { IndexOptions } from "../indices"; -import { MergeInsertBuilder } from "../merge"; -import { VectorQuery } from "../query"; -import { AddDataOptions, Table, UpdateOptions } from "../table"; -import { IntoSql, toSQL } from "../util"; -import { RestfulLanceDBClient } from "./client"; - -export class RemoteTable extends Table { - #client: RestfulLanceDBClient; - #name: string; - - // Used in the display() method - #dbName: string; - - get #tablePrefix() { - return `/v1/table/${encodeURIComponent(this.#name)}/`; - } - - get name(): string { - return this.#name; - } - - public constructor( - client: RestfulLanceDBClient, - tableName: string, - dbName: string, - ) { - super(); - this.#client = client; - this.#name = tableName; - this.#dbName = dbName; - } - - isOpen(): boolean { - return !this.#client.isOpen(); - } - - close(): void { - this.#client.close(); - } - - display(): string { - return `RemoteTable(${this.#dbName}; ${this.#name})`; - } - - async schema(): Promise { - const resp = await this.#client.post(`${this.#tablePrefix}/describe/`); - // TODO: parse this into a valid arrow schema - return resp.schema; - } - async add(data: Data, options?: Partial): Promise { - const { buf, mode } = await Table.parseTableData( - data, - options as CreateTableOptions, - true, - ); - await this.#client.post(`${this.#tablePrefix}/insert/`, buf, { - params: { - mode, - }, - headers: { - "Content-Type": "application/vnd.apache.arrow.stream", - }, - }); - } - - async update( - optsOrUpdates: - | (Map | Record) - | ({ - values: Map | Record; - } & Partial) - | ({ - valuesSql: Map | Record; - } & Partial), - options?: Partial, - ): Promise { - const isValues = - "values" in optsOrUpdates && typeof optsOrUpdates.values !== "string"; - const isValuesSql = - "valuesSql" in optsOrUpdates && - typeof optsOrUpdates.valuesSql !== "string"; - const isMap = (obj: unknown): obj is Map => { - return obj instanceof Map; - }; - - let predicate; - let columns: [string, string][]; - switch (true) { - case isMap(optsOrUpdates): - columns = Array.from(optsOrUpdates.entries()); - predicate = options?.where; - break; - case isValues && isMap(optsOrUpdates.values): - columns = Array.from(optsOrUpdates.values.entries()).map(([k, v]) => [ - k, - toSQL(v), - ]); - predicate = optsOrUpdates.where; - break; - case isValues && !isMap(optsOrUpdates.values): - columns = Object.entries(optsOrUpdates.values).map(([k, v]) => [ - k, - toSQL(v), - ]); - predicate = optsOrUpdates.where; - break; - - case isValuesSql && isMap(optsOrUpdates.valuesSql): - columns = Array.from(optsOrUpdates.valuesSql.entries()); - predicate = optsOrUpdates.where; - break; - case isValuesSql && !isMap(optsOrUpdates.valuesSql): - columns = Object.entries(optsOrUpdates.valuesSql).map(([k, v]) => [ - k, - v, - ]); - predicate = optsOrUpdates.where; - break; - default: - columns = Object.entries(optsOrUpdates as Record); - predicate = options?.where; - } - - await this.#client.post(`${this.#tablePrefix}/update/`, { - predicate: predicate ?? null, - updates: columns, - }); - } - async countRows(filter?: unknown): Promise { - const payload = { predicate: filter }; - return await this.#client.post(`${this.#tablePrefix}/count_rows/`, payload); - } - - async delete(predicate: unknown): Promise { - const payload = { predicate }; - await this.#client.post(`${this.#tablePrefix}/delete/`, payload); - } - async createIndex( - column: string, - options?: Partial, - ): Promise { - if (options !== undefined) { - console.warn("options are not yet supported on the LanceDB cloud"); - } - const indexType = "vector"; - const metric = "L2"; - const data = { - column, - // biome-ignore lint/style/useNamingConvention: external API - index_type: indexType, - // biome-ignore lint/style/useNamingConvention: external API - metric_type: metric, - }; - await this.#client.post(`${this.#tablePrefix}/create_index`, data); - } - query(): import("..").Query { - throw new Error("query() is not yet supported on the LanceDB cloud"); - } - - search(_query: string | IntoVector): VectorQuery { - throw new Error("search() is not yet supported on the LanceDB cloud"); - } - vectorSearch(_vector: unknown): import("..").VectorQuery { - throw new Error("vectorSearch() is not yet supported on the LanceDB cloud"); - } - addColumns(_newColumnTransforms: unknown): Promise { - throw new Error("addColumns() is not yet supported on the LanceDB cloud"); - } - alterColumns(_columnAlterations: unknown): Promise { - throw new Error("alterColumns() is not yet supported on the LanceDB cloud"); - } - dropColumns(_columnNames: unknown): Promise { - throw new Error("dropColumns() is not yet supported on the LanceDB cloud"); - } - async version(): Promise { - const resp = await this.#client.post(`${this.#tablePrefix}/describe/`); - return resp.version; - } - checkout(_version: unknown): Promise { - throw new Error("checkout() is not yet supported on the LanceDB cloud"); - } - checkoutLatest(): Promise { - throw new Error( - "checkoutLatest() is not yet supported on the LanceDB cloud", - ); - } - restore(): Promise { - throw new Error("restore() is not yet supported on the LanceDB cloud"); - } - optimize(_options?: unknown): Promise { - throw new Error("optimize() is not yet supported on the LanceDB cloud"); - } - async listIndices(): Promise { - return await this.#client.post(`${this.#tablePrefix}/index/list/`); - } - toArrow(): Promise { - throw new Error("toArrow() is not yet supported on the LanceDB cloud"); - } - mergeInsert(_on: string | string[]): MergeInsertBuilder { - throw new Error("mergeInsert() is not yet supported on the LanceDB cloud"); - } - async indexStats(_name: string): Promise { - throw new Error("indexStats() is not yet supported on the LanceDB cloud"); - } -} diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index e9157ff2..05f7377d 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -1,12 +1,12 @@ { "name": "@lancedb/lancedb", - "version": "0.10.0-beta.1", + "version": "0.11.0-beta.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@lancedb/lancedb", - "version": "0.10.0-beta.1", + "version": "0.11.0-beta.1", "cpu": [ "x64", "arm64" @@ -18,7 +18,6 @@ "win32" ], "dependencies": { - "axios": "^1.7.2", "reflect-metadata": "^0.2.2" }, "devDependencies": { @@ -30,6 +29,7 @@ "@napi-rs/cli": "^2.18.3", "@types/axios": "^0.14.0", "@types/jest": "^29.1.2", + "@types/node": "^22.7.4", "@types/tmp": "^0.2.6", "apache-arrow-13": "npm:apache-arrow@13.0.0", "apache-arrow-14": "npm:apache-arrow@14.0.0", @@ -4648,11 +4648,12 @@ "optional": true }, "node_modules/@types/node": { - "version": "20.14.11", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.11.tgz", - "integrity": "sha512-kprQpL8MMeszbz6ojB5/tU8PLN4kesnN8Gjzw349rDlNgsSzg90lAVj3llK99Dh7JON+t9AuscPPFW6mPbTnSA==", + "version": "22.7.4", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.4.tgz", + "integrity": "sha512-y+NPi1rFzDs1NdQHHToqeiX2TIS79SWEAw9GYhkkx8bD0ChpfqC+n2j5OXOCpzfojBEBt6DnEnnG9MY0zk1XLg==", + "devOptional": true, "dependencies": { - "undici-types": "~5.26.4" + "undici-types": "~6.19.2" } }, "node_modules/@types/node-fetch": { @@ -4665,6 +4666,12 @@ "form-data": "^4.0.0" } }, + "node_modules/@types/node/node_modules/undici-types": { + "version": "6.19.8", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", + "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", + "devOptional": true + }, "node_modules/@types/pad-left": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/@types/pad-left/-/pad-left-2.1.1.tgz", @@ -4963,6 +4970,21 @@ "arrow2csv": "bin/arrow2csv.cjs" } }, + "node_modules/apache-arrow-15/node_modules/@types/node": { + "version": "20.16.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.10.tgz", + "integrity": "sha512-vQUKgWTjEIRFCvK6CyriPH3MZYiYlNy0fKiEYHWbcoWLEgs4opurGGKlebrTLqdSMIbXImH6XExNiIyNUv3WpA==", + "dev": true, + "dependencies": { + "undici-types": "~6.19.2" + } + }, + "node_modules/apache-arrow-15/node_modules/undici-types": { + "version": "6.19.8", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", + "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", + "dev": true + }, "node_modules/apache-arrow-16": { "name": "apache-arrow", "version": "16.0.0", @@ -4984,6 +5006,21 @@ "arrow2csv": "bin/arrow2csv.cjs" } }, + "node_modules/apache-arrow-16/node_modules/@types/node": { + "version": "20.16.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.10.tgz", + "integrity": "sha512-vQUKgWTjEIRFCvK6CyriPH3MZYiYlNy0fKiEYHWbcoWLEgs4opurGGKlebrTLqdSMIbXImH6XExNiIyNUv3WpA==", + "dev": true, + "dependencies": { + "undici-types": "~6.19.2" + } + }, + "node_modules/apache-arrow-16/node_modules/undici-types": { + "version": "6.19.8", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", + "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", + "dev": true + }, "node_modules/apache-arrow-17": { "name": "apache-arrow", "version": "17.0.0", @@ -5011,12 +5048,42 @@ "integrity": "sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg==", "dev": true }, + "node_modules/apache-arrow-17/node_modules/@types/node": { + "version": "20.16.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.10.tgz", + "integrity": "sha512-vQUKgWTjEIRFCvK6CyriPH3MZYiYlNy0fKiEYHWbcoWLEgs4opurGGKlebrTLqdSMIbXImH6XExNiIyNUv3WpA==", + "dev": true, + "dependencies": { + "undici-types": "~6.19.2" + } + }, "node_modules/apache-arrow-17/node_modules/flatbuffers": { "version": "24.3.25", "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-24.3.25.tgz", "integrity": "sha512-3HDgPbgiwWMI9zVB7VYBHaMrbOO7Gm0v+yD2FV/sCKj+9NDeVL7BOBYUuhWAQGKWOzBo8S9WdMvV0eixO233XQ==", "dev": true }, + "node_modules/apache-arrow-17/node_modules/undici-types": { + "version": "6.19.8", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", + "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", + "dev": true + }, + "node_modules/apache-arrow/node_modules/@types/node": { + "version": "20.16.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.16.10.tgz", + "integrity": "sha512-vQUKgWTjEIRFCvK6CyriPH3MZYiYlNy0fKiEYHWbcoWLEgs4opurGGKlebrTLqdSMIbXImH6XExNiIyNUv3WpA==", + "peer": true, + "dependencies": { + "undici-types": "~6.19.2" + } + }, + "node_modules/apache-arrow/node_modules/undici-types": { + "version": "6.19.8", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", + "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", + "peer": true + }, "node_modules/argparse": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", @@ -5046,12 +5113,14 @@ "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "devOptional": true }, "node_modules/axios": { "version": "1.7.2", "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.2.tgz", "integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==", + "dev": true, "dependencies": { "follow-redirects": "^1.15.6", "form-data": "^4.0.0", @@ -5536,6 +5605,7 @@ "version": "1.0.8", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "devOptional": true, "dependencies": { "delayed-stream": "~1.0.0" }, @@ -5723,6 +5793,7 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "devOptional": true, "engines": { "node": ">=0.4.0" } @@ -6248,6 +6319,7 @@ "version": "1.15.6", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", + "dev": true, "funding": [ { "type": "individual", @@ -6267,6 +6339,7 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "devOptional": true, "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", @@ -7773,6 +7846,7 @@ "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "devOptional": true, "engines": { "node": ">= 0.6" } @@ -7781,6 +7855,7 @@ "version": "2.1.35", "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "devOptional": true, "dependencies": { "mime-db": "1.52.0" }, @@ -8393,7 +8468,8 @@ "node_modules/proxy-from-env": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", - "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "dev": true }, "node_modules/pump": { "version": "3.0.0", @@ -9561,7 +9637,8 @@ "node_modules/undici-types": { "version": "5.26.5", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", - "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "optional": true }, "node_modules/update-browserslist-db": { "version": "1.0.13", diff --git a/nodejs/package.json b/nodejs/package.json index 064f54fa..b3410e47 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -40,6 +40,7 @@ "@napi-rs/cli": "^2.18.3", "@types/axios": "^0.14.0", "@types/jest": "^29.1.2", + "@types/node": "^22.7.4", "@types/tmp": "^0.2.6", "apache-arrow-13": "npm:apache-arrow@13.0.0", "apache-arrow-14": "npm:apache-arrow@14.0.0", @@ -81,7 +82,6 @@ "version": "napi version" }, "dependencies": { - "axios": "^1.7.2", "reflect-metadata": "^0.2.2" }, "optionalDependencies": { diff --git a/nodejs/src/connection.rs b/nodejs/src/connection.rs index 4a454bfa..9f2a7305 100644 --- a/nodejs/src/connection.rs +++ b/nodejs/src/connection.rs @@ -68,6 +68,24 @@ impl Connection { builder = builder.storage_option(key, value); } } + + let client_config = options.client_config.unwrap_or_default(); + builder = builder.client_config(client_config.into()); + + if let Some(api_key) = options.api_key { + builder = builder.api_key(&api_key); + } + + if let Some(region) = options.region { + builder = builder.region(®ion); + } else { + builder = builder.region("us-east-1"); + } + + if let Some(host_override) = options.host_override { + builder = builder.host_override(&host_override); + } + Ok(Self::inner_new( builder .execute() diff --git a/nodejs/src/lib.rs b/nodejs/src/lib.rs index 1c15ff91..54fde9bc 100644 --- a/nodejs/src/lib.rs +++ b/nodejs/src/lib.rs @@ -22,6 +22,7 @@ mod index; mod iterator; pub mod merge; mod query; +pub mod remote; mod table; mod util; @@ -42,6 +43,19 @@ pub struct ConnectionOptions { /// /// The available options are described at https://lancedb.github.io/lancedb/guides/storage/ pub storage_options: Option>, + + /// (For LanceDB cloud only): configuration for the remote HTTP client. + pub client_config: Option, + /// (For LanceDB cloud only): the API key to use with LanceDB Cloud. + /// + /// Can also be set via the environment variable `LANCEDB_API_KEY`. + pub api_key: Option, + /// (For LanceDB cloud only): the region to use for LanceDB cloud. + /// Defaults to 'us-east-1'. + pub region: Option, + /// (For LanceDB cloud only): the host to use for LanceDB cloud. Used + /// for testing purposes. + pub host_override: Option, } /// Write mode for writing a table. diff --git a/nodejs/src/remote.rs b/nodejs/src/remote.rs new file mode 100644 index 00000000..3ed940aa --- /dev/null +++ b/nodejs/src/remote.rs @@ -0,0 +1,120 @@ +// Copyright 2024 Lance Developers. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use napi_derive::*; + +/// Timeout configuration for remote HTTP client. +#[napi(object)] +#[derive(Debug)] +pub struct TimeoutConfig { + /// The timeout for establishing a connection in seconds. Default is 120 + /// seconds (2 minutes). This can also be set via the environment variable + /// `LANCE_CLIENT_CONNECT_TIMEOUT`, as an integer number of seconds. + pub connect_timeout: Option, + /// The timeout for reading data from the server in seconds. Default is 300 + /// seconds (5 minutes). This can also be set via the environment variable + /// `LANCE_CLIENT_READ_TIMEOUT`, as an integer number of seconds. + pub read_timeout: Option, + /// The timeout for keeping idle connections in the connection pool in seconds. + /// Default is 300 seconds (5 minutes). This can also be set via the + /// environment variable `LANCE_CLIENT_CONNECTION_TIMEOUT`, as an integer + /// number of seconds. + pub pool_idle_timeout: Option, +} + +/// Retry configuration for the remote HTTP client. +#[napi(object)] +#[derive(Debug)] +pub struct RetryConfig { + /// The maximum number of retries for a request. Default is 3. You can also + /// set this via the environment variable `LANCE_CLIENT_MAX_RETRIES`. + pub retries: Option, + /// The maximum number of retries for connection errors. Default is 3. You + /// can also set this via the environment variable `LANCE_CLIENT_CONNECT_RETRIES`. + pub connect_retries: Option, + /// The maximum number of retries for read errors. Default is 3. You can also + /// set this via the environment variable `LANCE_CLIENT_READ_RETRIES`. + pub read_retries: Option, + /// The backoff factor to apply between retries. Default is 0.25. Between each retry + /// the client will wait for the amount of seconds: + /// `{backoff factor} * (2 ** ({number of previous retries}))`. So for the default + /// of 0.25, the first retry will wait 0.25 seconds, the second retry will wait 0.5 + /// seconds, the third retry will wait 1 second, etc. + /// + /// You can also set this via the environment variable + /// `LANCE_CLIENT_RETRY_BACKOFF_FACTOR`. + pub backoff_factor: Option, + /// The jitter to apply to the backoff factor, in seconds. Default is 0.25. + /// + /// A random value between 0 and `backoff_jitter` will be added to the backoff + /// factor in seconds. So for the default of 0.25 seconds, between 0 and 250 + /// milliseconds will be added to the sleep between each retry. + /// + /// You can also set this via the environment variable + /// `LANCE_CLIENT_RETRY_BACKOFF_JITTER`. + pub backoff_jitter: Option, + /// The HTTP status codes for which to retry the request. Default is + /// [429, 500, 502, 503]. + /// + /// You can also set this via the environment variable + /// `LANCE_CLIENT_RETRY_STATUSES`. Use a comma-separated list of integers. + pub statuses: Option>, +} + +#[napi(object)] +#[derive(Debug, Default)] +pub struct ClientConfig { + pub user_agent: Option, + pub retry_config: Option, + pub timeout_config: Option, +} + +impl From for lancedb::remote::TimeoutConfig { + fn from(config: TimeoutConfig) -> Self { + Self { + connect_timeout: config + .connect_timeout + .map(std::time::Duration::from_secs_f64), + read_timeout: config.read_timeout.map(std::time::Duration::from_secs_f64), + pool_idle_timeout: config + .pool_idle_timeout + .map(std::time::Duration::from_secs_f64), + } + } +} + +impl From for lancedb::remote::RetryConfig { + fn from(config: RetryConfig) -> Self { + Self { + retries: config.retries, + connect_retries: config.connect_retries, + read_retries: config.read_retries, + backoff_factor: config.backoff_factor.map(|v| v as f32), + backoff_jitter: config.backoff_jitter.map(|v| v as f32), + statuses: config.statuses, + } + } +} + +impl From for lancedb::remote::ClientConfig { + fn from(config: ClientConfig) -> Self { + Self { + user_agent: config + .user_agent + .unwrap_or(concat!("LanceDB-Node-Client/", env!("CARGO_PKG_VERSION")).to_string()), + retry_config: config.retry_config.map(Into::into).unwrap_or_default(), + timeout_config: config.timeout_config.map(Into::into).unwrap_or_default(), + } + } +}