diff --git a/docs/src/js/functions/connectNamespace.md b/docs/src/js/functions/connectNamespace.md new file mode 100644 index 000000000..d49a8ced7 --- /dev/null +++ b/docs/src/js/functions/connectNamespace.md @@ -0,0 +1,131 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / connectNamespace + +# Function: connectNamespace() + +## connectNamespace(implName, config, options) + +```ts +function connectNamespace( + implName, + config, + options?): Promise +``` + +Connect to a LanceDB database through a namespace. + +Unlike [connect](connect.md), which routes by URI scheme (local path vs. +`db://` cloud), `connectNamespace` always returns a namespace-backed +connection. The `implName` selects the namespace implementation: + +- `"dir"` — directory namespace, configured with [DirNamespaceConfig](../interfaces/DirNamespaceConfig.md). +- `"rest"` — remote REST catalog, configured with [RestNamespaceConfig](../interfaces/RestNamespaceConfig.md). +- Any other string — full module path for a custom implementation, + configured with a free-form string-keyed `properties` map. + +### Parameters + +* **implName**: `"dir"` + +* **config**: [`DirNamespaceConfig`](../interfaces/DirNamespaceConfig.md) + +* **options?**: `Partial`<[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)> + +### Returns + +`Promise`<[`Connection`](../classes/Connection.md)> + +### Examples + +```ts +const db = await connectNamespace("dir", { root: "/path/to/db" }); +await db.createTable("users", [{ id: 1 }]); +``` + +```ts +const db = await connectNamespace("rest", { + uri: "https://catalog.example.com", + headers: { "x-api-key": process.env.CATALOG_KEY ?? 
"" }, +}); +``` + +```ts +const db = await connectNamespace("my.custom.Namespace", { + endpoint: "...", +}); +``` + +## connectNamespace(implName, config, options) + +```ts +function connectNamespace( + implName, + config, + options?): Promise +``` + +Connect through the built-in REST namespace. + +Configured with [RestNamespaceConfig](../interfaces/RestNamespaceConfig.md). See the function-level +documentation above for the full surface, examples, and how this +relates to [connect](connect.md). + +### Parameters + +* **implName**: `"rest"` + +* **config**: [`RestNamespaceConfig`](../interfaces/RestNamespaceConfig.md) + +* **options?**: `Partial`<[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)> + +### Returns + +`Promise`<[`Connection`](../classes/Connection.md)> + +### Example + +```ts +const db = await connectNamespace("rest", { + uri: "https://catalog.example.com", + headers: { "x-api-key": process.env.CATALOG_KEY ?? "" }, +}); +``` + +## connectNamespace(implName, properties, options) + +```ts +function connectNamespace( + implName, + properties, + options?): Promise +``` + +Connect through a custom namespace implementation by full module path, +configured with a free-form string-keyed `properties` map. Use the +typed overloads above for the built-in `"dir"` and `"rest"` impls. + +See the function-level documentation above for examples and how this +relates to [connect](connect.md). 
+ +### Parameters + +* **implName**: `string` + +* **properties**: `Record`<`string`, `string`> + +* **options?**: `Partial`<[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)> + +### Returns + +`Promise`<[`Connection`](../classes/Connection.md)> + +### Example + +```ts +const db = await connectNamespace("my.custom.Namespace", { + endpoint: "...", +}); +``` diff --git a/docs/src/js/globals.md b/docs/src/js/globals.md index f6f84f4c7..fca33544f 100644 --- a/docs/src/js/globals.md +++ b/docs/src/js/globals.md @@ -51,12 +51,14 @@ - [ClientConfig](interfaces/ClientConfig.md) - [ColumnAlteration](interfaces/ColumnAlteration.md) - [CompactionStats](interfaces/CompactionStats.md) +- [ConnectNamespaceOptions](interfaces/ConnectNamespaceOptions.md) - [ConnectionOptions](interfaces/ConnectionOptions.md) - [CreateNamespaceOptions](interfaces/CreateNamespaceOptions.md) - [CreateNamespaceResponse](interfaces/CreateNamespaceResponse.md) - [CreateTableOptions](interfaces/CreateTableOptions.md) - [DeleteResult](interfaces/DeleteResult.md) - [DescribeNamespaceResponse](interfaces/DescribeNamespaceResponse.md) +- [DirNamespaceConfig](interfaces/DirNamespaceConfig.md) - [DropColumnsResult](interfaces/DropColumnsResult.md) - [DropNamespaceOptions](interfaces/DropNamespaceOptions.md) - [DropNamespaceResponse](interfaces/DropNamespaceResponse.md) @@ -82,6 +84,7 @@ - [OptimizeStats](interfaces/OptimizeStats.md) - [QueryExecutionOptions](interfaces/QueryExecutionOptions.md) - [RemovalStats](interfaces/RemovalStats.md) +- [RestNamespaceConfig](interfaces/RestNamespaceConfig.md) - [RetryConfig](interfaces/RetryConfig.md) - [ShuffleOptions](interfaces/ShuffleOptions.md) - [SplitCalculatedOptions](interfaces/SplitCalculatedOptions.md) @@ -114,6 +117,7 @@ - [RecordBatchIterator](functions/RecordBatchIterator.md) - [connect](functions/connect.md) +- [connectNamespace](functions/connectNamespace.md) - [makeArrowTable](functions/makeArrowTable.md) - 
[packBits](functions/packBits.md) - [permutationBuilder](functions/permutationBuilder.md) diff --git a/docs/src/js/interfaces/ConnectNamespaceOptions.md b/docs/src/js/interfaces/ConnectNamespaceOptions.md new file mode 100644 index 000000000..f2d6cd0ab --- /dev/null +++ b/docs/src/js/interfaces/ConnectNamespaceOptions.md @@ -0,0 +1,54 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / ConnectNamespaceOptions + +# Interface: ConnectNamespaceOptions + +## Properties + +### namespaceClientProperties? + +```ts +optional namespaceClientProperties: Record; +``` + +Extra properties for the backing namespace client. + +*** + +### readConsistencyInterval? + +```ts +optional readConsistencyInterval: number; +``` + +The interval, in seconds, at which to check for updates to the table +from other processes. If None, then consistency is not checked. For +performance reasons, this is the default. For strong consistency, set +this to zero seconds. Then every read will check for updates from other +processes. As a compromise, you can set this to a non-zero value for +eventual consistency. + +*** + +### session? + +```ts +optional session: Session; +``` + +The session to use for this connection. Holds shared caches and other +session-specific state. + +*** + +### storageOptions? + +```ts +optional storageOptions: Record; +``` + +Configuration for object storage. The available options are described +at https://docs.lancedb.com/storage/ diff --git a/docs/src/js/interfaces/DirNamespaceConfig.md b/docs/src/js/interfaces/DirNamespaceConfig.md new file mode 100644 index 000000000..83313af51 --- /dev/null +++ b/docs/src/js/interfaces/DirNamespaceConfig.md @@ -0,0 +1,47 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / DirNamespaceConfig + +# Interface: DirNamespaceConfig + +Configuration for the built-in directory namespace (`"dir"`). 
+ +The directory namespace stores tables under a single root path (local +filesystem or object storage URI). See +[https://docs.lancedb.com/namespaces](https://docs.lancedb.com/namespaces) for the documented surface; +less-common knobs live under [DirNamespaceConfig.extraProperties](DirNamespaceConfig.md#extraproperties). + +## Properties + +### extraProperties? + +```ts +optional extraProperties: Record; +``` + +Additional raw properties passed verbatim to the namespace +implementation (e.g. `storage.*`, `credential_vendor.*`). Typed +fields above take precedence on key collision. + +*** + +### manifestEnabled? + +```ts +optional manifestEnabled: boolean; +``` + +Whether to maintain a namespace manifest at the root. Required for +child namespaces. Defaults to true on the impl side. + +*** + +### root + +```ts +root: string; +``` + +Root path or URI containing the LanceDB tables. diff --git a/docs/src/js/interfaces/RestNamespaceConfig.md b/docs/src/js/interfaces/RestNamespaceConfig.md new file mode 100644 index 000000000..5d9b09d09 --- /dev/null +++ b/docs/src/js/interfaces/RestNamespaceConfig.md @@ -0,0 +1,47 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / RestNamespaceConfig + +# Interface: RestNamespaceConfig + +Configuration for the built-in REST namespace (`"rest"`). + +The REST namespace talks to a remote catalog server over HTTP. See +[https://docs.lancedb.com/namespaces](https://docs.lancedb.com/namespaces) for the documented surface; +less-common knobs (TLS, metrics) live under +[RestNamespaceConfig.extraProperties](RestNamespaceConfig.md#extraproperties). + +## Properties + +### extraProperties? + +```ts +optional extraProperties: Record; +``` + +Additional raw properties passed verbatim to the namespace +implementation (e.g. `tls.*`, `ops_metrics_enabled`, `delimiter`). +Typed fields above take precedence on key collision. + +*** + +### headers? 
+ +```ts +optional headers: Record; +``` + +HTTP headers forwarded with each request. Keys are passed through +as-is (e.g. `"x-api-key"`, `"Authorization"`). + +*** + +### uri + +```ts +uri: string; +``` + +Catalog endpoint URL. diff --git a/nodejs/__test__/connection.test.ts b/nodejs/__test__/connection.test.ts index f03796e9b..b7bafad7c 100644 --- a/nodejs/__test__/connection.test.ts +++ b/nodejs/__test__/connection.test.ts @@ -4,7 +4,7 @@ import { readdirSync } from "fs"; import { Field, Float64, Schema } from "apache-arrow"; import * as tmp from "tmp"; -import { Connection, Table, connect } from "../lancedb"; +import { Connection, Table, connect, connectNamespace } from "../lancedb"; import { LocalTable } from "../lancedb/table"; describe("when connecting", () => { @@ -397,3 +397,95 @@ describe("namespaces", () => { ).rejects.toThrow(/Invalid behavior 'frobnicate'/); }); }); + +describe("connectNamespace", () => { + let tmpDir: tmp.DirResult; + beforeEach(() => { + tmpDir = tmp.dirSync({ unsafeCleanup: true }); + }); + afterEach(() => tmpDir.removeCallback()); + + it("connects via the dir implementation and supports table ops", async () => { + const db = await connectNamespace("dir", { root: tmpDir.name }); + await db.createTable("users", [{ id: 1 }, { id: 2 }]); + await expect(db.tableNames()).resolves.toContain("users"); + }); + + it("throws a clear error when implName is empty", async () => { + await expect(connectNamespace("", {})).rejects.toThrow( + "implName must be a non-empty string", + ); + }); + + it("throws when the namespace implementation is unknown", async () => { + await expect(connectNamespace("not-a-real-impl", {})).rejects.toThrow(); + }); + + it("passes storage options through to the namespace", async () => { + const db = await connectNamespace( + "dir", + { root: tmpDir.name }, + { storageOptions: { newTableDataStorageVersion: "stable" } }, + ); + await db.createTable("plumbing", [{ id: 1 }]); + await 
expect(db.tableNames()).resolves.toContain("plumbing"); + }); + + it("supports child namespaces when manifestEnabled is true on the dir config", async () => { + const writer = await connectNamespace("dir", { + root: tmpDir.name, + manifestEnabled: true, + }); + await writer.createNamespace(["analytics"]); + await writer.createTable("orders", [{ id: 1 }, { id: 2 }], ["analytics"]); + await writer.close(); + + const reader = await connectNamespace("dir", { + root: tmpDir.name, + manifestEnabled: true, + }); + await expect(reader.tableNames(["analytics"])).resolves.toContain("orders"); + const orders = await reader.openTable("orders", ["analytics"]); + await expect(orders.countRows()).resolves.toBe(2); + }); + + it("merges extraProperties into the dir config and is overridden by typed fields", async () => { + // Two observable assertions: + // - Typed `root` overrides extraProperties.root: createTable would fail + // under the bogus path if the override didn't happen. + // - extraProperties.manifest_enabled="false" is honored end-to-end. Child + // namespaces require manifest mode (default true), so explicitly + // disabling it via extraProperties must make createNamespace reject. If + // extraProperties pass-through were silently broken, the default would + // let createNamespace succeed. + const db = await connectNamespace("dir", { + root: tmpDir.name, + extraProperties: { + root: "/should/be/overridden", + // biome-ignore lint/style/useNamingConvention: backend property key + manifest_enabled: "false", + }, + }); + await db.createTable("base", [{ id: 1 }]); + await expect(db.tableNames()).resolves.toContain("base"); + await expect(db.createNamespace(["analytics"])).rejects.toThrow(); + }); + + it("flows unknown top-level keys through when implName is dynamic (no silent drop)", async () => { + // Routes via the third overload because `impl` is `string`, not the + // literal `"dir"`. 
The dispatcher still notices the runtime value is + // "dir", but unknown keys like `manifest_enabled` must not be silently + // dropped during the conversion. + // + // Asserting a *negative* outcome (manifest disabled -> createNamespace + // rejects) is required for observability, since the backend default for + // `manifest_enabled` is true. + const impl: string = "dir"; + const db = await connectNamespace(impl, { + root: tmpDir.name, + // biome-ignore lint/style/useNamingConvention: backend property key + manifest_enabled: "false", + }); + await expect(db.createNamespace(["mixed"])).rejects.toThrow(); + }); +}); diff --git a/nodejs/lancedb/index.ts b/nodejs/lancedb/index.ts index b56055383..8952cf043 100644 --- a/nodejs/lancedb/index.ts +++ b/nodejs/lancedb/index.ts @@ -8,6 +8,7 @@ import { } from "./connection"; import { + ConnectNamespaceOptions, ConnectionOptions, Connection as LanceDbConnection, JsHeaderProvider as NativeJsHeaderProvider, @@ -22,6 +23,7 @@ export { JsHeaderProvider as NativeJsHeaderProvider } from "./native.js"; export { AddColumnsSql, ConnectionOptions, + ConnectNamespaceOptions, IndexStatistics, IndexConfig, ClientConfig, @@ -300,3 +302,197 @@ export async function connect( ); return new LocalConnection(nativeConn); } + +/** + * Configuration for the built-in directory namespace (`"dir"`). + * + * The directory namespace stores tables under a single root path (local + * filesystem or object storage URI). See + * {@link https://docs.lancedb.com/namespaces} for the documented surface; + * less-common knobs live under {@link DirNamespaceConfig.extraProperties}. + */ +export interface DirNamespaceConfig { + /** Root path or URI containing the LanceDB tables. */ + root: string; + /** + * Whether to maintain a namespace manifest at the root. Required for + * child namespaces. Defaults to true on the impl side. + */ + manifestEnabled?: boolean; + /** + * Additional raw properties passed verbatim to the namespace + * implementation (e.g. 
`storage.*`, `credential_vendor.*`). Typed + * fields above take precedence on key collision. + */ + extraProperties?: Record; +} + +/** + * Configuration for the built-in REST namespace (`"rest"`). + * + * The REST namespace talks to a remote catalog server over HTTP. See + * {@link https://docs.lancedb.com/namespaces} for the documented surface; + * less-common knobs (TLS, metrics) live under + * {@link RestNamespaceConfig.extraProperties}. + */ +export interface RestNamespaceConfig { + /** Catalog endpoint URL. */ + uri: string; + /** + * HTTP headers forwarded with each request. Keys are passed through + * as-is (e.g. `"x-api-key"`, `"Authorization"`). + */ + headers?: Record; + /** + * Additional raw properties passed verbatim to the namespace + * implementation (e.g. `tls.*`, `ops_metrics_enabled`, `delimiter`). + * Typed fields above take precedence on key collision. + */ + extraProperties?: Record; +} + +function dirConfigToProperties( + config: DirNamespaceConfig, +): Record { + // Spread the whole input so that unknown keys (e.g. a raw `manifest_enabled` + // passed via the dynamic-impl path) flow through instead of being dropped. + // Typed transformations layer on top. + const { manifestEnabled, extraProperties, ...rest } = config; + const properties: Record = { + ...(extraProperties ?? {}), + ...(rest as Record), + }; + if (manifestEnabled !== undefined) { + properties.manifest_enabled = String(manifestEnabled); + } + return properties; +} + +function restConfigToProperties( + config: RestNamespaceConfig, +): Record { + const { headers, extraProperties, ...rest } = config; + const properties: Record = { + ...(extraProperties ?? {}), + ...(rest as Record), + }; + if (headers) { + for (const [name, value] of Object.entries(headers)) { + properties[`headers.${name}`] = value; + } + } + return properties; +} + +/** + * Connect to a LanceDB database through a namespace. + * + * Unlike {@link connect}, which routes by URI scheme (local path vs. 
+ * `db://` cloud), `connectNamespace` always returns a namespace-backed + * connection. The `implName` selects the namespace implementation: + * + * - `"dir"` — directory namespace, configured with {@link DirNamespaceConfig}. + * - `"rest"` — remote REST catalog, configured with {@link RestNamespaceConfig}. + * - Any other string — full module path for a custom implementation, + * configured with a free-form string-keyed `properties` map. + * + * @example Typed dir namespace + * ```ts + * const db = await connectNamespace("dir", { root: "/path/to/db" }); + * await db.createTable("users", [{ id: 1 }]); + * ``` + * + * @example Typed REST namespace with auth headers + * ```ts + * const db = await connectNamespace("rest", { + * uri: "https://catalog.example.com", + * headers: { "x-api-key": process.env.CATALOG_KEY ?? "" }, + * }); + * ``` + * + * @example Custom implementation with raw properties + * ```ts + * const db = await connectNamespace("my.custom.Namespace", { + * endpoint: "...", + * }); + * ``` + */ +export function connectNamespace( + implName: "dir", + config: DirNamespaceConfig, + options?: Partial, +): Promise; +/** + * Connect through the built-in REST namespace. + * + * Configured with {@link RestNamespaceConfig}. See the function-level + * documentation above for the full surface, examples, and how this + * relates to {@link connect}. + * + * @example + * ```ts + * const db = await connectNamespace("rest", { + * uri: "https://catalog.example.com", + * headers: { "x-api-key": process.env.CATALOG_KEY ?? "" }, + * }); + * ``` + */ +export function connectNamespace( + implName: "rest", + config: RestNamespaceConfig, + options?: Partial, +): Promise; +/** + * Connect through a custom namespace implementation by full module path, + * configured with a free-form string-keyed `properties` map. Use the + * typed overloads above for the built-in `"dir"` and `"rest"` impls. 
+ * + * See the function-level documentation above for examples and how this + * relates to {@link connect}. + * + * @example + * ```ts + * const db = await connectNamespace("my.custom.Namespace", { + * endpoint: "...", + * }); + * ``` + */ +export function connectNamespace( + implName: string, + properties: Record<string, string>, + options?: Partial<ConnectNamespaceOptions>, +): Promise<Connection>; +export async function connectNamespace( + implName: string, + configOrProperties: + | DirNamespaceConfig + | RestNamespaceConfig + | Record<string, string>, + options?: Partial<ConnectNamespaceOptions>, +): Promise<Connection> { + let properties: Record<string, string>; + if (implName === "dir") { + properties = dirConfigToProperties( + configOrProperties as DirNamespaceConfig, + ); + } else if (implName === "rest") { + properties = restConfigToProperties( + configOrProperties as RestNamespaceConfig, + ); + } else { + properties = configOrProperties as Record<string, string>; + } + + // Shallow-copy first so the caller's `options` object is never mutated below. + const finalOptions = { ...options } as ConnectNamespaceOptions; + finalOptions.storageOptions = cleanseStorageOptions( + finalOptions.storageOptions, + ); + + const nativeConn = await LanceDbConnection.newWithNamespace( + implName, + properties, + finalOptions, + ); + return new LocalConnection(nativeConn); +} diff --git a/nodejs/src/connection.rs b/nodejs/src/connection.rs index 058b74f96..eb8bab22e 100644 --- a/nodejs/src/connection.rs +++ b/nodejs/src/connection.rs @@ -8,11 +8,12 @@ use lancedb::database::{CreateTableMode, Database}; use napi::bindgen_prelude::*; use napi_derive::*; +use crate::ConnectNamespaceOptions; use crate::ConnectionOptions; use crate::error::NapiErrorExt; use crate::header::JsHeaderProvider; use crate::table::Table; -use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection}; +use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection, connect_namespace}; use lance_namespace::models::{ CreateNamespaceRequest, DescribeNamespaceRequest, DropNamespaceRequest, ListNamespacesRequest, @@ -132,6 +133,39 @@ impl Connection {
Ok(Self::inner_new(builder.execute().await.default_error()?)) } + /// Create a new Connection instance backed by a namespace implementation. + #[napi(factory)] + pub async fn new_with_namespace( + impl_name: String, + properties: HashMap<String, String>, + options: ConnectNamespaceOptions, + ) -> napi::Result<Self> { + if impl_name.is_empty() { + return Err(napi::Error::from_reason( + "implName must be a non-empty string", + )); + } + let mut builder = connect_namespace(&impl_name, properties); + if let Some(secs) = options.read_consistency_interval { + // Checked conversion: negative/NaN/overflowing input is an error, not a panic. + let interval = std::time::Duration::try_from_secs_f64(secs) + .map_err(|e| napi::Error::from_reason(format!("readConsistencyInterval: {e}")))?; + builder = builder.read_consistency_interval(interval); + } + if let Some(storage_options) = options.storage_options { + for (key, value) in storage_options { + builder = builder.storage_option(key, value); + } + } + if let Some(namespace_client_properties) = options.namespace_client_properties { + builder = builder.namespace_client_properties(namespace_client_properties); + } + if let Some(session) = options.session { + builder = builder.session(session.inner.clone()); + } + Ok(Self::inner_new(builder.execute().await.default_error()?)) + } + #[napi] pub fn display(&self) -> napi::Result<String> { Ok(self.get_inner()?.to_string()) diff --git a/nodejs/src/lib.rs b/nodejs/src/lib.rs index 87bc97ce7..dab6bad67 100644 --- a/nodejs/src/lib.rs +++ b/nodejs/src/lib.rs @@ -67,6 +67,26 @@ pub struct OpenTableOptions { pub storage_options: Option<HashMap<String, String>>, } +#[napi(object)] +#[derive(Debug)] +pub struct ConnectNamespaceOptions { + /// The interval, in seconds, at which to check for updates to the table + /// from other processes. If None, then consistency is not checked. For + /// performance reasons, this is the default. For strong consistency, set + /// this to zero seconds. Then every read will check for updates from other + /// processes. As a compromise, you can set this to a non-zero value for + /// eventual consistency. + pub read_consistency_interval: Option<f64>, + /// Configuration for object storage.
The available options are described + /// at https://docs.lancedb.com/storage/ + pub storage_options: Option>, + /// Extra properties for the backing namespace client. + pub namespace_client_properties: Option>, + /// The session to use for this connection. Holds shared caches and other + /// session-specific state. + pub session: Option, +} + #[napi_derive::module_init] fn init() { let env = Env::new()