mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-23 15:00:39 +00:00
feat: expose storage options in LanceDB (#1204)
Exposes `storage_options` in LanceDB. This is provided for Python async, Node `lancedb`, and Node `vectordb` (and Rust of course). Python synchronous is omitted because it's not compatible with the PyArrow filesystems we use there currently. In the future, we will move the sync API to wrap the async one, and then it will get support for `storage_options`. 1. Fixes #1168 2. Closes #1165 3. Closes #1082 4. Closes #439 5. Closes #897 6. Closes #642 7. Closes #281 8. Closes #114 9. Closes #990 10. Deprecating `awsCredentials` and `awsRegion`. Users are encouraged to use `storageOptions` instead.
This commit is contained in:
@@ -13,10 +13,32 @@
|
||||
// limitations under the License.
|
||||
|
||||
import { fromTableToBuffer, makeArrowTable, makeEmptyTable } from "./arrow";
|
||||
import { Connection as LanceDbConnection } from "./native";
|
||||
import { ConnectionOptions, Connection as LanceDbConnection } from "./native";
|
||||
import { Table } from "./table";
|
||||
import { Table as ArrowTable, Schema } from "apache-arrow";
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB instance at the given URI.
|
||||
*
|
||||
* Accpeted formats:
|
||||
*
|
||||
* - `/path/to/database` - local database
|
||||
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
||||
* - `db://host:port` - remote database (LanceDB cloud)
|
||||
* @param {string} uri - The uri of the database. If the database uri starts
|
||||
* with `db://` then it connects to a remote database.
|
||||
* @see {@link ConnectionOptions} for more details on the URI format.
|
||||
*/
|
||||
export async function connect(
|
||||
uri: string,
|
||||
opts?: Partial<ConnectionOptions>,
|
||||
): Promise<Connection> {
|
||||
opts = opts ?? {};
|
||||
opts.storageOptions = cleanseStorageOptions(opts.storageOptions);
|
||||
const nativeConn = await LanceDbConnection.new(uri, opts);
|
||||
return new Connection(nativeConn);
|
||||
}
|
||||
|
||||
export interface CreateTableOptions {
|
||||
/**
|
||||
* The mode to use when creating the table.
|
||||
@@ -33,6 +55,28 @@ export interface CreateTableOptions {
|
||||
* then no error will be raised.
|
||||
*/
|
||||
existOk: boolean;
|
||||
|
||||
/**
|
||||
* Configuration for object storage.
|
||||
*
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
}
|
||||
|
||||
export interface OpenTableOptions {
|
||||
/**
|
||||
* Configuration for object storage.
|
||||
*
|
||||
* Options already set on the connection will be inherited by the table,
|
||||
* but can be overridden here.
|
||||
*
|
||||
* The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
*/
|
||||
storageOptions?: Record<string, string>;
|
||||
}
|
||||
|
||||
export interface TableNamesOptions {
|
||||
@@ -109,8 +153,14 @@ export class Connection {
|
||||
* Open a table in the database.
|
||||
* @param {string} name - The name of the table
|
||||
*/
|
||||
async openTable(name: string): Promise<Table> {
|
||||
const innerTable = await this.inner.openTable(name);
|
||||
async openTable(
|
||||
name: string,
|
||||
options?: Partial<OpenTableOptions>,
|
||||
): Promise<Table> {
|
||||
const innerTable = await this.inner.openTable(
|
||||
name,
|
||||
cleanseStorageOptions(options?.storageOptions),
|
||||
);
|
||||
return new Table(innerTable);
|
||||
}
|
||||
|
||||
@@ -139,7 +189,12 @@ export class Connection {
|
||||
table = makeArrowTable(data);
|
||||
}
|
||||
const buf = await fromTableToBuffer(table);
|
||||
const innerTable = await this.inner.createTable(name, buf, mode);
|
||||
const innerTable = await this.inner.createTable(
|
||||
name,
|
||||
buf,
|
||||
mode,
|
||||
cleanseStorageOptions(options?.storageOptions),
|
||||
);
|
||||
return new Table(innerTable);
|
||||
}
|
||||
|
||||
@@ -162,7 +217,12 @@ export class Connection {
|
||||
|
||||
const table = makeEmptyTable(schema);
|
||||
const buf = await fromTableToBuffer(table);
|
||||
const innerTable = await this.inner.createEmptyTable(name, buf, mode);
|
||||
const innerTable = await this.inner.createEmptyTable(
|
||||
name,
|
||||
buf,
|
||||
mode,
|
||||
cleanseStorageOptions(options?.storageOptions),
|
||||
);
|
||||
return new Table(innerTable);
|
||||
}
|
||||
|
||||
@@ -174,3 +234,43 @@ export class Connection {
|
||||
return this.inner.dropTable(name);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes storage options and makes all the keys snake case.
|
||||
*/
|
||||
function cleanseStorageOptions(
|
||||
options?: Record<string, string>,
|
||||
): Record<string, string> | undefined {
|
||||
if (options === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
const result: Record<string, string> = {};
|
||||
for (const [key, value] of Object.entries(options)) {
|
||||
if (value !== undefined) {
|
||||
const newKey = camelToSnakeCase(key);
|
||||
result[newKey] = value;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a string to snake case. It might already be snake case, in which case it is
|
||||
* returned unchanged.
|
||||
*/
|
||||
function camelToSnakeCase(camel: string): string {
|
||||
if (camel.includes("_")) {
|
||||
// Assume if there is at least one underscore, it is already snake case
|
||||
return camel;
|
||||
}
|
||||
if (camel.toLocaleUpperCase() === camel) {
|
||||
// Assume if the string is all uppercase, it is already snake case
|
||||
return camel;
|
||||
}
|
||||
|
||||
let result = camel.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
|
||||
if (result.startsWith("_")) {
|
||||
result = result.slice(1);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -12,12 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import { Connection } from "./connection";
|
||||
import {
|
||||
Connection as LanceDbConnection,
|
||||
ConnectionOptions,
|
||||
} from "./native.js";
|
||||
|
||||
export {
|
||||
WriteOptions,
|
||||
WriteMode,
|
||||
@@ -32,6 +26,7 @@ export {
|
||||
VectorColumnOptions,
|
||||
} from "./arrow";
|
||||
export {
|
||||
connect,
|
||||
Connection,
|
||||
CreateTableOptions,
|
||||
TableNamesOptions,
|
||||
@@ -46,24 +41,3 @@ export {
|
||||
export { Index, IndexOptions, IvfPqOptions } from "./indices";
|
||||
export { Table, AddDataOptions, IndexConfig, UpdateOptions } from "./table";
|
||||
export * as embedding from "./embedding";
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB instance at the given URI.
|
||||
*
|
||||
* Accpeted formats:
|
||||
*
|
||||
* - `/path/to/database` - local database
|
||||
* - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
|
||||
* - `db://host:port` - remote database (LanceDB cloud)
|
||||
* @param {string} uri - The uri of the database. If the database uri starts
|
||||
* with `db://` then it connects to a remote database.
|
||||
* @see {@link ConnectionOptions} for more details on the URI format.
|
||||
*/
|
||||
export async function connect(
|
||||
uri: string,
|
||||
opts?: Partial<ConnectionOptions>,
|
||||
): Promise<Connection> {
|
||||
opts = opts ?? {};
|
||||
const nativeConn = await LanceDbConnection.new(uri, opts);
|
||||
return new Connection(nativeConn);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user