feat: expose storage options in LanceDB (#1204)

Exposes `storage_options` in LanceDB. It is available in the Python async
API, Node `lancedb`, Node `vectordb`, and, of course, Rust. The Python
synchronous API is omitted because `storage_options` is not compatible with
the PyArrow filesystems that API currently uses. In the future, the sync
API will be changed to wrap the async one, and at that point it will gain
support for `storage_options`.

1. Fixes #1168
2. Closes #1165
3. Closes #1082
4. Closes #439
5. Closes #897
6. Closes #642
7. Closes #281
8. Closes #114
9. Closes #990
10. Deprecates `awsCredentials` and `awsRegion`. Users are encouraged
to use `storageOptions` instead.
This commit is contained in:
Will Jones
2024-04-10 10:12:04 -07:00
committed by GitHub
parent 25dea4e859
commit 1d23af213b
31 changed files with 3128 additions and 262 deletions

View File

@@ -78,12 +78,25 @@ export interface ConnectionOptions {
/** User-provided AWS credentials.
*
* If not provided, LanceDB will use the default credentials provider chain.
*
* @deprecated Pass `aws_access_key_id`, `aws_secret_access_key`, and `aws_session_token`
* through `storageOptions` instead.
*/
awsCredentials?: AwsCredentials
/** AWS region to connect to. Default is {@link defaultAwsRegion}. */
/** AWS region to connect to. Default is {@link defaultAwsRegion}
*
* @deprecated Pass `region` through `storageOptions` instead.
*/
awsRegion?: string
/**
* User provided options for object storage. For example, S3 credentials or request timeouts.
*
* The various options are described at https://lancedb.github.io/lancedb/guides/storage/
*/
storageOptions?: Record<string, string>
/**
* API key for the remote connections
*
@@ -176,7 +189,6 @@ export async function connect (
if (typeof arg === 'string') {
opts = { uri: arg }
} else {
// opts = { uri: arg.uri, awsCredentials = arg.awsCredentials }
const keys = Object.keys(arg)
if (keys.length === 1 && keys[0] === 'uri' && typeof arg.uri === 'string') {
opts = { uri: arg.uri }
@@ -198,12 +210,26 @@ export async function connect (
// Remote connection
return new RemoteConnection(opts)
}
// Build the effective object-store options. Start from a shallow copy of the
// caller's map so that folding the deprecated fields in below never mutates
// the `storageOptions` object the caller passed in.
const storageOptions: Record<string, string> = { ...opts.storageOptions }
// Translate the deprecated `awsCredentials` / `awsRegion` fields into their
// `storageOptions` keys. When a deprecated field is set, it overwrites any
// value already present under the same key.
if (opts.awsCredentials?.accessKeyId !== undefined) {
  storageOptions.aws_access_key_id = opts.awsCredentials.accessKeyId
}
if (opts.awsCredentials?.secretKey !== undefined) {
  storageOptions.aws_secret_access_key = opts.awsCredentials.secretKey
}
if (opts.awsCredentials?.sessionToken !== undefined) {
  storageOptions.aws_session_token = opts.awsCredentials.sessionToken
}
if (opts.awsRegion !== undefined) {
  storageOptions.region = opts.awsRegion
}
// It's a pain to pass a record to Rust, so we convert it to an array of key-value pairs
const storageOptionsArr = Object.entries(storageOptions)
const db = await databaseNew(
opts.uri,
opts.awsCredentials?.accessKeyId,
opts.awsCredentials?.secretKey,
opts.awsCredentials?.sessionToken,
opts.awsRegion,
storageOptionsArr,
opts.readConsistencyInterval
)
return new LocalConnection(db, opts)
@@ -720,7 +746,6 @@ export class LocalConnection implements Connection {
const tbl = await databaseOpenTable.call(
this._db,
name,
...getAwsArgs(this._options())
)
if (embeddings !== undefined) {
return new LocalTable(tbl, name, this._options(), embeddings)

View File

@@ -75,6 +75,19 @@ describe('LanceDB client', function () {
assert.equal(con.uri, uri)
})
// Connecting with extra `storageOptions` must succeed and report the same URI.
it('should accept custom storage options', async function () {
  const dbUri = await createTestDB()
  const con = await lancedb.connect({
    uri: dbUri,
    storageOptions: { region: 'us-west-2', timeout: '30s' }
  })
  assert.equal(con.uri, dbUri)
})
it('should return the existing table names', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)