From 25dea4e859b970aec576f0743ad33d8d2f7d6a0c Mon Sep 17 00:00:00 2001 From: Bert Date: Wed, 10 Apr 2024 11:54:47 -0400 Subject: [PATCH 01/19] BREAKING CHANGE: Check if remote table exists when opening (with caching) (#1214) - make open table behaviour consistent: - remote tables will check if the table exists by calling /describe and throwing an error if the call doesn't succeed - this is similar to the behaviour for local tables where we will raise an exception when opening the table if the local dataset doesn't exist - The table names are cached in the client with a TTL - Also fixes a small bug where if the remote error response was deserialized from JSON as an object, we'd print it resulting in the unhelpful error message: `Error: Server Error, status: 404, message: Not Found: [object Object]` --- node/src/remote/client.ts | 6 +++++- node/src/remote/index.ts | 14 ++++++++++++- node/src/test/test.ts | 18 +++++++++++++++- node/src/util.ts | 33 ++++++++++++++++++++++++++++++ python/python/lancedb/remote/db.py | 18 ++++++++-------- 5 files changed, 76 insertions(+), 13 deletions(-) diff --git a/node/src/remote/client.ts b/node/src/remote/client.ts index 04f8e43a..9992abbb 100644 --- a/node/src/remote/client.ts +++ b/node/src/remote/client.ts @@ -111,7 +111,11 @@ async function decodeErrorData( if (responseType === 'arraybuffer') { return new TextDecoder().decode(errorData) } else { - return errorData + if (typeof errorData === 'object') { + return JSON.stringify(errorData) + } + + return errorData } } diff --git a/node/src/remote/index.ts b/node/src/remote/index.ts index 17b9443d..29f8a2fb 100644 --- a/node/src/remote/index.ts +++ b/node/src/remote/index.ts @@ -38,7 +38,7 @@ import { fromRecordsToStreamBuffer, fromTableToStreamBuffer } from '../arrow' -import { toSQL } from '../util' +import { toSQL, TTLCache } from '../util' import { type HttpMiddleware } from '../middleware' /** @@ -47,6 +47,7 @@ import { type HttpMiddleware } from '../middleware' export class 
RemoteConnection implements Connection { private _client: HttpLancedbClient private readonly _dbName: string + private readonly _tableCache = new TTLCache(300_000) constructor (opts: ConnectionOptions) { if (!opts.uri.startsWith('db://')) { @@ -89,6 +90,9 @@ export class RemoteConnection implements Connection { page_token: pageToken }) const body = await response.body() + for (const table of body.tables) { + this._tableCache.set(table, true) + } return body.tables } @@ -101,6 +105,12 @@ export class RemoteConnection implements Connection { name: string, embeddings?: EmbeddingFunction ): Promise> { + // check if the table exists + if (this._tableCache.get(name) === undefined) { + await this._client.post(`/v1/table/${encodeURIComponent(name)}/describe/`) + this._tableCache.set(name, true) + } + if (embeddings !== undefined) { return new RemoteTable(this._client, name, embeddings) } else { @@ -169,6 +179,7 @@ export class RemoteConnection implements Connection { ) } + this._tableCache.set(tableName, true) if (embeddings === undefined) { return new RemoteTable(this._client, tableName) } else { @@ -178,6 +189,7 @@ export class RemoteConnection implements Connection { async dropTable (name: string): Promise { await this._client.post(`/v1/table/${encodeURIComponent(name)}/drop/`) + this._tableCache.delete(name) } withMiddleware (middleware: HttpMiddleware): Connection { diff --git a/node/src/test/test.ts b/node/src/test/test.ts index af87ee41..d3c5d2aa 100644 --- a/node/src/test/test.ts +++ b/node/src/test/test.ts @@ -42,6 +42,7 @@ import { Float16, Int64 } from 'apache-arrow' +import type { RemoteRequest, RemoteResponse } from '../middleware' const expect = chai.expect const assert = chai.assert @@ -913,7 +914,22 @@ describe('Remote LanceDB client', function () { } // Search - const table = await con.openTable('vectors') + const table = await con.withMiddleware(new (class { + async onRemoteRequest(req: RemoteRequest, next: (req: RemoteRequest) => Promise) { + // 
intercept call to check if the table exists and make the call succeed + if (req.uri.endsWith('/describe/')) { + return { + status: 200, + statusText: 'OK', + headers: new Map(), + body: async () => ({}) + } + } + + return await next(req) + } + })()).openTable('vectors') + try { await table.search([0.1, 0.3]).execute() } catch (err) { diff --git a/node/src/util.ts b/node/src/util.ts index 242a4caf..a84fc29d 100644 --- a/node/src/util.ts +++ b/node/src/util.ts @@ -42,3 +42,36 @@ export function toSQL (value: Literal): string { // eslint-disable-next-line @typescript-eslint/restrict-template-expressions throw new Error(`Unsupported value type: ${typeof value} value: (${value})`) } + +export class TTLCache { + private readonly cache: Map + + /** + * @param ttl Time to live in milliseconds + */ + constructor (private readonly ttl: number) { + this.cache = new Map() + } + + get (key: string): any | undefined { + const entry = this.cache.get(key) + if (entry === undefined) { + return undefined + } + + if (entry.expires < Date.now()) { + this.cache.delete(key) + return undefined + } + + return entry.value + } + + set (key: string, value: any): void { + this.cache.set(key, { value, expires: Date.now() + this.ttl }) + } + + delete (key: string): void { + this.cache.delete(key) + } +} diff --git a/python/python/lancedb/remote/db.py b/python/python/lancedb/remote/db.py index 9dff65c5..c252fc5c 100644 --- a/python/python/lancedb/remote/db.py +++ b/python/python/lancedb/remote/db.py @@ -18,6 +18,7 @@ from concurrent.futures import ThreadPoolExecutor from typing import Iterable, List, Optional, Union from urllib.parse import urlparse +from cachetools import TTLCache import pyarrow as pa from overrides import override @@ -29,7 +30,6 @@ from ..table import Table, _sanitize_data from ..util import validate_table_name from .arrow import to_ipc_binary from .client import ARROW_STREAM_CONTENT_TYPE, RestfulLanceDBClient -from .errors import LanceDBClientError class 
RemoteDBConnection(DBConnection): @@ -60,6 +60,7 @@ class RemoteDBConnection(DBConnection): read_timeout=read_timeout, ) self._request_thread_pool = request_thread_pool + self._table_cache = TTLCache(maxsize=10000, ttl=300) def __repr__(self) -> str: return f"RemoteConnect(name={self.db_name})" @@ -89,6 +90,7 @@ class RemoteDBConnection(DBConnection): else: break for item in result: + self._table_cache[item] = True yield item @override @@ -109,16 +111,10 @@ class RemoteDBConnection(DBConnection): self._client.mount_retry_adapter_for_table(name) # check if table exists - try: + if self._table_cache.get(name) is None: self._client.post(f"/v1/table/{name}/describe/") - except LanceDBClientError as err: - if str(err).startswith("Not found"): - logging.error( - "Table %s does not exist. Please first call " - "db.create_table(%s, data).", - name, - name, - ) + self._table_cache[name] = True + return RemoteTable(self, name) @override @@ -267,6 +263,7 @@ class RemoteDBConnection(DBConnection): content_type=ARROW_STREAM_CONTENT_TYPE, ) + self._table_cache[name] = True return RemoteTable(self, name) @override @@ -282,6 +279,7 @@ class RemoteDBConnection(DBConnection): self._client.post( f"/v1/table/{name}/drop/", ) + self._table_cache.pop(name) async def close(self): """Close the connection to the database.""" From 1d23af213be176a4db76d11f937d607078cbeecb Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 10 Apr 2024 10:12:04 -0700 Subject: [PATCH 02/19] feat: expose storage options in LanceDB (#1204) Exposes `storage_options` in LanceDB. This is provided for Python async, Node `lancedb`, and Node `vectordb` (and Rust of course). Python synchronous is omitted because it's not compatible with the PyArrow filesystems we use there currently. In the future, we will move the sync API to wrap the async one, and then it will get support for `storage_options`. 1. Fixes #1168 2. Closes #1165 3. Closes #1082 4. Closes #439 5. Closes #897 6. Closes #642 7. Closes #281 8. 
Closes #114 9. Closes #990 10. Deprecating `awsCredentials` and `awsRegion`. Users are encouraged to use `storageOptions` instead. --- .github/workflows/node.yml | 1 + .github/workflows/nodejs.yml | 5 + .github/workflows/python.yml | 4 +- .github/workflows/run_tests/action.yml | 16 +- .github/workflows/rust.yml | 6 +- Cargo.toml | 8 +- docker-compose.yml | 6 +- docs/src/guides/storage.md | 289 +++- docs/test/md_testing.py | 15 +- node/src/index.ts | 39 +- node/src/test/test.ts | 13 + nodejs/__test__/s3_integration.test.ts | 219 +++ nodejs/lancedb/connection.ts | 110 +- nodejs/lancedb/index.ts | 28 +- nodejs/package-lock.json | 1636 +++++++++++++++++++++++ nodejs/package.json | 3 + nodejs/src/connection.rs | 46 +- nodejs/src/lib.rs | 13 +- python/README.md | 2 +- python/pyproject.toml | 4 +- python/python/lancedb/__init__.py | 7 +- python/python/lancedb/_lancedb.pyi | 12 +- python/python/lancedb/db.py | 25 +- python/python/tests/test_s3.py | 158 +++ python/src/connection.rs | 47 +- rust/ffi/node/src/lib.rs | 135 +- rust/ffi/node/src/table.rs | 23 +- rust/lancedb/Cargo.toml | 7 +- rust/lancedb/src/connection.rs | 190 ++- rust/lancedb/src/table.rs | 33 +- rust/lancedb/tests/object_store_test.rs | 290 ++++ 31 files changed, 3128 insertions(+), 262 deletions(-) create mode 100644 nodejs/__test__/s3_integration.test.ts create mode 100644 python/python/tests/test_s3.py create mode 100644 rust/lancedb/tests/object_store_test.rs diff --git a/.github/workflows/node.yml b/.github/workflows/node.yml index 09b15afa..fc04b99a 100644 --- a/.github/workflows/node.yml +++ b/.github/workflows/node.yml @@ -107,6 +107,7 @@ jobs: AWS_ENDPOINT: http://localhost:4566 # this one is for dynamodb DYNAMODB_ENDPOINT: http://localhost:4566 + ALLOW_HTTP: true steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index 6f1372bb..128309ed 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -85,7 +85,12 @@ 
jobs: run: | npm ci npm run build + - name: Setup localstack + working-directory: . + run: docker compose up --detach --wait - name: Test + env: + S3_TEST: "1" run: npm run test macos: timeout-minutes: 30 diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index ef004b40..0659766b 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -99,6 +99,8 @@ jobs: workspaces: python - uses: ./.github/workflows/build_linux_wheel - uses: ./.github/workflows/run_tests + with: + integration: true # Make sure wheels are not included in the Rust cache - name: Delete wheels run: rm -rf target/wheels @@ -190,4 +192,4 @@ jobs: pip install -e .[tests] pip install tantivy - name: Run tests - run: pytest -m "not slow" -x -v --durations=30 python/tests + run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests diff --git a/.github/workflows/run_tests/action.yml b/.github/workflows/run_tests/action.yml index 6140cb5c..2954d256 100644 --- a/.github/workflows/run_tests/action.yml +++ b/.github/workflows/run_tests/action.yml @@ -5,6 +5,10 @@ inputs: python-minor-version: required: true description: "8 9 10 11 12" + integration: + required: false + description: "Run integration tests" + default: "false" runs: using: "composite" steps: @@ -12,6 +16,16 @@ runs: shell: bash run: | pip3 install $(ls target/wheels/lancedb-*.whl)[tests,dev] - - name: pytest + - name: Setup localstack for integration tests + if: ${{ inputs.integration == 'true' }} shell: bash + working-directory: . 
+ run: docker compose up --detach --wait + - name: pytest (with integration) + shell: bash + if: ${{ inputs.integration == 'true' }} run: pytest -m "not slow" -x -v --durations=30 python/python/tests + - name: pytest (no integration tests) + shell: bash + if: ${{ inputs.integration != 'true' }} + run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/python/tests diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 464bc01a..8a1201d3 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -76,6 +76,9 @@ jobs: sudo apt install -y protobuf-compiler libssl-dev - name: Build run: cargo build --all-features + - name: Start S3 integration test environment + working-directory: . + run: docker compose up --detach --wait - name: Run tests run: cargo test --all-features - name: Run examples @@ -105,7 +108,8 @@ jobs: - name: Build run: cargo build --all-features - name: Run tests - run: cargo test --all-features + # Run with everything except the integration tests. 
+ run: cargo test --features remote,fp16kernels windows: runs-on: windows-2022 steps: diff --git a/Cargo.toml b/Cargo.toml index 8417d586..f1689478 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"] categories = ["database-implementations"] [workspace.dependencies] -lance = { "version" = "=0.10.9", "features" = ["dynamodb"] } -lance-index = { "version" = "=0.10.9" } -lance-linalg = { "version" = "=0.10.9" } -lance-testing = { "version" = "=0.10.9" } +lance = { "version" = "=0.10.10", "features" = ["dynamodb"] } +lance-index = { "version" = "=0.10.10" } +lance-linalg = { "version" = "=0.10.10" } +lance-testing = { "version" = "=0.10.10" } # Note that this one does not include pyarrow arrow = { version = "50.0", optional = false } arrow-array = "50.0" diff --git a/docker-compose.yml b/docker-compose.yml index f46ef168..8b1196d1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,18 +1,18 @@ version: "3.9" services: localstack: - image: localstack/localstack:0.14 + image: localstack/localstack:3.3 ports: - 4566:4566 environment: - - SERVICES=s3,dynamodb + - SERVICES=s3,dynamodb,kms - DEBUG=1 - LS_LOG=trace - DOCKER_HOST=unix:///var/run/docker.sock - AWS_ACCESS_KEY_ID=ACCESSKEY - AWS_SECRET_ACCESS_KEY=SECRETKEY healthcheck: - test: [ "CMD", "curl", "-f", "http://localhost:4566/health" ] + test: [ "CMD", "curl", "-s", "http://localhost:4566/_localstack/health" ] interval: 5s retries: 3 start_period: 10s diff --git a/docs/src/guides/storage.md b/docs/src/guides/storage.md index 940cf141..bd014b35 100644 --- a/docs/src/guides/storage.md +++ b/docs/src/guides/storage.md @@ -55,18 +55,139 @@ LanceDB OSS supports object stores such as AWS S3 (and compatible stores), Azure const db = await lancedb.connect("az://bucket/path"); ``` -In most cases, when running in the respective cloud and permissions are set up correctly, no additional configuration is required. 
When running outside of the respective cloud, authentication credentials must be provided using environment variables. In general, these environment variables are the same as those used by the respective cloud SDKs. The sections below describe the environment variables that can be used to configure each object store. +In most cases, when running in the respective cloud and permissions are set up correctly, no additional configuration is required. When running outside of the respective cloud, authentication credentials must be provided. Credentials and other configuration options can be set in two ways: first, by setting environment variables. And second, by passing a `storage_options` object to the `connect` function. For example, to increase the request timeout to 60 seconds, you can set the `TIMEOUT` environment variable to `60s`: -LanceDB OSS uses the [object-store](https://docs.rs/object_store/latest/object_store/) Rust crate for object store access. There are general environment variables that can be used to configure the object store, such as the request timeout and proxy configuration. See the [object_store ClientConfigKey](https://docs.rs/object_store/latest/object_store/enum.ClientConfigKey.html) doc for available configuration options. The environment variables that can be set are the snake-cased versions of these variable names. For example, to set `ProxyUrl` use the environment variable `PROXY_URL`. (Don't let the Rust docs intimidate you! We link to them so you can see an up-to-date list of the available options.) +```bash +export TIMEOUT=60s +``` + +!!! note "`storage_options` availability" + + The `storage_options` parameter is only available in Python *async* API and JavaScript API. + It is not yet supported in the Python synchronous API. 
+ +If you only want this to apply to one particular connection, you can pass the `storage_options` argument when opening the connection: + +=== "Python" + + ```python + import lancedb + db = await lancedb.connect_async( + "s3://bucket/path", + storage_options={"timeout": "60s"} + ) + ``` + +=== "JavaScript" + + ```javascript + const lancedb = require("lancedb"); + const db = await lancedb.connect("s3://bucket/path", + {storageOptions: {timeout: "60s"}}); + ``` + +Getting even more specific, you can set the `timeout` for only a particular table: + +=== "Python" + + + ```python + import lancedb + db = await lancedb.connect_async("s3://bucket/path") + table = await db.create_table( + "table", + [{"a": 1, "b": 2}], + storage_options={"timeout": "60s"} + ) + ``` + +=== "JavaScript" + + + ```javascript + const lancedb = require("lancedb"); + const db = await lancedb.connect("s3://bucket/path"); + const table = db.createTable( + "table", + [{ a: 1, b: 2}], + {storageOptions: {timeout: "60s"}} + ); + ``` + +!!! info "Storage option casing" + + The storage option keys are case-insensitive. So `connect_timeout` and `CONNECT_TIMEOUT` are the same setting. Usually lowercase is used in the `storage_options` argument and uppercase is used for environment variables. In the `lancedb` Node package, the keys can also be provided in `camelCase` capitalization. For example, `connectTimeout` is equivalent to `connect_timeout`. + +### General configuration + +There are several options that can be set for all object stores, mostly related to network client configuration. + + + +| Key | Description | +|----------------------------|--------------------------------------------------------------------------------------------------| +| `allow_http` | Allow non-TLS, i.e. non-HTTPS connections. Default: `False`. | +| `allow_invalid_certificates`| Skip certificate validation on HTTPS connections. Default: `False`. | +| `connect_timeout` | Timeout for only the connect phase of a Client. 
Default: `5s`. | +| `timeout` | Timeout for the entire request, from connection until the response body has finished. Default: `30s`. | +| `user_agent` | User agent string to use in requests. | +| `proxy_url` | URL of a proxy server to use for requests. Default: `None`. | +| `proxy_ca_certificate` | PEM-formatted CA certificate for proxy connections. | +| `proxy_excludes` | List of hosts that bypass the proxy. This is a comma-separated list of domains and IP masks. Any subdomain of the provided domain will be bypassed. For example, `example.com, 192.168.1.0/24` would bypass `https://api.example.com`, `https://www.example.com`, and any IP in the range `192.168.1.0/24`. | ### AWS S3 -To configure credentials for AWS S3, you can use the `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` environment variables. +To configure credentials for AWS S3, you can use the `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` keys. Region can also be set, but it is not mandatory when using AWS. +These can be set as environment variables or passed in the `storage_options` parameter: + +=== "Python" + + ```python + import lancedb + db = await lancedb.connect_async( + "s3://bucket/path", + storage_options={ + "aws_access_key_id": "my-access-key", + "aws_secret_access_key": "my-secret-key", + "aws_session_token": "my-session-token", + } + ) + ``` + +=== "JavaScript" + + ```javascript + const lancedb = require("lancedb"); + const db = await lancedb.connect( + "s3://bucket/path", + { + storageOptions: { + awsAccessKeyId: "my-access-key", + awsSecretAccessKey: "my-secret-key", + awsSessionToken: "my-session-token", + } + } + ); + ``` Alternatively, if you are using AWS SSO, you can use the `AWS_PROFILE` and `AWS_DEFAULT_REGION` environment variables. -You can see a full list of environment variables [here](https://docs.rs/object_store/latest/object_store/aws/struct.AmazonS3Builder.html#method.from_env). 
+The following keys can be used as both environment variables or keys in the `storage_options` parameter: + +| Key | Description | +|------------------------------------|------------------------------------------------------------------------------------------------------| +| `aws_region` / `region` | The AWS region the bucket is in. This can be automatically detected when using AWS S3, but must be specified for S3-compatible stores. | +| `aws_access_key_id` / `access_key_id` | The AWS access key ID to use. | +| `aws_secret_access_key` / `secret_access_key` | The AWS secret access key to use. | +| `aws_session_token` / `session_token` | The AWS session token to use. | +| `aws_endpoint` / `endpoint` | The endpoint to use for S3-compatible stores. | +| `aws_virtual_hosted_style_request` / `virtual_hosted_style_request` | Whether to use virtual hosted-style requests, where the bucket name is part of the endpoint. Meant to be used with `aws_endpoint`. Default: `False`. | +| `aws_s3_express` / `s3_express` | Whether to use S3 Express One Zone endpoints. Default: `False`. See more details below. | +| `aws_server_side_encryption` | The server-side encryption algorithm to use. Must be one of `"AES256"`, `"aws:kms"`, or `"aws:kms:dsse"`. Default: `None`. | +| `aws_sse_kms_key_id` | The KMS key ID to use for server-side encryption. If set, `aws_server_side_encryption` must be `"aws:kms"` or `"aws:kms:dsse"`. | +| `aws_sse_bucket_key_enabled` | Whether to use bucket keys for server-side encryption. | + !!! tip "Automatic cleanup for failed writes" @@ -146,22 +267,174 @@ For **read-only access**, LanceDB will need a policy such as: #### S3-compatible stores -LanceDB can also connect to S3-compatible stores, such as MinIO. To do so, you must specify two environment variables: `AWS_ENDPOINT` and `AWS_DEFAULT_REGION`. `AWS_ENDPOINT` should be the URL of the S3-compatible store, and `AWS_DEFAULT_REGION` should be the region to use. 
+LanceDB can also connect to S3-compatible stores, such as MinIO. To do so, you must specify both region and endpoint: + +=== "Python" + + ```python + import lancedb + db = await lancedb.connect_async( + "s3://bucket/path", + storage_options={ + "region": "us-east-1", + "endpoint": "http://minio:9000", + } + ) + ``` + +=== "JavaScript" + + ```javascript + const lancedb = require("lancedb"); + const db = await lancedb.connect( + "s3://bucket/path", + { + storageOptions: { + region: "us-east-1", + endpoint: "http://minio:9000", + } + } + ); + ``` + +This can also be done with the ``AWS_ENDPOINT`` and ``AWS_DEFAULT_REGION`` environment variables. + +#### S3 Express + +LanceDB supports [S3 Express One Zone](https://aws.amazon.com/s3/storage-classes/express-one-zone/) endpoints, but requires additional configuration. Also, S3 Express endpoints only support connecting from an EC2 instance within the same region. + +To configure LanceDB to use an S3 Express endpoint, you must set the storage option `s3_express`. The bucket name in your table URI should **include the suffix**. + +=== "Python" + + ```python + import lancedb + db = await lancedb.connect_async( + "s3://my-bucket--use1-az4--x-s3/path", + storage_options={ + "region": "us-east-1", + "s3_express": "true", + } + ) + ``` + +=== "JavaScript" + + ```javascript + const lancedb = require("lancedb"); + const db = await lancedb.connect( + "s3://my-bucket--use1-az4--x-s3/path", + { + storageOptions: { + region: "us-east-1", + s3Express: "true", + } + } + ); + ``` - ### Google Cloud Storage -GCS credentials are configured by setting the `GOOGLE_SERVICE_ACCOUNT` environment variable to the path of a JSON file containing the service account credentials. There are several aliases for this environment variable, documented [here](https://docs.rs/object_store/latest/object_store/gcp/struct.GoogleCloudStorageBuilder.html#method.from_env). 
+GCS credentials are configured by setting the `GOOGLE_SERVICE_ACCOUNT` environment variable to the path of a JSON file containing the service account credentials. Alternatively, you can pass the path to the JSON file in the `storage_options`: + +=== "Python" + + + ```python + import lancedb + db = await lancedb.connect_async( + "gs://my-bucket/my-database", + storage_options={ + "service_account": "path/to/service-account.json", + } + ) + ``` + +=== "JavaScript" + + ```javascript + const lancedb = require("lancedb"); + const db = await lancedb.connect( + "gs://my-bucket/my-database", + { + storageOptions: { + serviceAccount: "path/to/service-account.json", + } + } + ); + ``` !!! info "HTTP/2 support" By default, GCS uses HTTP/1 for communication, as opposed to HTTP/2. This improves maximum throughput significantly. However, if you wish to use HTTP/2 for some reason, you can set the environment variable `HTTP1_ONLY` to `false`. + +The following keys can be used as both environment variables or keys in the `storage_options` parameter: + + +| Key | Description | +|---------------------------------------|----------------------------------------------| +| ``google_service_account`` / `service_account` | Path to the service account JSON file. | +| ``google_service_account_key`` | The serialized service account key. | +| ``google_application_credentials`` | Path to the application credentials. | + + ### Azure Blob Storage -Azure Blob Storage credentials can be configured by setting the `AZURE_STORAGE_ACCOUNT_NAME` and ``AZURE_STORAGE_ACCOUNT_KEY`` environment variables. The full list of environment variables that can be set are documented [here](https://docs.rs/object_store/latest/object_store/azure/struct.MicrosoftAzureBuilder.html#method.from_env). +Azure Blob Storage credentials can be configured by setting the `AZURE_STORAGE_ACCOUNT_NAME`and `AZURE_STORAGE_ACCOUNT_KEY` environment variables. 
Alternatively, you can pass the account name and key in the `storage_options` parameter: +=== "Python" + + + ```python + import lancedb + db = await lancedb.connect_async( + "az://my-container/my-database", + storage_options={ + account_name: "some-account", + account_key: "some-key", + } + ) + ``` + +=== "JavaScript" + + ```javascript + const lancedb = require("lancedb"); + const db = await lancedb.connect( + "az://my-container/my-database", + { + storageOptions: { + accountName: "some-account", + accountKey: "some-key", + } + } + ); + ``` + +These keys can be used as both environment variables or keys in the `storage_options` parameter: + + + +| Key | Description | +|---------------------------------------|--------------------------------------------------------------------------------------------------| +| ``azure_storage_account_name`` | The name of the azure storage account. | +| ``azure_storage_account_key`` | The serialized service account key. | +| ``azure_client_id`` | Service principal client id for authorizing requests. | +| ``azure_client_secret`` | Service principal client secret for authorizing requests. | +| ``azure_tenant_id`` | Tenant id used in oauth flows. | +| ``azure_storage_sas_key`` | Shared access signature. The signature is expected to be percent-encoded, much like they are provided in the azure storage explorer or azure portal. | +| ``azure_storage_token`` | Bearer token. | +| ``azure_storage_use_emulator`` | Use object store with azurite storage emulator. | +| ``azure_endpoint`` | Override the endpoint used to communicate with blob storage. | +| ``azure_use_fabric_endpoint`` | Use object store with url scheme account.dfs.fabric.microsoft.com. | +| ``azure_msi_endpoint`` | Endpoint to request a imds managed identity token. | +| ``azure_object_id`` | Object id for use with managed identity authentication. | +| ``azure_msi_resource_id`` | Msi resource id for use with managed identity authentication. 
| +| ``azure_federated_token_file`` | File containing token for Azure AD workload identity federation. | +| ``azure_use_azure_cli`` | Use azure cli for acquiring access token. | +| ``azure_disable_tagging`` | Disables tagging objects. This can be desirable if not supported by the backing store. | \ No newline at end of file diff --git a/docs/test/md_testing.py b/docs/test/md_testing.py index a3ff6f52..305e3668 100644 --- a/docs/test/md_testing.py +++ b/docs/test/md_testing.py @@ -1,5 +1,5 @@ import glob -from typing import Iterator +from typing import Iterator, List from pathlib import Path glob_string = "../src/**/*.md" @@ -50,11 +50,24 @@ def yield_lines(lines: Iterator[str], prefix: str, suffix: str): yield line[strip_length:] +def wrap_async(lines: List[str]) -> List[str]: + # Indent all the lines + lines = [" " + line for line in lines] + # Put all lines in `async def main():` + lines = ["async def main():\n"] + lines + # Put `import asyncio\n asyncio.run(main())` at the end + lines = lines + ["\n", "import asyncio\n", "asyncio.run(main())\n"] + return lines + + for file in filter(lambda file: file not in excluded_files, files): with open(file, "r") as f: lines = list(yield_lines(iter(f), "```", "```")) if len(lines) > 0: + if any("await" in line for line in lines): + lines = wrap_async(lines) + print(lines) out_path = ( Path(python_folder) diff --git a/node/src/index.ts b/node/src/index.ts index 72153d48..fe6656d4 100644 --- a/node/src/index.ts +++ b/node/src/index.ts @@ -78,12 +78,25 @@ export interface ConnectionOptions { /** User provided AWS crednetials. * * If not provided, LanceDB will use the default credentials provider chain. + * + * @deprecated Pass `aws_access_key_id`, `aws_secret_access_key`, and `aws_session_token` + * through `storageOptions` instead. */ awsCredentials?: AwsCredentials - /** AWS region to connect to. Default is {@link defaultAwsRegion}. */ + /** AWS region to connect to. 
Default is {@link defaultAwsRegion} + * + * @deprecated Pass `region` through `storageOptions` instead. + */ awsRegion?: string + /** + * User provided options for object storage. For example, S3 credentials or request timeouts. + * + * The various options are described at https://lancedb.github.io/lancedb/guides/storage/ + */ + storageOptions?: Record + /** * API key for the remote connections * @@ -176,7 +189,6 @@ export async function connect ( if (typeof arg === 'string') { opts = { uri: arg } } else { - // opts = { uri: arg.uri, awsCredentials = arg.awsCredentials } const keys = Object.keys(arg) if (keys.length === 1 && keys[0] === 'uri' && typeof arg.uri === 'string') { opts = { uri: arg.uri } @@ -198,12 +210,26 @@ export async function connect ( // Remote connection return new RemoteConnection(opts) } + + const storageOptions = opts.storageOptions ?? {}; + if (opts.awsCredentials?.accessKeyId !== undefined) { + storageOptions.aws_access_key_id = opts.awsCredentials.accessKeyId + } + if (opts.awsCredentials?.secretKey !== undefined) { + storageOptions.aws_secret_access_key = opts.awsCredentials.secretKey + } + if (opts.awsCredentials?.sessionToken !== undefined) { + storageOptions.aws_session_token = opts.awsCredentials.sessionToken + } + if (opts.awsRegion !== undefined) { + storageOptions.region = opts.awsRegion + } + // It's a pain to pass a record to Rust, so we convert it to an array of key-value pairs + const storageOptionsArr = Object.entries(storageOptions); + const db = await databaseNew( opts.uri, - opts.awsCredentials?.accessKeyId, - opts.awsCredentials?.secretKey, - opts.awsCredentials?.sessionToken, - opts.awsRegion, + storageOptionsArr, opts.readConsistencyInterval ) return new LocalConnection(db, opts) @@ -720,7 +746,6 @@ export class LocalConnection implements Connection { const tbl = await databaseOpenTable.call( this._db, name, - ...getAwsArgs(this._options()) ) if (embeddings !== undefined) { return new LocalTable(tbl, name, 
this._options(), embeddings) diff --git a/node/src/test/test.ts b/node/src/test/test.ts index d3c5d2aa..75d6351b 100644 --- a/node/src/test/test.ts +++ b/node/src/test/test.ts @@ -75,6 +75,19 @@ describe('LanceDB client', function () { assert.equal(con.uri, uri) }) + it('should accept custom storage options', async function () { + const uri = await createTestDB() + const storageOptions = { + region: 'us-west-2', + timeout: '30s' + }; + const con = await lancedb.connect({ + uri, + storageOptions + }) + assert.equal(con.uri, uri) + }) + it('should return the existing table names', async function () { const uri = await createTestDB() const con = await lancedb.connect(uri) diff --git a/nodejs/__test__/s3_integration.test.ts b/nodejs/__test__/s3_integration.test.ts new file mode 100644 index 00000000..fea98ebe --- /dev/null +++ b/nodejs/__test__/s3_integration.test.ts @@ -0,0 +1,219 @@ +// Copyright 2024 Lance Developers. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* eslint-disable @typescript-eslint/naming-convention */ + +import { connect } from "../dist"; +import { + CreateBucketCommand, + DeleteBucketCommand, + DeleteObjectCommand, + HeadObjectCommand, + ListObjectsV2Command, + S3Client, +} from "@aws-sdk/client-s3"; +import { + CreateKeyCommand, + ScheduleKeyDeletionCommand, + KMSClient, +} from "@aws-sdk/client-kms"; + +// Skip these tests unless the S3_TEST environment variable is set +const maybeDescribe = process.env.S3_TEST ? 
describe : describe.skip; + +// These are all keys that are accepted by storage_options +const CONFIG = { + allowHttp: "true", + awsAccessKeyId: "ACCESSKEY", + awsSecretAccessKey: "SECRETKEY", + awsEndpoint: "http://127.0.0.1:4566", + awsRegion: "us-east-1", +}; + +class S3Bucket { + name: string; + constructor(name: string) { + this.name = name; + } + + static s3Client() { + return new S3Client({ + region: CONFIG.awsRegion, + credentials: { + accessKeyId: CONFIG.awsAccessKeyId, + secretAccessKey: CONFIG.awsSecretAccessKey, + }, + endpoint: CONFIG.awsEndpoint, + }); + } + + public static async create(name: string): Promise { + const client = this.s3Client(); + // Delete the bucket if it already exists + try { + await this.deleteBucket(client, name); + } catch (e) { + // It's fine if the bucket doesn't exist + } + await client.send(new CreateBucketCommand({ Bucket: name })); + return new S3Bucket(name); + } + + public async delete() { + const client = S3Bucket.s3Client(); + await S3Bucket.deleteBucket(client, this.name); + } + + static async deleteBucket(client: S3Client, name: string) { + // Must delete all objects before we can delete the bucket + const objects = await client.send( + new ListObjectsV2Command({ Bucket: name }), + ); + if (objects.Contents) { + for (const object of objects.Contents) { + await client.send( + new DeleteObjectCommand({ Bucket: name, Key: object.Key }), + ); + } + } + + await client.send(new DeleteBucketCommand({ Bucket: name })); + } + + public async assertAllEncrypted(path: string, keyId: string) { + const client = S3Bucket.s3Client(); + const objects = await client.send( + new ListObjectsV2Command({ Bucket: this.name, Prefix: path }), + ); + if (objects.Contents) { + for (const object of objects.Contents) { + const metadata = await client.send( + new HeadObjectCommand({ Bucket: this.name, Key: object.Key }), + ); + expect(metadata.ServerSideEncryption).toBe("aws:kms"); + expect(metadata.SSEKMSKeyId).toContain(keyId); + } + } + } +} + 
+class KmsKey { + keyId: string; + constructor(keyId: string) { + this.keyId = keyId; + } + + static kmsClient() { + return new KMSClient({ + region: CONFIG.awsRegion, + credentials: { + accessKeyId: CONFIG.awsAccessKeyId, + secretAccessKey: CONFIG.awsSecretAccessKey, + }, + endpoint: CONFIG.awsEndpoint, + }); + } + + public static async create(): Promise { + const client = this.kmsClient(); + const key = await client.send(new CreateKeyCommand({})); + const keyId = key?.KeyMetadata?.KeyId; + if (!keyId) { + throw new Error("Failed to create KMS key"); + } + return new KmsKey(keyId); + } + + public async delete() { + const client = KmsKey.kmsClient(); + await client.send(new ScheduleKeyDeletionCommand({ KeyId: this.keyId })); + } +} + +maybeDescribe("storage_options", () => { + let bucket: S3Bucket; + let kmsKey: KmsKey; + beforeAll(async () => { + bucket = await S3Bucket.create("lancedb"); + kmsKey = await KmsKey.create(); + }); + afterAll(async () => { + await kmsKey.delete(); + await bucket.delete(); + }); + + it("can be used to configure auth and endpoints", async () => { + const uri = `s3://${bucket.name}/test`; + const db = await connect(uri, { storageOptions: CONFIG }); + + let table = await db.createTable("test", [{ a: 1, b: 2 }]); + + let rowCount = await table.countRows(); + expect(rowCount).toBe(1); + + let tableNames = await db.tableNames(); + expect(tableNames).toEqual(["test"]); + + table = await db.openTable("test"); + rowCount = await table.countRows(); + expect(rowCount).toBe(1); + + await table.add([ + { a: 2, b: 3 }, + { a: 3, b: 4 }, + ]); + rowCount = await table.countRows(); + expect(rowCount).toBe(3); + + await db.dropTable("test"); + + tableNames = await db.tableNames(); + expect(tableNames).toEqual([]); + }); + + it("can configure encryption at connection and table level", async () => { + const uri = `s3://${bucket.name}/test`; + let db = await connect(uri, { storageOptions: CONFIG }); + + let table = await db.createTable("table1", [{ a: 1, 
b: 2 }], { + storageOptions: { + awsServerSideEncryption: "aws:kms", + awsSseKmsKeyId: kmsKey.keyId, + }, + }); + + let rowCount = await table.countRows(); + expect(rowCount).toBe(1); + + await table.add([{ a: 2, b: 3 }]); + + await bucket.assertAllEncrypted("test/table1.lance", kmsKey.keyId); + + // Now with encryption settings at connection level + db = await connect(uri, { + storageOptions: { + ...CONFIG, + awsServerSideEncryption: "aws:kms", + awsSseKmsKeyId: kmsKey.keyId, + }, + }); + table = await db.createTable("table2", [{ a: 1, b: 2 }]); + rowCount = await table.countRows(); + expect(rowCount).toBe(1); + + await table.add([{ a: 2, b: 3 }]); + + await bucket.assertAllEncrypted("test/table2.lance", kmsKey.keyId); + }); +}); diff --git a/nodejs/lancedb/connection.ts b/nodejs/lancedb/connection.ts index 5a8f6f32..66502f35 100644 --- a/nodejs/lancedb/connection.ts +++ b/nodejs/lancedb/connection.ts @@ -13,10 +13,32 @@ // limitations under the License. import { fromTableToBuffer, makeArrowTable, makeEmptyTable } from "./arrow"; -import { Connection as LanceDbConnection } from "./native"; +import { ConnectionOptions, Connection as LanceDbConnection } from "./native"; import { Table } from "./table"; import { Table as ArrowTable, Schema } from "apache-arrow"; +/** + * Connect to a LanceDB instance at the given URI. + * + * Accpeted formats: + * + * - `/path/to/database` - local database + * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage + * - `db://host:port` - remote database (LanceDB cloud) + * @param {string} uri - The uri of the database. If the database uri starts + * with `db://` then it connects to a remote database. + * @see {@link ConnectionOptions} for more details on the URI format. + */ +export async function connect( + uri: string, + opts?: Partial, +): Promise { + opts = opts ?? 
{}; + opts.storageOptions = cleanseStorageOptions(opts.storageOptions); + const nativeConn = await LanceDbConnection.new(uri, opts); + return new Connection(nativeConn); +} + export interface CreateTableOptions { /** * The mode to use when creating the table. @@ -33,6 +55,28 @@ export interface CreateTableOptions { * then no error will be raised. */ existOk: boolean; + + /** + * Configuration for object storage. + * + * Options already set on the connection will be inherited by the table, + * but can be overridden here. + * + * The available options are described at https://lancedb.github.io/lancedb/guides/storage/ + */ + storageOptions?: Record; +} + +export interface OpenTableOptions { + /** + * Configuration for object storage. + * + * Options already set on the connection will be inherited by the table, + * but can be overridden here. + * + * The available options are described at https://lancedb.github.io/lancedb/guides/storage/ + */ + storageOptions?: Record; } export interface TableNamesOptions { @@ -109,8 +153,14 @@ export class Connection { * Open a table in the database. * @param {string} name - The name of the table */ - async openTable(name: string): Promise { - const innerTable = await this.inner.openTable(name); + async openTable( + name: string, + options?: Partial, + ): Promise
{ + const innerTable = await this.inner.openTable( + name, + cleanseStorageOptions(options?.storageOptions), + ); return new Table(innerTable); } @@ -139,7 +189,12 @@ export class Connection { table = makeArrowTable(data); } const buf = await fromTableToBuffer(table); - const innerTable = await this.inner.createTable(name, buf, mode); + const innerTable = await this.inner.createTable( + name, + buf, + mode, + cleanseStorageOptions(options?.storageOptions), + ); return new Table(innerTable); } @@ -162,7 +217,12 @@ export class Connection { const table = makeEmptyTable(schema); const buf = await fromTableToBuffer(table); - const innerTable = await this.inner.createEmptyTable(name, buf, mode); + const innerTable = await this.inner.createEmptyTable( + name, + buf, + mode, + cleanseStorageOptions(options?.storageOptions), + ); return new Table(innerTable); } @@ -174,3 +234,43 @@ export class Connection { return this.inner.dropTable(name); } } + +/** + * Takes storage options and makes all the keys snake case. + */ +function cleanseStorageOptions( + options?: Record, +): Record | undefined { + if (options === undefined) { + return undefined; + } + const result: Record = {}; + for (const [key, value] of Object.entries(options)) { + if (value !== undefined) { + const newKey = camelToSnakeCase(key); + result[newKey] = value; + } + } + return result; +} + +/** + * Convert a string to snake case. It might already be snake case, in which case it is + * returned unchanged. 
+ */ +function camelToSnakeCase(camel: string): string { + if (camel.includes("_")) { + // Assume if there is at least one underscore, it is already snake case + return camel; + } + if (camel.toLocaleUpperCase() === camel) { + // Assume if the string is all uppercase, it is already snake case + return camel; + } + + let result = camel.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`); + if (result.startsWith("_")) { + result = result.slice(1); + } + return result; +} diff --git a/nodejs/lancedb/index.ts b/nodejs/lancedb/index.ts index 40bcc645..2cb7cf3f 100644 --- a/nodejs/lancedb/index.ts +++ b/nodejs/lancedb/index.ts @@ -12,12 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -import { Connection } from "./connection"; -import { - Connection as LanceDbConnection, - ConnectionOptions, -} from "./native.js"; - export { WriteOptions, WriteMode, @@ -32,6 +26,7 @@ export { VectorColumnOptions, } from "./arrow"; export { + connect, Connection, CreateTableOptions, TableNamesOptions, @@ -46,24 +41,3 @@ export { export { Index, IndexOptions, IvfPqOptions } from "./indices"; export { Table, AddDataOptions, IndexConfig, UpdateOptions } from "./table"; export * as embedding from "./embedding"; - -/** - * Connect to a LanceDB instance at the given URI. - * - * Accpeted formats: - * - * - `/path/to/database` - local database - * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage - * - `db://host:port` - remote database (LanceDB cloud) - * @param {string} uri - The uri of the database. If the database uri starts - * with `db://` then it connects to a remote database. - * @see {@link ConnectionOptions} for more details on the URI format. - */ -export async function connect( - uri: string, - opts?: Partial, -): Promise { - opts = opts ?? 
{}; - const nativeConn = await LanceDbConnection.new(uri, opts); - return new Connection(nativeConn); -} diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index 138dcb59..15e50e21 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -22,6 +22,8 @@ "openai": "^4.29.2" }, "devDependencies": { + "@aws-sdk/client-kms": "^3.33.0", + "@aws-sdk/client-s3": "^3.33.0", "@napi-rs/cli": "^2.18.0", "@types/jest": "^29.1.2", "@types/tmp": "^0.2.6", @@ -94,6 +96,906 @@ "node": ">=6.0.0" } }, + "node_modules/@aws-crypto/crc32": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-3.0.0.tgz", + "integrity": "sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==", + "dev": true, + "dependencies": { + "@aws-crypto/util": "^3.0.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^1.11.1" + } + }, + "node_modules/@aws-crypto/crc32/node_modules/tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + "node_modules/@aws-crypto/crc32c": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/crc32c/-/crc32c-3.0.0.tgz", + "integrity": "sha512-ENNPPManmnVJ4BTXlOjAgD7URidbAznURqD0KvfREyc4o20DPYdEldU1f5cQ7Jbj0CJJSPaMIk/9ZshdB3210w==", + "dev": true, + "dependencies": { + "@aws-crypto/util": "^3.0.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^1.11.1" + } + }, + "node_modules/@aws-crypto/crc32c/node_modules/tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + "node_modules/@aws-crypto/ie11-detection": { + "version": "3.0.0", + "resolved": 
"https://registry.npmjs.org/@aws-crypto/ie11-detection/-/ie11-detection-3.0.0.tgz", + "integrity": "sha512-341lBBkiY1DfDNKai/wXM3aujNBkXR7tq1URPQDL9wi3AUbI80NR74uF1TXHMm7po1AcnFk8iu2S2IeU/+/A+Q==", + "dev": true, + "dependencies": { + "tslib": "^1.11.1" + } + }, + "node_modules/@aws-crypto/ie11-detection/node_modules/tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + "node_modules/@aws-crypto/sha1-browser": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/sha1-browser/-/sha1-browser-3.0.0.tgz", + "integrity": "sha512-NJth5c997GLHs6nOYTzFKTbYdMNA6/1XlKVgnZoaZcQ7z7UJlOgj2JdbHE8tiYLS3fzXNCguct77SPGat2raSw==", + "dev": true, + "dependencies": { + "@aws-crypto/ie11-detection": "^3.0.0", + "@aws-crypto/supports-web-crypto": "^3.0.0", + "@aws-crypto/util": "^3.0.0", + "@aws-sdk/types": "^3.222.0", + "@aws-sdk/util-locate-window": "^3.0.0", + "@aws-sdk/util-utf8-browser": "^3.0.0", + "tslib": "^1.11.1" + } + }, + "node_modules/@aws-crypto/sha1-browser/node_modules/tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + "node_modules/@aws-crypto/sha256-browser": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/sha256-browser/-/sha256-browser-3.0.0.tgz", + "integrity": "sha512-8VLmW2B+gjFbU5uMeqtQM6Nj0/F1bro80xQXCW6CQBWgosFWXTx77aeOF5CAIAmbOK64SdMBJdNr6J41yP5mvQ==", + "dev": true, + "dependencies": { + "@aws-crypto/ie11-detection": "^3.0.0", + "@aws-crypto/sha256-js": "^3.0.0", + "@aws-crypto/supports-web-crypto": "^3.0.0", + "@aws-crypto/util": "^3.0.0", + "@aws-sdk/types": "^3.222.0", + "@aws-sdk/util-locate-window": "^3.0.0", + 
"@aws-sdk/util-utf8-browser": "^3.0.0", + "tslib": "^1.11.1" + } + }, + "node_modules/@aws-crypto/sha256-browser/node_modules/tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + "node_modules/@aws-crypto/sha256-js": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/sha256-js/-/sha256-js-3.0.0.tgz", + "integrity": "sha512-PnNN7os0+yd1XvXAy23CFOmTbMaDxgxXtTKHybrJ39Y8kGzBATgBFibWJKH6BhytLI/Zyszs87xCOBNyBig6vQ==", + "dev": true, + "dependencies": { + "@aws-crypto/util": "^3.0.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^1.11.1" + } + }, + "node_modules/@aws-crypto/sha256-js/node_modules/tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + "node_modules/@aws-crypto/supports-web-crypto": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/supports-web-crypto/-/supports-web-crypto-3.0.0.tgz", + "integrity": "sha512-06hBdMwUAb2WFTuGG73LSC0wfPu93xWwo5vL2et9eymgmu3Id5vFAHBbajVWiGhPO37qcsdCap/FqXvJGJWPIg==", + "dev": true, + "dependencies": { + "tslib": "^1.11.1" + } + }, + "node_modules/@aws-crypto/supports-web-crypto/node_modules/tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + "node_modules/@aws-crypto/util": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/util/-/util-3.0.0.tgz", + "integrity": "sha512-2OJlpeJpCR48CC8r+uKVChzs9Iungj9wkZrl8Z041DWEWvyIHILYKCPNzJghKsivj+S3mLo6BVc7mBNzdxA46w==", + "dev": true, + "dependencies": { + 
"@aws-sdk/types": "^3.222.0", + "@aws-sdk/util-utf8-browser": "^3.0.0", + "tslib": "^1.11.1" + } + }, + "node_modules/@aws-crypto/util/node_modules/tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", + "dev": true + }, + "node_modules/@aws-sdk/client-kms": { + "version": "3.549.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-kms/-/client-kms-3.549.0.tgz", + "integrity": "sha512-sbeG8ss4bL2sz0/ZBsvAw4o3rGI1z7RY9+rTn+8z7dKFjMPPZ+4DDr9GBtToCRktGla9sCNxcdhDeXZkdKBR9g==", + "dev": true, + "dependencies": { + "@aws-crypto/sha256-browser": "3.0.0", + "@aws-crypto/sha256-js": "3.0.0", + "@aws-sdk/client-sts": "3.549.0", + "@aws-sdk/core": "3.549.0", + "@aws-sdk/credential-provider-node": "3.549.0", + "@aws-sdk/middleware-host-header": "3.535.0", + "@aws-sdk/middleware-logger": "3.535.0", + "@aws-sdk/middleware-recursion-detection": "3.535.0", + "@aws-sdk/middleware-user-agent": "3.540.0", + "@aws-sdk/region-config-resolver": "3.535.0", + "@aws-sdk/types": "3.535.0", + "@aws-sdk/util-endpoints": "3.540.0", + "@aws-sdk/util-user-agent-browser": "3.535.0", + "@aws-sdk/util-user-agent-node": "3.535.0", + "@smithy/config-resolver": "^2.2.0", + "@smithy/core": "^1.4.1", + "@smithy/fetch-http-handler": "^2.5.0", + "@smithy/hash-node": "^2.2.0", + "@smithy/invalid-dependency": "^2.2.0", + "@smithy/middleware-content-length": "^2.2.0", + "@smithy/middleware-endpoint": "^2.5.0", + "@smithy/middleware-retry": "^2.3.0", + "@smithy/middleware-serde": "^2.3.0", + "@smithy/middleware-stack": "^2.2.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/node-http-handler": "^2.5.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/smithy-client": "^2.5.0", + "@smithy/types": "^2.12.0", + "@smithy/url-parser": "^2.2.0", + "@smithy/util-base64": "^2.3.0", + "@smithy/util-body-length-browser": "^2.2.0", + 
"@smithy/util-body-length-node": "^2.3.0", + "@smithy/util-defaults-mode-browser": "^2.2.0", + "@smithy/util-defaults-mode-node": "^2.3.0", + "@smithy/util-endpoints": "^1.2.0", + "@smithy/util-middleware": "^2.2.0", + "@smithy/util-retry": "^2.2.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/client-s3": { + "version": "3.550.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-s3/-/client-s3-3.550.0.tgz", + "integrity": "sha512-45jjDQI0Q37PIteWhywhlExxYaiUeOsTsbE62b+U/FOjYV8tirC8uBY9eHeHaP4IPVGHeQWvEYrFJHNU+qsQLQ==", + "dev": true, + "dependencies": { + "@aws-crypto/sha1-browser": "3.0.0", + "@aws-crypto/sha256-browser": "3.0.0", + "@aws-crypto/sha256-js": "3.0.0", + "@aws-sdk/client-sts": "3.549.0", + "@aws-sdk/core": "3.549.0", + "@aws-sdk/credential-provider-node": "3.549.0", + "@aws-sdk/middleware-bucket-endpoint": "3.535.0", + "@aws-sdk/middleware-expect-continue": "3.535.0", + "@aws-sdk/middleware-flexible-checksums": "3.535.0", + "@aws-sdk/middleware-host-header": "3.535.0", + "@aws-sdk/middleware-location-constraint": "3.535.0", + "@aws-sdk/middleware-logger": "3.535.0", + "@aws-sdk/middleware-recursion-detection": "3.535.0", + "@aws-sdk/middleware-sdk-s3": "3.535.0", + "@aws-sdk/middleware-signing": "3.535.0", + "@aws-sdk/middleware-ssec": "3.537.0", + "@aws-sdk/middleware-user-agent": "3.540.0", + "@aws-sdk/region-config-resolver": "3.535.0", + "@aws-sdk/signature-v4-multi-region": "3.535.0", + "@aws-sdk/types": "3.535.0", + "@aws-sdk/util-endpoints": "3.540.0", + "@aws-sdk/util-user-agent-browser": "3.535.0", + "@aws-sdk/util-user-agent-node": "3.535.0", + "@aws-sdk/xml-builder": "3.535.0", + "@smithy/config-resolver": "^2.2.0", + "@smithy/core": "^1.4.1", + "@smithy/eventstream-serde-browser": "^2.2.0", + "@smithy/eventstream-serde-config-resolver": "^2.2.0", + "@smithy/eventstream-serde-node": "^2.2.0", + "@smithy/fetch-http-handler": "^2.5.0", + 
"@smithy/hash-blob-browser": "^2.2.0", + "@smithy/hash-node": "^2.2.0", + "@smithy/hash-stream-node": "^2.2.0", + "@smithy/invalid-dependency": "^2.2.0", + "@smithy/md5-js": "^2.2.0", + "@smithy/middleware-content-length": "^2.2.0", + "@smithy/middleware-endpoint": "^2.5.0", + "@smithy/middleware-retry": "^2.3.0", + "@smithy/middleware-serde": "^2.3.0", + "@smithy/middleware-stack": "^2.2.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/node-http-handler": "^2.5.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/smithy-client": "^2.5.0", + "@smithy/types": "^2.12.0", + "@smithy/url-parser": "^2.2.0", + "@smithy/util-base64": "^2.3.0", + "@smithy/util-body-length-browser": "^2.2.0", + "@smithy/util-body-length-node": "^2.3.0", + "@smithy/util-defaults-mode-browser": "^2.2.0", + "@smithy/util-defaults-mode-node": "^2.3.0", + "@smithy/util-endpoints": "^1.2.0", + "@smithy/util-retry": "^2.2.0", + "@smithy/util-stream": "^2.2.0", + "@smithy/util-utf8": "^2.3.0", + "@smithy/util-waiter": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/client-sso": { + "version": "3.549.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso/-/client-sso-3.549.0.tgz", + "integrity": "sha512-lz+yflOAj5Q263FlCsKpNqttaCb2NPh8jC76gVCqCt7TPxRDBYVaqg0OZYluDaETIDNJi4DwN2Azcck7ilwuPw==", + "dev": true, + "dependencies": { + "@aws-crypto/sha256-browser": "3.0.0", + "@aws-crypto/sha256-js": "3.0.0", + "@aws-sdk/core": "3.549.0", + "@aws-sdk/middleware-host-header": "3.535.0", + "@aws-sdk/middleware-logger": "3.535.0", + "@aws-sdk/middleware-recursion-detection": "3.535.0", + "@aws-sdk/middleware-user-agent": "3.540.0", + "@aws-sdk/region-config-resolver": "3.535.0", + "@aws-sdk/types": "3.535.0", + "@aws-sdk/util-endpoints": "3.540.0", + "@aws-sdk/util-user-agent-browser": "3.535.0", + "@aws-sdk/util-user-agent-node": "3.535.0", + "@smithy/config-resolver": "^2.2.0", + "@smithy/core": "^1.4.1", + 
"@smithy/fetch-http-handler": "^2.5.0", + "@smithy/hash-node": "^2.2.0", + "@smithy/invalid-dependency": "^2.2.0", + "@smithy/middleware-content-length": "^2.2.0", + "@smithy/middleware-endpoint": "^2.5.0", + "@smithy/middleware-retry": "^2.3.0", + "@smithy/middleware-serde": "^2.3.0", + "@smithy/middleware-stack": "^2.2.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/node-http-handler": "^2.5.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/smithy-client": "^2.5.0", + "@smithy/types": "^2.12.0", + "@smithy/url-parser": "^2.2.0", + "@smithy/util-base64": "^2.3.0", + "@smithy/util-body-length-browser": "^2.2.0", + "@smithy/util-body-length-node": "^2.3.0", + "@smithy/util-defaults-mode-browser": "^2.2.0", + "@smithy/util-defaults-mode-node": "^2.3.0", + "@smithy/util-endpoints": "^1.2.0", + "@smithy/util-middleware": "^2.2.0", + "@smithy/util-retry": "^2.2.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/client-sso-oidc": { + "version": "3.549.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.549.0.tgz", + "integrity": "sha512-FbB4A78ILAb8sM4TfBd+3CrQcfZIhe0gtVZNbaxpq5cJZh1K7oZ8vPfKw4do9JWkDUXPLsD9Bwz12f8/JpAb6Q==", + "dev": true, + "dependencies": { + "@aws-crypto/sha256-browser": "3.0.0", + "@aws-crypto/sha256-js": "3.0.0", + "@aws-sdk/client-sts": "3.549.0", + "@aws-sdk/core": "3.549.0", + "@aws-sdk/middleware-host-header": "3.535.0", + "@aws-sdk/middleware-logger": "3.535.0", + "@aws-sdk/middleware-recursion-detection": "3.535.0", + "@aws-sdk/middleware-user-agent": "3.540.0", + "@aws-sdk/region-config-resolver": "3.535.0", + "@aws-sdk/types": "3.535.0", + "@aws-sdk/util-endpoints": "3.540.0", + "@aws-sdk/util-user-agent-browser": "3.535.0", + "@aws-sdk/util-user-agent-node": "3.535.0", + "@smithy/config-resolver": "^2.2.0", + "@smithy/core": "^1.4.1", + "@smithy/fetch-http-handler": "^2.5.0", + "@smithy/hash-node": "^2.2.0", 
+ "@smithy/invalid-dependency": "^2.2.0", + "@smithy/middleware-content-length": "^2.2.0", + "@smithy/middleware-endpoint": "^2.5.0", + "@smithy/middleware-retry": "^2.3.0", + "@smithy/middleware-serde": "^2.3.0", + "@smithy/middleware-stack": "^2.2.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/node-http-handler": "^2.5.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/smithy-client": "^2.5.0", + "@smithy/types": "^2.12.0", + "@smithy/url-parser": "^2.2.0", + "@smithy/util-base64": "^2.3.0", + "@smithy/util-body-length-browser": "^2.2.0", + "@smithy/util-body-length-node": "^2.3.0", + "@smithy/util-defaults-mode-browser": "^2.2.0", + "@smithy/util-defaults-mode-node": "^2.3.0", + "@smithy/util-endpoints": "^1.2.0", + "@smithy/util-middleware": "^2.2.0", + "@smithy/util-retry": "^2.2.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "@aws-sdk/credential-provider-node": "^3.549.0" + } + }, + "node_modules/@aws-sdk/client-sts": { + "version": "3.549.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.549.0.tgz", + "integrity": "sha512-63IreJ598Dzvpb+6sy81KfIX5iQxnrWSEtlyeCdC2GO6gmSQVwJzc9kr5pAC83lHmlZcm/Q3KZr3XBhRQqP0og==", + "dev": true, + "dependencies": { + "@aws-crypto/sha256-browser": "3.0.0", + "@aws-crypto/sha256-js": "3.0.0", + "@aws-sdk/core": "3.549.0", + "@aws-sdk/middleware-host-header": "3.535.0", + "@aws-sdk/middleware-logger": "3.535.0", + "@aws-sdk/middleware-recursion-detection": "3.535.0", + "@aws-sdk/middleware-user-agent": "3.540.0", + "@aws-sdk/region-config-resolver": "3.535.0", + "@aws-sdk/types": "3.535.0", + "@aws-sdk/util-endpoints": "3.540.0", + "@aws-sdk/util-user-agent-browser": "3.535.0", + "@aws-sdk/util-user-agent-node": "3.535.0", + "@smithy/config-resolver": "^2.2.0", + "@smithy/core": "^1.4.1", + "@smithy/fetch-http-handler": "^2.5.0", + "@smithy/hash-node": "^2.2.0", + "@smithy/invalid-dependency": "^2.2.0", + 
"@smithy/middleware-content-length": "^2.2.0", + "@smithy/middleware-endpoint": "^2.5.0", + "@smithy/middleware-retry": "^2.3.0", + "@smithy/middleware-serde": "^2.3.0", + "@smithy/middleware-stack": "^2.2.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/node-http-handler": "^2.5.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/smithy-client": "^2.5.0", + "@smithy/types": "^2.12.0", + "@smithy/url-parser": "^2.2.0", + "@smithy/util-base64": "^2.3.0", + "@smithy/util-body-length-browser": "^2.2.0", + "@smithy/util-body-length-node": "^2.3.0", + "@smithy/util-defaults-mode-browser": "^2.2.0", + "@smithy/util-defaults-mode-node": "^2.3.0", + "@smithy/util-endpoints": "^1.2.0", + "@smithy/util-middleware": "^2.2.0", + "@smithy/util-retry": "^2.2.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "@aws-sdk/credential-provider-node": "^3.549.0" + } + }, + "node_modules/@aws-sdk/core": { + "version": "3.549.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.549.0.tgz", + "integrity": "sha512-jC61OxJn72r/BbuDRCcluiw05Xw9eVLG0CwxQpF3RocxfxyZqlrGYaGecZ8Wy+7g/3sqGRC/Ar5eUhU1YcLx7w==", + "dev": true, + "dependencies": { + "@smithy/core": "^1.4.1", + "@smithy/protocol-http": "^3.3.0", + "@smithy/signature-v4": "^2.2.0", + "@smithy/smithy-client": "^2.5.0", + "@smithy/types": "^2.12.0", + "fast-xml-parser": "4.2.5", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-env": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.535.0.tgz", + "integrity": "sha512-XppwO8c0GCGSAvdzyJOhbtktSEaShg14VJKg8mpMa1XcgqzmcqqHQjtDWbx5rZheY1VdpXZhpEzJkB6LpQejpA==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": 
">=14.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-http": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-http/-/credential-provider-http-3.535.0.tgz", + "integrity": "sha512-kdj1wCmOMZ29jSlUskRqN04S6fJ4dvt0Nq9Z32SA6wO7UG8ht6Ot9h/au/eTWJM3E1somZ7D771oK7dQt9b8yw==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/fetch-http-handler": "^2.5.0", + "@smithy/node-http-handler": "^2.5.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/smithy-client": "^2.5.0", + "@smithy/types": "^2.12.0", + "@smithy/util-stream": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-ini": { + "version": "3.549.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.549.0.tgz", + "integrity": "sha512-k6IIrluZjQpzui5Din8fW3bFFhHaJ64XrsfYx0Ks1mb7xan84dJxmYP3tdDDmLzUeJv5h95ag88taHfjY9rakA==", + "dev": true, + "dependencies": { + "@aws-sdk/client-sts": "3.549.0", + "@aws-sdk/credential-provider-env": "3.535.0", + "@aws-sdk/credential-provider-process": "3.535.0", + "@aws-sdk/credential-provider-sso": "3.549.0", + "@aws-sdk/credential-provider-web-identity": "3.549.0", + "@aws-sdk/types": "3.535.0", + "@smithy/credential-provider-imds": "^2.3.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/shared-ini-file-loader": "^2.4.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-node": { + "version": "3.549.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.549.0.tgz", + "integrity": "sha512-f3YgalsMuywEAVX4AUm9tojqrBdfpAac0+D320ePzas0Ntbp7ItYu9ceKIhgfzXO3No7P3QK0rCrOxL+ABTn8Q==", + "dev": true, + "dependencies": { + "@aws-sdk/credential-provider-env": "3.535.0", + 
"@aws-sdk/credential-provider-http": "3.535.0", + "@aws-sdk/credential-provider-ini": "3.549.0", + "@aws-sdk/credential-provider-process": "3.535.0", + "@aws-sdk/credential-provider-sso": "3.549.0", + "@aws-sdk/credential-provider-web-identity": "3.549.0", + "@aws-sdk/types": "3.535.0", + "@smithy/credential-provider-imds": "^2.3.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/shared-ini-file-loader": "^2.4.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-process": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.535.0.tgz", + "integrity": "sha512-9O1OaprGCnlb/kYl8RwmH7Mlg8JREZctB8r9sa1KhSsWFq/SWO0AuJTyowxD7zL5PkeS4eTvzFFHWCa3OO5epA==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/shared-ini-file-loader": "^2.4.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-sso": { + "version": "3.549.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.549.0.tgz", + "integrity": "sha512-BGopRKHs7W8zkoH8qmSHrjudj263kXbhVkAUPxVUz0I28+CZNBgJC/RfVCbOpzmysIQEpwSqvOv1y0k+DQzIJQ==", + "dev": true, + "dependencies": { + "@aws-sdk/client-sso": "3.549.0", + "@aws-sdk/token-providers": "3.549.0", + "@aws-sdk/types": "3.535.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/shared-ini-file-loader": "^2.4.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-web-identity": { + "version": "3.549.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.549.0.tgz", + "integrity": 
"sha512-QzclVXPxuwSI7515l34sdvliVq5leroO8P7RQFKRgfyQKO45o1psghierwG3PgV6jlMiv78FIAGJBr/n4qZ7YA==", + "dev": true, + "dependencies": { + "@aws-sdk/client-sts": "3.549.0", + "@aws-sdk/types": "3.535.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-bucket-endpoint": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-bucket-endpoint/-/middleware-bucket-endpoint-3.535.0.tgz", + "integrity": "sha512-7sijlfQsc4UO9Fsl11mU26Y5f9E7g6UoNg/iJUBpC5pgvvmdBRO5UEhbB/gnqvOEPsBXyhmfzbstebq23Qdz7A==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@aws-sdk/util-arn-parser": "3.535.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/types": "^2.12.0", + "@smithy/util-config-provider": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-expect-continue": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-expect-continue/-/middleware-expect-continue-3.535.0.tgz", + "integrity": "sha512-hFKyqUBky0NWCVku8iZ9+PACehx0p6vuMw5YnZf8FVgHP0fode0b/NwQY6UY7oor/GftvRsAlRUAWGNFEGUpwA==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-flexible-checksums": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-flexible-checksums/-/middleware-flexible-checksums-3.535.0.tgz", + "integrity": "sha512-rBIzldY9jjRATxICDX7t77aW6ctqmVDgnuAOgbVT5xgHftt4o7PGWKoMvl/45hYqoQgxVFnCBof9bxkqSBebVA==", + "dev": true, + "dependencies": { + "@aws-crypto/crc32": "3.0.0", + "@aws-crypto/crc32c": "3.0.0", + "@aws-sdk/types": "3.535.0", + "@smithy/is-array-buffer": 
"^2.2.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/types": "^2.12.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-host-header": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-host-header/-/middleware-host-header-3.535.0.tgz", + "integrity": "sha512-0h6TWjBWtDaYwHMQJI9ulafeS4lLaw1vIxRjbpH0svFRt6Eve+Sy8NlVhECfTU2hNz/fLubvrUxsXoThaLBIew==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-location-constraint": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-location-constraint/-/middleware-location-constraint-3.535.0.tgz", + "integrity": "sha512-SxfS9wfidUZZ+WnlKRTCRn3h+XTsymXRXPJj8VV6hNRNeOwzNweoG3YhQbTowuuNfXf89m9v6meYkBBtkdacKw==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-logger": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-logger/-/middleware-logger-3.535.0.tgz", + "integrity": "sha512-huNHpONOrEDrdRTvSQr1cJiRMNf0S52NDXtaPzdxiubTkP+vni2MohmZANMOai/qT0olmEVX01LhZ0ZAOgmg6A==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-recursion-detection": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-recursion-detection/-/middleware-recursion-detection-3.535.0.tgz", + "integrity": "sha512-am2qgGs+gwqmR4wHLWpzlZ8PWhm4ktj5bYSgDrsOfjhdBlWNxvPoID9/pDAz5RWL48+oH7I6SQzMqxXsFDikrw==", + "dev": true, + "dependencies": { + 
"@aws-sdk/types": "3.535.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-sdk-s3": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-sdk-s3/-/middleware-sdk-s3-3.535.0.tgz", + "integrity": "sha512-/dLG/E3af6ohxkQ5GBHT8tZfuPIg6eItKxCXuulvYj0Tqgf3Mb+xTsvSkxQsJF06RS4sH7Qsg/PnB8ZfrJrXpg==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@aws-sdk/util-arn-parser": "3.535.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/signature-v4": "^2.2.0", + "@smithy/smithy-client": "^2.5.0", + "@smithy/types": "^2.12.0", + "@smithy/util-config-provider": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-signing": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-signing/-/middleware-signing-3.535.0.tgz", + "integrity": "sha512-Rb4sfus1Gc5paRl9JJgymJGsb/i3gJKK/rTuFZICdd1PBBE5osIOHP5CpzWYBtc5LlyZE1a2QoxPMCyG+QUGPw==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/signature-v4": "^2.2.0", + "@smithy/types": "^2.12.0", + "@smithy/util-middleware": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-ssec": { + "version": "3.537.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-ssec/-/middleware-ssec-3.537.0.tgz", + "integrity": "sha512-2QWMrbwd5eBy5KCYn9a15JEWBgrK2qFEKQN2lqb/6z0bhtevIOxIRfC99tzvRuPt6nixFQ+ynKuBjcfT4ZFrdQ==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/middleware-user-agent": { + "version": "3.540.0", + 
"resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.540.0.tgz", + "integrity": "sha512-8Rd6wPeXDnOYzWj1XCmOKcx/Q87L0K1/EHqOBocGjLVbN3gmRxBvpmR1pRTjf7IsWfnnzN5btqtcAkfDPYQUMQ==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@aws-sdk/util-endpoints": "3.540.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/region-config-resolver": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/region-config-resolver/-/region-config-resolver-3.535.0.tgz", + "integrity": "sha512-IXOznDiaItBjsQy4Fil0kzX/J3HxIOknEphqHbOfUf+LpA5ugcsxuQQONrbEQusCBnfJyymrldBvBhFmtlU9Wg==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/types": "^2.12.0", + "@smithy/util-config-provider": "^2.3.0", + "@smithy/util-middleware": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4-multi-region": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/signature-v4-multi-region/-/signature-v4-multi-region-3.535.0.tgz", + "integrity": "sha512-tqCsEsEj8icW0SAh3NvyhRUq54Gz2pu4NM2tOSrFp7SO55heUUaRLSzYteNZCTOupH//AAaZvbN/UUTO/DrOog==", + "dev": true, + "dependencies": { + "@aws-sdk/middleware-sdk-s3": "3.535.0", + "@aws-sdk/types": "3.535.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/signature-v4": "^2.2.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/token-providers": { + "version": "3.549.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.549.0.tgz", + "integrity": "sha512-rJyeXkXknLukRFGuMQOgKnPBa+kLODJtOqEBf929SpQ96f1I6ytdndmWbB5B/OQN5Fu5DOOQUQqJypDQVl5ibQ==", + "dev": true, + "dependencies": { + 
"@aws-sdk/client-sso-oidc": "3.549.0", + "@aws-sdk/types": "3.535.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/shared-ini-file-loader": "^2.4.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/types": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/types/-/types-3.535.0.tgz", + "integrity": "sha512-aY4MYfduNj+sRR37U7XxYR8wemfbKP6lx00ze2M2uubn7mZotuVrWYAafbMSXrdEMSToE5JDhr28vArSOoLcSg==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/util-arn-parser": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-arn-parser/-/util-arn-parser-3.535.0.tgz", + "integrity": "sha512-smVo29nUPAOprp8Z5Y3GHuhiOtw6c8/EtLCm5AVMtRsTPw4V414ZXL2H66tzmb5kEeSzQlbfBSBEdIFZoxO9kg==", + "dev": true, + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/util-endpoints": { + "version": "3.540.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-endpoints/-/util-endpoints-3.540.0.tgz", + "integrity": "sha512-1kMyQFAWx6f8alaI6UT65/5YW/7pDWAKAdNwL6vuJLea03KrZRX3PMoONOSJpAS5m3Ot7HlWZvf3wZDNTLELZw==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/types": "^2.12.0", + "@smithy/util-endpoints": "^1.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/util-locate-window": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-locate-window/-/util-locate-window-3.535.0.tgz", + "integrity": "sha512-PHJ3SL6d2jpcgbqdgiPxkXpu7Drc2PYViwxSIqvvMKhDwzSB1W3mMvtpzwKM4IE7zLFodZo0GKjJ9AsoXndXhA==", + "dev": true, + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/util-user-agent-browser": { + "version": "3.535.0", + 
"resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-browser/-/util-user-agent-browser-3.535.0.tgz", + "integrity": "sha512-RWMcF/xV5n+nhaA/Ff5P3yNP3Kur/I+VNZngog4TEs92oB/nwOdAg/2JL8bVAhUbMrjTjpwm7PItziYFQoqyig==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/types": "^2.12.0", + "bowser": "^2.11.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-sdk/util-user-agent-node": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.535.0.tgz", + "integrity": "sha512-dRek0zUuIT25wOWJlsRm97nTkUlh1NDcLsQZIN2Y8KxhwoXXWtJs5vaDPT+qAg+OpcNj80i1zLR/CirqlFg/TQ==", + "dev": true, + "dependencies": { + "@aws-sdk/types": "3.535.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "aws-crt": ">=1.0.0" + }, + "peerDependenciesMeta": { + "aws-crt": { + "optional": true + } + } + }, + "node_modules/@aws-sdk/util-utf8-browser": { + "version": "3.259.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-utf8-browser/-/util-utf8-browser-3.259.0.tgz", + "integrity": "sha512-UvFa/vR+e19XookZF8RzFZBrw2EUkQWxiBW0yYQAhvk3C+QVGl0H3ouca8LDBlBfQKXwmW3huo/59H8rwb1wJw==", + "dev": true, + "dependencies": { + "tslib": "^2.3.1" + } + }, + "node_modules/@aws-sdk/xml-builder": { + "version": "3.535.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/xml-builder/-/xml-builder-3.535.0.tgz", + "integrity": "sha512-VXAq/Jz8KIrU84+HqsOJhIKZqG0PNTdi6n6PFQ4xJf44ZQHD/5C7ouH4qCFX5XgZXcgbRIcMVVYGC6Jye0dRng==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/@babel/code-frame": { "version": "7.23.5", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.23.5.tgz", @@ -1469,6 +2371,693 @@ "@sinonjs/commons": "^3.0.0" } }, + 
"node_modules/@smithy/abort-controller": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/abort-controller/-/abort-controller-2.2.0.tgz", + "integrity": "sha512-wRlta7GuLWpTqtFfGo+nZyOO1vEvewdNR1R4rTxpC8XU6vG/NDyrFBhwLZsqg1NUoR1noVaXJPC/7ZK47QCySw==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/chunked-blob-reader": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/chunked-blob-reader/-/chunked-blob-reader-2.2.0.tgz", + "integrity": "sha512-3GJNvRwXBGdkDZZOGiziVYzDpn4j6zfyULHMDKAGIUo72yHALpE9CbhfQp/XcLNVoc1byfMpn6uW5H2BqPjgaQ==", + "dev": true, + "dependencies": { + "tslib": "^2.6.2" + } + }, + "node_modules/@smithy/chunked-blob-reader-native": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/chunked-blob-reader-native/-/chunked-blob-reader-native-2.2.0.tgz", + "integrity": "sha512-VNB5+1oCgX3Fzs072yuRsUoC2N4Zg/LJ11DTxX3+Qu+Paa6AmbIF0E9sc2wthz9Psrk/zcOlTCyuposlIhPjZQ==", + "dev": true, + "dependencies": { + "@smithy/util-base64": "^2.3.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@smithy/config-resolver": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/config-resolver/-/config-resolver-2.2.0.tgz", + "integrity": "sha512-fsiMgd8toyUba6n1WRmr+qACzXltpdDkPTAaDqc8QqPBUzO+/JKwL6bUBseHVi8tu9l+3JOK+tSf7cay+4B3LA==", + "dev": true, + "dependencies": { + "@smithy/node-config-provider": "^2.3.0", + "@smithy/types": "^2.12.0", + "@smithy/util-config-provider": "^2.3.0", + "@smithy/util-middleware": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/core": { + "version": "1.4.2", + "resolved": "https://registry.npmjs.org/@smithy/core/-/core-1.4.2.tgz", + "integrity": "sha512-2fek3I0KZHWJlRLvRTqxTEri+qV0GRHrJIoLFuBMZB4EMg4WgeBGfF0X6abnrNYpq55KJ6R4D6x4f0vLnhzinA==", + "dev": true, + "dependencies": { + 
"@smithy/middleware-endpoint": "^2.5.1", + "@smithy/middleware-retry": "^2.3.1", + "@smithy/middleware-serde": "^2.3.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/smithy-client": "^2.5.1", + "@smithy/types": "^2.12.0", + "@smithy/util-middleware": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/credential-provider-imds": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/credential-provider-imds/-/credential-provider-imds-2.3.0.tgz", + "integrity": "sha512-BWB9mIukO1wjEOo1Ojgl6LrG4avcaC7T/ZP6ptmAaW4xluhSIPZhY+/PI5YKzlk+jsm+4sQZB45Bt1OfMeQa3w==", + "dev": true, + "dependencies": { + "@smithy/node-config-provider": "^2.3.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/types": "^2.12.0", + "@smithy/url-parser": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/eventstream-codec": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/eventstream-codec/-/eventstream-codec-2.2.0.tgz", + "integrity": "sha512-8janZoJw85nJmQZc4L8TuePp2pk1nxLgkxIR0TUjKJ5Dkj5oelB9WtiSSGXCQvNsJl0VSTvK/2ueMXxvpa9GVw==", + "dev": true, + "dependencies": { + "@aws-crypto/crc32": "3.0.0", + "@smithy/types": "^2.12.0", + "@smithy/util-hex-encoding": "^2.2.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@smithy/eventstream-serde-browser": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/eventstream-serde-browser/-/eventstream-serde-browser-2.2.0.tgz", + "integrity": "sha512-UaPf8jKbcP71BGiO0CdeLmlg+RhWnlN8ipsMSdwvqBFigl5nil3rHOI/5GE3tfiuX8LvY5Z9N0meuU7Rab7jWw==", + "dev": true, + "dependencies": { + "@smithy/eventstream-serde-universal": "^2.2.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-config-resolver": { + "version": "2.2.0", + "resolved": 
"https://registry.npmjs.org/@smithy/eventstream-serde-config-resolver/-/eventstream-serde-config-resolver-2.2.0.tgz", + "integrity": "sha512-RHhbTw/JW3+r8QQH7PrganjNCiuiEZmpi6fYUAetFfPLfZ6EkiA08uN3EFfcyKubXQxOwTeJRZSQmDDCdUshaA==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-node": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/eventstream-serde-node/-/eventstream-serde-node-2.2.0.tgz", + "integrity": "sha512-zpQMtJVqCUMn+pCSFcl9K/RPNtQE0NuMh8sKpCdEHafhwRsjP50Oq/4kMmvxSRy6d8Jslqd8BLvDngrUtmN9iA==", + "dev": true, + "dependencies": { + "@smithy/eventstream-serde-universal": "^2.2.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/eventstream-serde-universal": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/eventstream-serde-universal/-/eventstream-serde-universal-2.2.0.tgz", + "integrity": "sha512-pvoe/vvJY0mOpuF84BEtyZoYfbehiFj8KKWk1ds2AT0mTLYFVs+7sBJZmioOFdBXKd48lfrx1vumdPdmGlCLxA==", + "dev": true, + "dependencies": { + "@smithy/eventstream-codec": "^2.2.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/fetch-http-handler": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/@smithy/fetch-http-handler/-/fetch-http-handler-2.5.0.tgz", + "integrity": "sha512-BOWEBeppWhLn/no/JxUL/ghTfANTjT7kg3Ww2rPqTUY9R4yHPXxJ9JhMe3Z03LN3aPwiwlpDIUcVw1xDyHqEhw==", + "dev": true, + "dependencies": { + "@smithy/protocol-http": "^3.3.0", + "@smithy/querystring-builder": "^2.2.0", + "@smithy/types": "^2.12.0", + "@smithy/util-base64": "^2.3.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@smithy/hash-blob-browser": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/hash-blob-browser/-/hash-blob-browser-2.2.0.tgz", 
+ "integrity": "sha512-SGPoVH8mdXBqrkVCJ1Hd1X7vh1zDXojNN1yZyZTZsCno99hVue9+IYzWDjq/EQDDXxmITB0gBmuyPh8oAZSTcg==", + "dev": true, + "dependencies": { + "@smithy/chunked-blob-reader": "^2.2.0", + "@smithy/chunked-blob-reader-native": "^2.2.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@smithy/hash-node": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/hash-node/-/hash-node-2.2.0.tgz", + "integrity": "sha512-zLWaC/5aWpMrHKpoDF6nqpNtBhlAYKF/7+9yMN7GpdR8CzohnWfGtMznPybnwSS8saaXBMxIGwJqR4HmRp6b3g==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "@smithy/util-buffer-from": "^2.2.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/hash-stream-node": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/hash-stream-node/-/hash-stream-node-2.2.0.tgz", + "integrity": "sha512-aT+HCATOSRMGpPI7bi7NSsTNVZE/La9IaxLXWoVAYMxHT5hGO3ZOGEMZQg8A6nNL+pdFGtZQtND1eoY084HgHQ==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/invalid-dependency": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/invalid-dependency/-/invalid-dependency-2.2.0.tgz", + "integrity": "sha512-nEDASdbKFKPXN2O6lOlTgrEEOO9NHIeO+HVvZnkqc8h5U9g3BIhWsvzFo+UcUbliMHvKNPD/zVxDrkP1Sbgp8Q==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@smithy/is-array-buffer": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-2.2.0.tgz", + "integrity": "sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==", + "dev": true, + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + 
"node_modules/@smithy/md5-js": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/md5-js/-/md5-js-2.2.0.tgz", + "integrity": "sha512-M26XTtt9IIusVMOWEAhIvFIr9jYj4ISPPGJROqw6vXngO3IYJCnVVSMFn4Tx1rUTG5BiKJNg9u2nxmBiZC5IlQ==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@smithy/middleware-content-length": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/middleware-content-length/-/middleware-content-length-2.2.0.tgz", + "integrity": "sha512-5bl2LG1Ah/7E5cMSC+q+h3IpVHMeOkG0yLRyQT1p2aMJkSrZG7RlXHPuAgb7EyaFeidKEnnd/fNaLLaKlHGzDQ==", + "dev": true, + "dependencies": { + "@smithy/protocol-http": "^3.3.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/middleware-endpoint": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@smithy/middleware-endpoint/-/middleware-endpoint-2.5.1.tgz", + "integrity": "sha512-1/8kFp6Fl4OsSIVTWHnNjLnTL8IqpIb/D3sTSczrKFnrE9VMNWxnrRKNvpUHOJ6zpGD5f62TPm7+17ilTJpiCQ==", + "dev": true, + "dependencies": { + "@smithy/middleware-serde": "^2.3.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/shared-ini-file-loader": "^2.4.0", + "@smithy/types": "^2.12.0", + "@smithy/url-parser": "^2.2.0", + "@smithy/util-middleware": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/middleware-retry": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/@smithy/middleware-retry/-/middleware-retry-2.3.1.tgz", + "integrity": "sha512-P2bGufFpFdYcWvqpyqqmalRtwFUNUA8vHjJR5iGqbfR6mp65qKOLcUd6lTr4S9Gn/enynSrSf3p3FVgVAf6bXA==", + "dev": true, + "dependencies": { + "@smithy/node-config-provider": "^2.3.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/service-error-classification": "^2.1.5", + "@smithy/smithy-client": "^2.5.1", + "@smithy/types": "^2.12.0", + 
"@smithy/util-middleware": "^2.2.0", + "@smithy/util-retry": "^2.2.0", + "tslib": "^2.6.2", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/middleware-serde": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/middleware-serde/-/middleware-serde-2.3.0.tgz", + "integrity": "sha512-sIADe7ojwqTyvEQBe1nc/GXB9wdHhi9UwyX0lTyttmUWDJLP655ZYE1WngnNyXREme8I27KCaUhyhZWRXL0q7Q==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/middleware-stack": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/middleware-stack/-/middleware-stack-2.2.0.tgz", + "integrity": "sha512-Qntc3jrtwwrsAC+X8wms8zhrTr0sFXnyEGhZd9sLtsJ/6gGQKFzNB+wWbOcpJd7BR8ThNCoKt76BuQahfMvpeA==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/node-config-provider": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/node-config-provider/-/node-config-provider-2.3.0.tgz", + "integrity": "sha512-0elK5/03a1JPWMDPaS726Iw6LpQg80gFut1tNpPfxFuChEEklo2yL823V94SpTZTxmKlXFtFgsP55uh3dErnIg==", + "dev": true, + "dependencies": { + "@smithy/property-provider": "^2.2.0", + "@smithy/shared-ini-file-loader": "^2.4.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/node-http-handler": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/@smithy/node-http-handler/-/node-http-handler-2.5.0.tgz", + "integrity": "sha512-mVGyPBzkkGQsPoxQUbxlEfRjrj6FPyA3u3u2VXGr9hT8wilsoQdZdvKpMBFMB8Crfhv5dNkKHIW0Yyuc7eABqA==", + "dev": true, + "dependencies": { + "@smithy/abort-controller": "^2.2.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/querystring-builder": "^2.2.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + 
"engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/property-provider": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/property-provider/-/property-provider-2.2.0.tgz", + "integrity": "sha512-+xiil2lFhtTRzXkx8F053AV46QnIw6e7MV8od5Mi68E1ICOjCeCHw2XfLnDEUHnT9WGUIkwcqavXjfwuJbGlpg==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/protocol-http": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/@smithy/protocol-http/-/protocol-http-3.3.0.tgz", + "integrity": "sha512-Xy5XK1AFWW2nlY/biWZXu6/krgbaf2dg0q492D8M5qthsnU2H+UgFeZLbM76FnH7s6RO/xhQRkj+T6KBO3JzgQ==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/querystring-builder": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/querystring-builder/-/querystring-builder-2.2.0.tgz", + "integrity": "sha512-L1kSeviUWL+emq3CUVSgdogoM/D9QMFaqxL/dd0X7PCNWmPXqt+ExtrBjqT0V7HLN03Vs9SuiLrG3zy3JGnE5A==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "@smithy/util-uri-escape": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/querystring-parser": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/querystring-parser/-/querystring-parser-2.2.0.tgz", + "integrity": "sha512-BvHCDrKfbG5Yhbpj4vsbuPV2GgcpHiAkLeIlcA1LtfpMz3jrqizP1+OguSNSj1MwBHEiN+jwNisXLGdajGDQJA==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/service-error-classification": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@smithy/service-error-classification/-/service-error-classification-2.1.5.tgz", + "integrity": 
"sha512-uBDTIBBEdAQryvHdc5W8sS5YX7RQzF683XrHePVdFmAgKiMofU15FLSM0/HU03hKTnazdNRFa0YHS7+ArwoUSQ==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/shared-ini-file-loader": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/@smithy/shared-ini-file-loader/-/shared-ini-file-loader-2.4.0.tgz", + "integrity": "sha512-WyujUJL8e1B6Z4PBfAqC/aGY1+C7T0w20Gih3yrvJSk97gpiVfB+y7c46T4Nunk+ZngLq0rOIdeVeIklk0R3OA==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/signature-v4": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/@smithy/signature-v4/-/signature-v4-2.2.1.tgz", + "integrity": "sha512-j5fHgL1iqKTsKJ1mTcw88p0RUcidDu95AWSeZTgiYJb+QcfwWU/UpBnaqiB59FNH5MiAZuSbOBnZlwzeeY2tIw==", + "dev": true, + "dependencies": { + "@smithy/is-array-buffer": "^2.2.0", + "@smithy/types": "^2.12.0", + "@smithy/util-hex-encoding": "^2.2.0", + "@smithy/util-middleware": "^2.2.0", + "@smithy/util-uri-escape": "^2.2.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/smithy-client": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@smithy/smithy-client/-/smithy-client-2.5.1.tgz", + "integrity": "sha512-jrbSQrYCho0yDaaf92qWgd+7nAeap5LtHTI51KXqmpIFCceKU3K9+vIVTUH72bOJngBMqa4kyu1VJhRcSrk/CQ==", + "dev": true, + "dependencies": { + "@smithy/middleware-endpoint": "^2.5.1", + "@smithy/middleware-stack": "^2.2.0", + "@smithy/protocol-http": "^3.3.0", + "@smithy/types": "^2.12.0", + "@smithy/util-stream": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/types": { + "version": "2.12.0", + "resolved": "https://registry.npmjs.org/@smithy/types/-/types-2.12.0.tgz", + "integrity": 
"sha512-QwYgloJ0sVNBeBuBs65cIkTbfzV/Q6ZNPCJ99EICFEdJYG50nGIY/uYXp+TbsdJReIuPr0a0kXmCvren3MbRRw==", + "dev": true, + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/url-parser": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/url-parser/-/url-parser-2.2.0.tgz", + "integrity": "sha512-hoA4zm61q1mNTpksiSWp2nEl1dt3j726HdRhiNgVJQMj7mLp7dprtF57mOB6JvEk/x9d2bsuL5hlqZbBuHQylQ==", + "dev": true, + "dependencies": { + "@smithy/querystring-parser": "^2.2.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@smithy/util-base64": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/util-base64/-/util-base64-2.3.0.tgz", + "integrity": "sha512-s3+eVwNeJuXUwuMbusncZNViuhv2LjVJ1nMwTqSA0XAC7gjKhqqxRdJPhR8+YrkoZ9IiIbFk/yK6ACe/xlF+hw==", + "dev": true, + "dependencies": { + "@smithy/util-buffer-from": "^2.2.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/util-body-length-browser": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-body-length-browser/-/util-body-length-browser-2.2.0.tgz", + "integrity": "sha512-dtpw9uQP7W+n3vOtx0CfBD5EWd7EPdIdsQnWTDoFf77e3VUf05uA7R7TGipIo8e4WL2kuPdnsr3hMQn9ziYj5w==", + "dev": true, + "dependencies": { + "tslib": "^2.6.2" + } + }, + "node_modules/@smithy/util-body-length-node": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/util-body-length-node/-/util-body-length-node-2.3.0.tgz", + "integrity": "sha512-ITWT1Wqjubf2CJthb0BuT9+bpzBfXeMokH/AAa5EJQgbv9aPMVfnM76iFIZVFf50hYXGbtiV71BHAthNWd6+dw==", + "dev": true, + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/util-buffer-from": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-2.2.0.tgz", + "integrity": 
"sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==", + "dev": true, + "dependencies": { + "@smithy/is-array-buffer": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/util-config-provider": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/util-config-provider/-/util-config-provider-2.3.0.tgz", + "integrity": "sha512-HZkzrRcuFN1k70RLqlNK4FnPXKOpkik1+4JaBoHNJn+RnJGYqaa3c5/+XtLOXhlKzlRgNvyaLieHTW2VwGN0VQ==", + "dev": true, + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/util-defaults-mode-browser": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-browser/-/util-defaults-mode-browser-2.2.1.tgz", + "integrity": "sha512-RtKW+8j8skk17SYowucwRUjeh4mCtnm5odCL0Lm2NtHQBsYKrNW0od9Rhopu9wF1gHMfHeWF7i90NwBz/U22Kw==", + "dev": true, + "dependencies": { + "@smithy/property-provider": "^2.2.0", + "@smithy/smithy-client": "^2.5.1", + "@smithy/types": "^2.12.0", + "bowser": "^2.11.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">= 10.0.0" + } + }, + "node_modules/@smithy/util-defaults-mode-node": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-node/-/util-defaults-mode-node-2.3.1.tgz", + "integrity": "sha512-vkMXHQ0BcLFysBMWgSBLSk3+leMpFSyyFj8zQtv5ZyUBx8/owVh1/pPEkzmW/DR/Gy/5c8vjLDD9gZjXNKbrpA==", + "dev": true, + "dependencies": { + "@smithy/config-resolver": "^2.2.0", + "@smithy/credential-provider-imds": "^2.3.0", + "@smithy/node-config-provider": "^2.3.0", + "@smithy/property-provider": "^2.2.0", + "@smithy/smithy-client": "^2.5.1", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">= 10.0.0" + } + }, + "node_modules/@smithy/util-endpoints": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-endpoints/-/util-endpoints-1.2.0.tgz", + 
"integrity": "sha512-BuDHv8zRjsE5zXd3PxFXFknzBG3owCpjq8G3FcsXW3CykYXuEqM3nTSsmLzw5q+T12ZYuDlVUZKBdpNbhVtlrQ==", + "dev": true, + "dependencies": { + "@smithy/node-config-provider": "^2.3.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@smithy/util-hex-encoding": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-hex-encoding/-/util-hex-encoding-2.2.0.tgz", + "integrity": "sha512-7iKXR+/4TpLK194pVjKiasIyqMtTYJsgKgM242Y9uzt5dhHnUDvMNb+3xIhRJ9QhvqGii/5cRUt4fJn3dtXNHQ==", + "dev": true, + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/util-middleware": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-middleware/-/util-middleware-2.2.0.tgz", + "integrity": "sha512-L1qpleXf9QD6LwLCJ5jddGkgWyuSvWBkJwWAZ6kFkdifdso+sk3L3O1HdmPvCdnCK3IS4qWyPxev01QMnfHSBw==", + "dev": true, + "dependencies": { + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/util-retry": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-retry/-/util-retry-2.2.0.tgz", + "integrity": "sha512-q9+pAFPTfftHXRytmZ7GzLFFrEGavqapFc06XxzZFcSIGERXMerXxCitjOG1prVDR9QdjqotF40SWvbqcCpf8g==", + "dev": true, + "dependencies": { + "@smithy/service-error-classification": "^2.1.5", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@smithy/util-stream": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-stream/-/util-stream-2.2.0.tgz", + "integrity": "sha512-17faEXbYWIRst1aU9SvPZyMdWmqIrduZjVOqCPMIsWFNxs5yQQgFrJL6b2SdiCzyW9mJoDjFtgi53xx7EH+BXA==", + "dev": true, + "dependencies": { + "@smithy/fetch-http-handler": "^2.5.0", + "@smithy/node-http-handler": "^2.5.0", + "@smithy/types": "^2.12.0", + "@smithy/util-base64": "^2.3.0", + 
"@smithy/util-buffer-from": "^2.2.0", + "@smithy/util-hex-encoding": "^2.2.0", + "@smithy/util-utf8": "^2.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/util-uri-escape": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-uri-escape/-/util-uri-escape-2.2.0.tgz", + "integrity": "sha512-jtmJMyt1xMD/d8OtbVJ2gFZOSKc+ueYJZPW20ULW1GOp/q/YIM0wNh+u8ZFao9UaIGz4WoPW8hC64qlWLIfoDA==", + "dev": true, + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/util-utf8": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-2.3.0.tgz", + "integrity": "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==", + "dev": true, + "dependencies": { + "@smithy/util-buffer-from": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@smithy/util-waiter": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-waiter/-/util-waiter-2.2.0.tgz", + "integrity": "sha512-IHk53BVw6MPMi2Gsn+hCng8rFA3ZmR3Rk7GllxDUW9qFJl/hiSvskn7XldkECapQVkIg/1dHpMAxI9xSTaLLSA==", + "dev": true, + "dependencies": { + "@smithy/abort-controller": "^2.2.0", + "@smithy/types": "^2.12.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/@swc/helpers": { "version": "0.5.6", "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.6.tgz", @@ -2201,6 +3790,12 @@ "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" }, + "node_modules/bowser": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/bowser/-/bowser-2.11.0.tgz", + "integrity": "sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA==", + "dev": true + }, 
"node_modules/brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -2989,6 +4584,28 @@ "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", "dev": true }, + "node_modules/fast-xml-parser": { + "version": "4.2.5", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.2.5.tgz", + "integrity": "sha512-B9/wizE4WngqQftFPmdaMYlXoJlJOYxGQOanC77fq9k8+Z0v5dDSVh+3glErdIROP//s/jgb7ZuxKfB8nVyo0g==", + "dev": true, + "funding": [ + { + "type": "paypal", + "url": "https://paypal.me/naturalintelligence" + }, + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + } + ], + "dependencies": { + "strnum": "^1.0.5" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, "node_modules/fastq": { "version": "1.16.0", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.16.0.tgz", @@ -5417,6 +7034,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/strnum": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz", + "integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==", + "dev": true + }, "node_modules/supports-color": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", @@ -5992,6 +7615,19 @@ "punycode": "^2.1.0" } }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "dev": true, + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/v8-to-istanbul": { "version": "9.2.0", "resolved": 
"https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.2.0.tgz", diff --git a/nodejs/package.json b/nodejs/package.json index f569825c..c9e06faf 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -18,6 +18,8 @@ }, "license": "Apache 2.0", "devDependencies": { + "@aws-sdk/client-s3": "^3.33.0", + "@aws-sdk/client-kms": "^3.33.0", "@napi-rs/cli": "^2.18.0", "@types/jest": "^29.1.2", "@types/tmp": "^0.2.6", @@ -63,6 +65,7 @@ "lint": "eslint lancedb && eslint __test__", "prepublishOnly": "napi prepublish -t npm", "test": "npm run build && jest --verbose", + "integration": "S3_TEST=1 npm run test", "universal": "napi universal", "version": "napi version" }, diff --git a/nodejs/src/connection.rs b/nodejs/src/connection.rs index 6f569473..6ce4dfe2 100644 --- a/nodejs/src/connection.rs +++ b/nodejs/src/connection.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; + use napi::bindgen_prelude::*; use napi_derive::*; @@ -64,6 +66,11 @@ impl Connection { builder = builder.read_consistency_interval(std::time::Duration::from_secs_f64(interval)); } + if let Some(storage_options) = options.storage_options { + for (key, value) in storage_options { + builder = builder.storage_option(key, value); + } + } Ok(Self::inner_new( builder .execute() @@ -118,14 +125,18 @@ impl Connection { name: String, buf: Buffer, mode: String, + storage_options: Option>, ) -> napi::Result
{ let batches = ipc_file_to_batches(buf.to_vec()) .map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?; let mode = Self::parse_create_mode_str(&mode)?; - let tbl = self - .get_inner()? - .create_table(&name, batches) - .mode(mode) + let mut builder = self.get_inner()?.create_table(&name, batches).mode(mode); + if let Some(storage_options) = storage_options { + for (key, value) in storage_options { + builder = builder.storage_option(key, value); + } + } + let tbl = builder .execute() .await .map_err(|e| napi::Error::from_reason(format!("{}", e)))?; @@ -138,15 +149,22 @@ impl Connection { name: String, schema_buf: Buffer, mode: String, + storage_options: Option>, ) -> napi::Result
{ let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| { napi::Error::from_reason(format!("Failed to marshal schema from JS to Rust: {}", e)) })?; let mode = Self::parse_create_mode_str(&mode)?; - let tbl = self + let mut builder = self .get_inner()? .create_empty_table(&name, schema) - .mode(mode) + .mode(mode); + if let Some(storage_options) = storage_options { + for (key, value) in storage_options { + builder = builder.storage_option(key, value); + } + } + let tbl = builder .execute() .await .map_err(|e| napi::Error::from_reason(format!("{}", e)))?; @@ -154,10 +172,18 @@ impl Connection { } #[napi] - pub async fn open_table(&self, name: String) -> napi::Result
{ - let tbl = self - .get_inner()? - .open_table(&name) + pub async fn open_table( + &self, + name: String, + storage_options: Option>, + ) -> napi::Result
{ + let mut builder = self.get_inner()?.open_table(&name); + if let Some(storage_options) = storage_options { + for (key, value) in storage_options { + builder = builder.storage_option(key, value); + } + } + let tbl = builder .execute() .await .map_err(|e| napi::Error::from_reason(format!("{}", e)))?; diff --git a/nodejs/src/lib.rs b/nodejs/src/lib.rs index 37de4885..a3289b24 100644 --- a/nodejs/src/lib.rs +++ b/nodejs/src/lib.rs @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use connection::Connection; +use std::collections::HashMap; + use napi_derive::*; mod connection; @@ -38,6 +39,10 @@ pub struct ConnectionOptions { /// Note: this consistency only applies to read operations. Write operations are /// always consistent. pub read_consistency_interval: Option, + /// (For LanceDB OSS only): configuration for object storage. + /// + /// The available options are described at https://lancedb.github.io/lancedb/guides/storage/ + pub storage_options: Option>, } /// Write mode for writing a table. 
@@ -54,7 +59,7 @@ pub struct WriteOptions { pub mode: Option, } -#[napi] -pub async fn connect(uri: String, options: ConnectionOptions) -> napi::Result { - Connection::new(uri, options).await +#[napi(object)] +pub struct OpenTableOptions { + pub storage_options: Option>, } diff --git a/python/README.md b/python/README.md index cbcec70a..2fca1534 100644 --- a/python/README.md +++ b/python/README.md @@ -41,7 +41,7 @@ To build the python package you can use maturin: ```bash # This will build the rust bindings and place them in the appropriate place # in your venv or conda environment -matruin develop +maturin develop ``` To run the unit tests: diff --git a/python/pyproject.toml b/python/pyproject.toml index 5a93500c..f457f262 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -3,7 +3,7 @@ name = "lancedb" version = "0.6.7" dependencies = [ "deprecation", - "pylance==0.10.9", + "pylance==0.10.10", "ratelimiter~=1.0", "requests>=2.31.0", "retry>=0.9.2", @@ -49,6 +49,7 @@ repository = "https://github.com/lancedb/lancedb" [project.optional-dependencies] tests = [ "aiohttp", + "boto3", "pandas>=1.4", "pytest", "pytest-mock", @@ -98,4 +99,5 @@ addopts = "--strict-markers --ignore-glob=lancedb/embeddings/*.py" markers = [ "slow: marks tests as slow (deselect with '-m \"not slow\"')", "asyncio", + "s3_test" ] diff --git a/python/python/lancedb/__init__.py b/python/python/lancedb/__init__.py index 76f7427d..99d66f58 100644 --- a/python/python/lancedb/__init__.py +++ b/python/python/lancedb/__init__.py @@ -15,7 +15,7 @@ import importlib.metadata import os from concurrent.futures import ThreadPoolExecutor from datetime import timedelta -from typing import Optional, Union +from typing import Dict, Optional, Union __version__ = importlib.metadata.version("lancedb") @@ -118,6 +118,7 @@ async def connect_async( host_override: Optional[str] = None, read_consistency_interval: Optional[timedelta] = None, request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = 
None, + storage_options: Optional[Dict[str, str]] = None, ) -> AsyncConnection: """Connect to a LanceDB database. @@ -144,6 +145,9 @@ async def connect_async( the last check, then the table will be checked for updates. Note: this consistency only applies to read operations. Write operations are always consistent. + storage_options: dict, optional + Additional options for the storage backend. See available options at + https://lancedb.github.io/lancedb/guides/storage/ Examples -------- @@ -172,6 +176,7 @@ async def connect_async( region, host_override, read_consistency_interval_secs, + storage_options, ) ) diff --git a/python/python/lancedb/_lancedb.pyi b/python/python/lancedb/_lancedb.pyi index d16f8a1a..7ee7b7da 100644 --- a/python/python/lancedb/_lancedb.pyi +++ b/python/python/lancedb/_lancedb.pyi @@ -19,10 +19,18 @@ class Connection(object): self, start_after: Optional[str], limit: Optional[int] ) -> list[str]: ... async def create_table( - self, name: str, mode: str, data: pa.RecordBatchReader + self, + name: str, + mode: str, + data: pa.RecordBatchReader, + storage_options: Optional[Dict[str, str]] = None, ) -> Table: ... async def create_empty_table( - self, name: str, mode: str, schema: pa.Schema + self, + name: str, + mode: str, + schema: pa.Schema, + storage_options: Optional[Dict[str, str]] = None, ) -> Table: ... 
class Table: diff --git a/python/python/lancedb/db.py b/python/python/lancedb/db.py index 23c73e58..c5d3a5b9 100644 --- a/python/python/lancedb/db.py +++ b/python/python/lancedb/db.py @@ -18,7 +18,7 @@ import inspect import os from abc import abstractmethod from pathlib import Path -from typing import TYPE_CHECKING, Iterable, List, Literal, Optional, Union +from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union import pyarrow as pa from overrides import EnforceOverrides, override @@ -533,6 +533,7 @@ class AsyncConnection(object): exist_ok: Optional[bool] = None, on_bad_vectors: Optional[str] = None, fill_value: Optional[float] = None, + storage_options: Optional[Dict[str, str]] = None, ) -> AsyncTable: """Create an [AsyncTable][lancedb.table.AsyncTable] in the database. @@ -570,6 +571,12 @@ class AsyncConnection(object): One of "error", "drop", "fill". fill_value: float The value to use when filling vectors. Only used if on_bad_vectors="fill". + storage_options: dict, optional + Additional options for the storage backend. Options already set on the + connection will be inherited by the table, but can be overridden here. + See available options at + https://lancedb.github.io/lancedb/guides/storage/ + Returns ------- @@ -729,30 +736,40 @@ class AsyncConnection(object): mode = "exist_ok" if data is None: - new_table = await self._inner.create_empty_table(name, mode, schema) + new_table = await self._inner.create_empty_table( + name, mode, schema, storage_options=storage_options + ) else: data = data_to_reader(data, schema) new_table = await self._inner.create_table( name, mode, data, + storage_options=storage_options, ) return AsyncTable(new_table) - async def open_table(self, name: str) -> Table: + async def open_table( + self, name: str, storage_options: Optional[Dict[str, str]] = None + ) -> Table: """Open a Lance Table in the database. Parameters ---------- name: str The name of the table. 
+ storage_options: dict, optional + Additional options for the storage backend. Options already set on the + connection will be inherited by the table, but can be overridden here. + See available options at + https://lancedb.github.io/lancedb/guides/storage/ Returns ------- A LanceTable object representing the table. """ - table = await self._inner.open_table(name) + table = await self._inner.open_table(name, storage_options) return AsyncTable(table) async def drop_table(self, name: str): diff --git a/python/python/tests/test_s3.py b/python/python/tests/test_s3.py new file mode 100644 index 00000000..9766fc24 --- /dev/null +++ b/python/python/tests/test_s3.py @@ -0,0 +1,158 @@ +# Copyright 2024 Lance Developers +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import asyncio +import copy + +import pytest +import pyarrow as pa +import lancedb + + +# These are all keys that are accepted by storage_options +CONFIG = { + "allow_http": "true", + "aws_access_key_id": "ACCESSKEY", + "aws_secret_access_key": "SECRETKEY", + "aws_endpoint": "http://localhost:4566", + "aws_region": "us-east-1", +} + + +def get_boto3_client(*args, **kwargs): + import boto3 + + return boto3.client( + *args, + region_name=CONFIG["aws_region"], + aws_access_key_id=CONFIG["aws_access_key_id"], + aws_secret_access_key=CONFIG["aws_secret_access_key"], + **kwargs, + ) + + +@pytest.fixture(scope="module") +def s3_bucket(): + s3 = get_boto3_client("s3", endpoint_url=CONFIG["aws_endpoint"]) + bucket_name = "lance-integtest" + # if bucket exists, delete it + try: + delete_bucket(s3, bucket_name) + except s3.exceptions.NoSuchBucket: + pass + s3.create_bucket(Bucket=bucket_name) + yield bucket_name + + delete_bucket(s3, bucket_name) + + +def delete_bucket(s3, bucket_name): + # Delete all objects first + for obj in s3.list_objects(Bucket=bucket_name).get("Contents", []): + s3.delete_object(Bucket=bucket_name, Key=obj["Key"]) + s3.delete_bucket(Bucket=bucket_name) + + +@pytest.mark.s3_test +def test_s3_lifecycle(s3_bucket: str): + storage_options = copy.copy(CONFIG) + + uri = f"s3://{s3_bucket}/test_lifecycle" + data = pa.table({"x": [1, 2, 3]}) + + async def test(): + db = await lancedb.connect_async(uri, storage_options=storage_options) + + table = await db.create_table("test", schema=data.schema) + assert await table.count_rows() == 0 + + table = await db.create_table("test", data, mode="overwrite") + assert await table.count_rows() == 3 + + await table.add(data, mode="append") + assert await table.count_rows() == 6 + + table = await db.open_table("test") + assert await table.count_rows() == 6 + + await db.drop_table("test") + + await db.drop_database() + + asyncio.run(test()) + + +@pytest.fixture() +def kms_key(): + kms = get_boto3_client("kms", 
endpoint_url=CONFIG["aws_endpoint"]) + key_id = kms.create_key()["KeyMetadata"]["KeyId"] + yield key_id + kms.schedule_key_deletion(KeyId=key_id, PendingWindowInDays=7) + + +def validate_objects_encrypted(bucket: str, path: str, kms_key: str): + s3 = get_boto3_client("s3", endpoint_url=CONFIG["aws_endpoint"]) + objects = s3.list_objects_v2(Bucket=bucket, Prefix=path)["Contents"] + for obj in objects: + info = s3.head_object(Bucket=bucket, Key=obj["Key"]) + assert info["ServerSideEncryption"] == "aws:kms", ( + "object %s not encrypted" % obj["Key"] + ) + assert info["SSEKMSKeyId"].endswith(kms_key), ( + "object %s not encrypted with correct key" % obj["Key"] + ) + + +@pytest.mark.s3_test +def test_s3_sse(s3_bucket: str, kms_key: str): + storage_options = copy.copy(CONFIG) + + uri = f"s3://{s3_bucket}/test_lifecycle" + data = pa.table({"x": [1, 2, 3]}) + + async def test(): + # Create a table with SSE + db = await lancedb.connect_async(uri, storage_options=storage_options) + + table = await db.create_table( + "table1", + schema=data.schema, + storage_options={ + "aws_server_side_encryption": "aws:kms", + "aws_sse_kms_key_id": kms_key, + }, + ) + await table.add(data) + await table.update({"x": "1"}) + + path = "test_lifecycle/table1.lance" + validate_objects_encrypted(s3_bucket, path, kms_key) + + # Test we can set encryption at connection level too. 
+ db = await lancedb.connect_async( + uri, + storage_options=dict( + aws_server_side_encryption="aws:kms", + aws_sse_kms_key_id=kms_key, + **storage_options, + ), + ) + + table = await db.create_table("table2", schema=data.schema) + await table.add(data) + await table.update({"x": "1"}) + + path = "test_lifecycle/table2.lance" + validate_objects_encrypted(s3_bucket, path, kms_key) + + asyncio.run(test()) diff --git a/python/src/connection.rs b/python/src/connection.rs index 22f626b4..0ffe9eae 100644 --- a/python/src/connection.rs +++ b/python/src/connection.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::{sync::Arc, time::Duration}; +use std::{collections::HashMap, sync::Arc, time::Duration}; use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow}; use lancedb::connection::{Connection as LanceConnection, CreateTableMode}; @@ -90,19 +90,21 @@ impl Connection { name: String, mode: &str, data: &PyAny, + storage_options: Option>, ) -> PyResult<&'a PyAny> { let inner = self_.get_inner()?.clone(); let mode = Self::parse_create_mode_str(mode)?; let batches = ArrowArrayStreamReader::from_pyarrow(data)?; + let mut builder = inner.create_table(name, batches).mode(mode); + + if let Some(storage_options) = storage_options { + builder = builder.storage_options(storage_options); + } + future_into_py(self_.py(), async move { - let table = inner - .create_table(name, batches) - .mode(mode) - .execute() - .await - .infer_error()?; + let table = builder.execute().await.infer_error()?; Ok(Table::new(table)) }) } @@ -112,6 +114,7 @@ impl Connection { name: String, mode: &str, schema: &PyAny, + storage_options: Option>, ) -> PyResult<&'a PyAny> { let inner = self_.get_inner()?.clone(); @@ -119,21 +122,31 @@ impl Connection { let schema = Schema::from_pyarrow(schema)?; + let mut builder = inner.create_empty_table(name, Arc::new(schema)).mode(mode); + + if let 
Some(storage_options) = storage_options { + builder = builder.storage_options(storage_options); + } + future_into_py(self_.py(), async move { - let table = inner - .create_empty_table(name, Arc::new(schema)) - .mode(mode) - .execute() - .await - .infer_error()?; + let table = builder.execute().await.infer_error()?; Ok(Table::new(table)) }) } - pub fn open_table(self_: PyRef<'_, Self>, name: String) -> PyResult<&PyAny> { + #[pyo3(signature = (name, storage_options = None))] + pub fn open_table( + self_: PyRef<'_, Self>, + name: String, + storage_options: Option>, + ) -> PyResult<&PyAny> { let inner = self_.get_inner()?.clone(); + let mut builder = inner.open_table(name); + if let Some(storage_options) = storage_options { + builder = builder.storage_options(storage_options); + } future_into_py(self_.py(), async move { - let table = inner.open_table(&name).execute().await.infer_error()?; + let table = builder.execute().await.infer_error()?; Ok(Table::new(table)) }) } @@ -162,6 +175,7 @@ pub fn connect( region: Option, host_override: Option, read_consistency_interval: Option, + storage_options: Option>, ) -> PyResult<&PyAny> { future_into_py(py, async move { let mut builder = lancedb::connect(&uri); @@ -178,6 +192,9 @@ pub fn connect( let read_consistency_interval = Duration::from_secs_f64(read_consistency_interval); builder = builder.read_consistency_interval(read_consistency_interval); } + if let Some(storage_options) = storage_options { + builder = builder.storage_options(storage_options); + } Ok(Connection::new(builder.execute().await.infer_error()?)) }) } diff --git a/rust/ffi/node/src/lib.rs b/rust/ffi/node/src/lib.rs index eee75144..26afc78b 100644 --- a/rust/ffi/node/src/lib.rs +++ b/rust/ffi/node/src/lib.rs @@ -12,19 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::sync::Arc; - -use async_trait::async_trait; -use lance::io::ObjectStoreParams; use neon::prelude::*; -use object_store::aws::{AwsCredential, AwsCredentialProvider}; -use object_store::CredentialProvider; use once_cell::sync::OnceCell; use tokio::runtime::Runtime; use lancedb::connect; use lancedb::connection::Connection; -use lancedb::table::ReadParams; use crate::error::ResultExt; use crate::query::JsQuery; @@ -44,33 +37,6 @@ struct JsDatabase { impl Finalize for JsDatabase {} -// TODO: object_store didn't export this type so I copied it. -// Make a request to object_store to export this type -#[derive(Debug)] -pub struct StaticCredentialProvider { - credential: Arc, -} - -impl StaticCredentialProvider { - pub fn new(credential: T) -> Self { - Self { - credential: Arc::new(credential), - } - } -} - -#[async_trait] -impl CredentialProvider for StaticCredentialProvider -where - T: std::fmt::Debug + Send + Sync, -{ - type Credential = T; - - async fn get_credential(&self) -> object_store::Result> { - Ok(Arc::clone(&self.credential)) - } -} - fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> { static RUNTIME: OnceCell = OnceCell::new(); static LOG: OnceCell<()> = OnceCell::new(); @@ -82,29 +48,28 @@ fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> { fn database_new(mut cx: FunctionContext) -> JsResult { let path = cx.argument::(0)?.value(&mut cx); - let aws_creds = get_aws_creds(&mut cx, 1)?; - let region = get_aws_region(&mut cx, 4)?; let read_consistency_interval = cx .argument_opt(5) .and_then(|arg| arg.downcast::(&mut cx).ok()) .map(|v| v.value(&mut cx)) .map(std::time::Duration::from_secs_f64); + let storage_options_js = cx.argument::(1)?.to_vec(&mut cx)?; + let mut storage_options: Vec<(String, String)> = Vec::with_capacity(storage_options_js.len()); + for handle in storage_options_js { + let obj = handle.downcast::(&mut cx).unwrap(); + let key = obj.get::(&mut cx, 0)?.value(&mut cx); + let value = 
obj.get::(&mut cx, 0)?.value(&mut cx); + + storage_options.push((key, value)); + } + let rt = runtime(&mut cx)?; let channel = cx.channel(); let (deferred, promise) = cx.promise(); - let mut conn_builder = connect(&path); - if let Some(region) = region { - conn_builder = conn_builder.region(®ion); - } - if let Some(aws_creds) = aws_creds { - conn_builder = conn_builder.aws_creds(AwsCredential { - key_id: aws_creds.key_id, - secret_key: aws_creds.secret_key, - token: aws_creds.token, - }); - } + let mut conn_builder = connect(&path).storage_options(storage_options); + if let Some(interval) = read_consistency_interval { conn_builder = conn_builder.read_consistency_interval(interval); } @@ -143,93 +108,19 @@ fn database_table_names(mut cx: FunctionContext) -> JsResult { Ok(promise) } -/// Get AWS creds arguments from the context -/// Consumes 3 arguments -fn get_aws_creds( - cx: &mut FunctionContext, - arg_starting_location: i32, -) -> NeonResult> { - let secret_key_id = cx - .argument_opt(arg_starting_location) - .filter(|arg| arg.is_a::(cx)) - .and_then(|arg| arg.downcast_or_throw::(cx).ok()) - .map(|v| v.value(cx)); - - let secret_key = cx - .argument_opt(arg_starting_location + 1) - .filter(|arg| arg.is_a::(cx)) - .and_then(|arg| arg.downcast_or_throw::(cx).ok()) - .map(|v| v.value(cx)); - - let temp_token = cx - .argument_opt(arg_starting_location + 2) - .filter(|arg| arg.is_a::(cx)) - .and_then(|arg| arg.downcast_or_throw::(cx).ok()) - .map(|v| v.value(cx)); - - match (secret_key_id, secret_key, temp_token) { - (Some(key_id), Some(key), optional_token) => Ok(Some(AwsCredential { - key_id, - secret_key: key, - token: optional_token, - })), - (None, None, None) => Ok(None), - _ => cx.throw_error("Invalid credentials configuration"), - } -} - -fn get_aws_credential_provider( - cx: &mut FunctionContext, - arg_starting_location: i32, -) -> NeonResult> { - Ok(get_aws_creds(cx, arg_starting_location)?.map(|aws_cred| { - Arc::new(StaticCredentialProvider::new(aws_cred)) 
- as Arc> - })) -} - -/// Get AWS region arguments from the context -fn get_aws_region(cx: &mut FunctionContext, arg_location: i32) -> NeonResult> { - let region = cx - .argument_opt(arg_location) - .filter(|arg| arg.is_a::(cx)) - .map(|arg| arg.downcast_or_throw::(cx)); - - match region { - Some(Ok(region)) => Ok(Some(region.value(cx))), - None => Ok(None), - Some(Err(e)) => Err(e), - } -} - fn database_open_table(mut cx: FunctionContext) -> JsResult { let db = cx .this() .downcast_or_throw::, _>(&mut cx)?; let table_name = cx.argument::(0)?.value(&mut cx); - let aws_creds = get_aws_credential_provider(&mut cx, 1)?; - - let aws_region = get_aws_region(&mut cx, 4)?; - - let params = ReadParams { - store_options: Some(ObjectStoreParams::with_aws_credentials( - aws_creds, aws_region, - )), - ..ReadParams::default() - }; - let rt = runtime(&mut cx)?; let channel = cx.channel(); let database = db.database.clone(); let (deferred, promise) = cx.promise(); rt.spawn(async move { - let table_rst = database - .open_table(&table_name) - .lance_read_params(params) - .execute() - .await; + let table_rst = database.open_table(&table_name).execute().await; deferred.settle_with(&channel, move |mut cx| { let js_table = JsTable::from(table_rst.or_throw(&mut cx)?); diff --git a/rust/ffi/node/src/table.rs b/rust/ffi/node/src/table.rs index 13f1e895..cf4c4322 100644 --- a/rust/ffi/node/src/table.rs +++ b/rust/ffi/node/src/table.rs @@ -17,7 +17,6 @@ use std::ops::Deref; use arrow_array::{RecordBatch, RecordBatchIterator}; use lance::dataset::optimize::CompactionOptions; use lance::dataset::{ColumnAlteration, NewColumnTransform, WriteMode, WriteParams}; -use lance::io::ObjectStoreParams; use lancedb::table::{OptimizeAction, WriteOptions}; use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer}; @@ -26,7 +25,7 @@ use neon::prelude::*; use neon::types::buffer::TypedArray; use crate::error::ResultExt; -use crate::{convert, get_aws_credential_provider, get_aws_region, 
runtime, JsDatabase}; +use crate::{convert, runtime, JsDatabase}; pub struct JsTable { pub table: LanceDbTable, @@ -59,6 +58,10 @@ impl JsTable { return cx.throw_error("Table::create only supports 'overwrite' and 'create' modes") } }; + let params = WriteParams { + mode, + ..WriteParams::default() + }; let rt = runtime(&mut cx)?; let channel = cx.channel(); @@ -66,17 +69,6 @@ impl JsTable { let (deferred, promise) = cx.promise(); let database = db.database.clone(); - let aws_creds = get_aws_credential_provider(&mut cx, 3)?; - let aws_region = get_aws_region(&mut cx, 6)?; - - let params = WriteParams { - store_params: Some(ObjectStoreParams::with_aws_credentials( - aws_creds, aws_region, - )), - mode, - ..WriteParams::default() - }; - rt.spawn(async move { let batch_reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema); let table_rst = database @@ -112,13 +104,8 @@ impl JsTable { "overwrite" => WriteMode::Overwrite, s => return cx.throw_error(format!("invalid write mode {}", s)), }; - let aws_creds = get_aws_credential_provider(&mut cx, 2)?; - let aws_region = get_aws_region(&mut cx, 5)?; let params = WriteParams { - store_params: Some(ObjectStoreParams::with_aws_credentials( - aws_creds, aws_region, - )), mode: write_mode, ..WriteParams::default() }; diff --git a/rust/lancedb/Cargo.toml b/rust/lancedb/Cargo.toml index d31e869f..7c6c5487 100644 --- a/rust/lancedb/Cargo.toml +++ b/rust/lancedb/Cargo.toml @@ -46,8 +46,13 @@ tempfile = "3.5.0" rand = { version = "0.8.3", features = ["small_rng"] } uuid = { version = "1.7.0", features = ["v4"] } walkdir = "2" +# For s3 integration tests (dev deps aren't allowed to be optional atm) +aws-sdk-s3 = { version = "1.0" } +aws-sdk-kms = { version = "1.0" } +aws-config = { version = "1.0" } [features] default = ["remote"] remote = ["dep:reqwest"] -fp16kernels = ["lance-linalg/fp16kernels"] \ No newline at end of file +fp16kernels = ["lance-linalg/fp16kernels"] +s3-test = [] \ No newline at end of file diff --git 
a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index e0bcc65a..b2675c5a 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -14,6 +14,7 @@ //! LanceDB Database +use std::collections::HashMap; use std::fs::create_dir_all; use std::path::Path; use std::sync::Arc; @@ -22,9 +23,7 @@ use arrow_array::{RecordBatchIterator, RecordBatchReader}; use arrow_schema::SchemaRef; use lance::dataset::{ReadParams, WriteMode}; use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore}; -use object_store::{ - aws::AwsCredential, local::LocalFileSystem, CredentialProvider, StaticCredentialProvider, -}; +use object_store::{aws::AwsCredential, local::LocalFileSystem}; use snafu::prelude::*; use crate::arrow::IntoArrow; @@ -208,6 +207,50 @@ impl CreateTableBuilder { self.mode = mode; self } + + /// Set an option for the storage layer. + /// + /// Options already set on the connection will be inherited by the table, + /// but can be overridden here. + /// + /// See available options at + pub fn storage_option(mut self, key: impl Into, value: impl Into) -> Self { + let store_options = self + .write_options + .lance_write_params + .get_or_insert(Default::default()) + .store_params + .get_or_insert(Default::default()) + .storage_options + .get_or_insert(Default::default()); + store_options.insert(key.into(), value.into()); + self + } + + /// Set multiple options for the storage layer. + /// + /// Options already set on the connection will be inherited by the table, + /// but can be overridden here. 
+ /// + /// See available options at + pub fn storage_options( + mut self, + pairs: impl IntoIterator, impl Into)>, + ) -> Self { + let store_options = self + .write_options + .lance_write_params + .get_or_insert(Default::default()) + .store_params + .get_or_insert(Default::default()) + .storage_options + .get_or_insert(Default::default()); + + for (key, value) in pairs { + store_options.insert(key.into(), value.into()); + } + self + } } #[derive(Clone, Debug)] @@ -252,6 +295,48 @@ impl OpenTableBuilder { self } + /// Set an option for the storage layer. + /// + /// Options already set on the connection will be inherited by the table, + /// but can be overridden here. + /// + /// See available options at + pub fn storage_option(mut self, key: impl Into, value: impl Into) -> Self { + let storage_options = self + .lance_read_params + .get_or_insert(Default::default()) + .store_options + .get_or_insert(Default::default()) + .storage_options + .get_or_insert(Default::default()); + storage_options.insert(key.into(), value.into()); + self + } + + /// Set multiple options for the storage layer. + /// + /// Options already set on the connection will be inherited by the table, + /// but can be overridden here. + /// + /// See available options at + pub fn storage_options( + mut self, + pairs: impl IntoIterator, impl Into)>, + ) -> Self { + let storage_options = self + .lance_read_params + .get_or_insert(Default::default()) + .store_options + .get_or_insert(Default::default()) + .storage_options + .get_or_insert(Default::default()); + + for (key, value) in pairs { + storage_options.insert(key.into(), value.into()); + } + self + } + /// Open the table pub async fn execute(self) -> Result
{ self.parent.clone().do_open_table(self).await @@ -385,8 +470,7 @@ pub struct ConnectBuilder { /// LanceDB Cloud host override, only required if using an on-premises Lance Cloud instance host_override: Option, - /// User provided AWS credentials - aws_creds: Option, + storage_options: HashMap, /// The interval at which to check for updates from other processes. /// @@ -409,8 +493,8 @@ impl ConnectBuilder { api_key: None, region: None, host_override: None, - aws_creds: None, read_consistency_interval: None, + storage_options: HashMap::new(), } } @@ -430,8 +514,37 @@ impl ConnectBuilder { } /// [`AwsCredential`] to use when connecting to S3. + #[deprecated(note = "Pass through storage_options instead")] pub fn aws_creds(mut self, aws_creds: AwsCredential) -> Self { - self.aws_creds = Some(aws_creds); + self.storage_options + .insert("aws_access_key_id".into(), aws_creds.key_id.clone()); + self.storage_options + .insert("aws_secret_access_key".into(), aws_creds.secret_key.clone()); + if let Some(token) = &aws_creds.token { + self.storage_options + .insert("aws_session_token".into(), token.clone()); + } + self + } + + /// Set an option for the storage layer. + /// + /// See available options at + pub fn storage_option(mut self, key: impl Into, value: impl Into) -> Self { + self.storage_options.insert(key.into(), value.into()); + self + } + + /// Set multiple options for the storage layer. 
+ /// + /// See available options at + pub fn storage_options( + mut self, + pairs: impl IntoIterator, impl Into)>, + ) -> Self { + for (key, value) in pairs { + self.storage_options.insert(key.into(), value.into()); + } self } @@ -522,6 +635,9 @@ struct Database { pub(crate) store_wrapper: Option>, read_consistency_interval: Option, + + // Storage options to be inherited by tables created from this connection + storage_options: HashMap, } impl std::fmt::Display for Database { @@ -604,20 +720,11 @@ impl Database { }; let plain_uri = url.to_string(); - let os_params: ObjectStoreParams = if let Some(aws_creds) = &options.aws_creds { - let credential_provider: Arc< - dyn CredentialProvider, - > = Arc::new(StaticCredentialProvider::new(AwsCredential { - key_id: aws_creds.key_id.clone(), - secret_key: aws_creds.secret_key.clone(), - token: aws_creds.token.clone(), - })); - ObjectStoreParams::with_aws_credentials( - Some(credential_provider), - options.region.clone(), - ) - } else { - ObjectStoreParams::default() + + let storage_options = options.storage_options.clone(); + let os_params = ObjectStoreParams { + storage_options: Some(storage_options.clone()), + ..Default::default() }; let (object_store, base_path) = ObjectStore::from_uri_and_params(&plain_uri, &os_params).await?; @@ -641,6 +748,7 @@ impl Database { object_store, store_wrapper: write_store_wrapper, read_consistency_interval: options.read_consistency_interval, + storage_options, }) } Err(_) => Self::open_path(uri, options.read_consistency_interval).await, @@ -662,6 +770,7 @@ impl Database { object_store, store_wrapper: None, read_consistency_interval, + storage_options: HashMap::new(), }) } @@ -734,11 +843,26 @@ impl ConnectionInternal for Database { async fn do_create_table( &self, - options: CreateTableBuilder, + mut options: CreateTableBuilder, data: Box, ) -> Result
{ let table_uri = self.table_uri(&options.name)?; + // Inherit storage options from the connection + let storage_options = options + .write_options + .lance_write_params + .get_or_insert_with(Default::default) + .store_params + .get_or_insert_with(Default::default) + .storage_options + .get_or_insert_with(Default::default); + for (key, value) in self.storage_options.iter() { + if !storage_options.contains_key(key) { + storage_options.insert(key.clone(), value.clone()); + } + } + let mut write_params = options.write_options.lance_write_params.unwrap_or_default(); if matches!(&options.mode, CreateTableMode::Overwrite) { write_params.mode = WriteMode::Overwrite; @@ -768,8 +892,23 @@ impl ConnectionInternal for Database { } } - async fn do_open_table(&self, options: OpenTableBuilder) -> Result
{ + async fn do_open_table(&self, mut options: OpenTableBuilder) -> Result
{ let table_uri = self.table_uri(&options.name)?; + + // Inherit storage options from the connection + let storage_options = options + .lance_read_params + .get_or_insert_with(Default::default) + .store_options + .get_or_insert_with(Default::default) + .storage_options + .get_or_insert_with(Default::default); + for (key, value) in self.storage_options.iter() { + if !storage_options.contains_key(key) { + storage_options.insert(key.clone(), value.clone()); + } + } + let native_table = Arc::new( NativeTable::open_with_params( &table_uri, @@ -801,7 +940,10 @@ impl ConnectionInternal for Database { } async fn drop_db(&self) -> Result<()> { - todo!() + self.object_store + .remove_dir_all(self.base_path.clone()) + .await?; + Ok(()) } } diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index 7331e29c..d5d3d1dd 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -14,6 +14,7 @@ //! LanceDB Table APIs +use std::collections::HashMap; use std::path::Path; use std::sync::Arc; @@ -757,6 +758,8 @@ pub struct NativeTable { // the object store wrapper to use on write path store_wrapper: Option>, + storage_options: HashMap, + // This comes from the connection options. We store here so we can pass down // to the dataset when we recreate it (for example, in checkout_latest). 
read_consistency_interval: Option, @@ -822,6 +825,13 @@ impl NativeTable { None => params, }; + let storage_options = params + .store_options + .clone() + .unwrap_or_default() + .storage_options + .unwrap_or_default(); + let dataset = DatasetBuilder::from_uri(uri) .with_read_params(params) .load() @@ -840,6 +850,7 @@ impl NativeTable { uri: uri.to_string(), dataset, store_wrapper: write_store_wrapper, + storage_options, read_consistency_interval, }) } @@ -908,6 +919,13 @@ impl NativeTable { None => params, }; + let storage_options = params + .store_params + .clone() + .unwrap_or_default() + .storage_options + .unwrap_or_default(); + let dataset = Dataset::write(batches, uri, Some(params)) .await .map_err(|e| match e { @@ -921,6 +939,7 @@ impl NativeTable { uri: uri.to_string(), dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval), store_wrapper: write_store_wrapper, + storage_options, read_consistency_interval, }) } @@ -1312,7 +1331,7 @@ impl TableInternal for NativeTable { add: AddDataBuilder, data: Box, ) -> Result<()> { - let lance_params = add.write_options.lance_write_params.unwrap_or(WriteParams { + let mut lance_params = add.write_options.lance_write_params.unwrap_or(WriteParams { mode: match add.mode { AddDataMode::Append => WriteMode::Append, AddDataMode::Overwrite => WriteMode::Overwrite, @@ -1320,6 +1339,18 @@ impl TableInternal for NativeTable { ..Default::default() }); + // Bring storage options from table + let storage_options = lance_params + .store_params + .get_or_insert(Default::default()) + .storage_options + .get_or_insert(Default::default()); + for (key, value) in self.storage_options.iter() { + if !storage_options.contains_key(key) { + storage_options.insert(key.clone(), value.clone()); + } + } + // patch the params if we have a write store wrapper let lance_params = match self.store_wrapper.clone() { Some(wrapper) => lance_params.patch_with_store_wrapper(wrapper)?, diff --git 
a/rust/lancedb/tests/object_store_test.rs b/rust/lancedb/tests/object_store_test.rs new file mode 100644 index 00000000..623b2484 --- /dev/null +++ b/rust/lancedb/tests/object_store_test.rs @@ -0,0 +1,290 @@ +// Copyright 2023 LanceDB Developers. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#![cfg(feature = "s3-test")] +use std::sync::Arc; + +use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray}; +use arrow_schema::{DataType, Field, Schema}; + +use aws_config::{BehaviorVersion, ConfigLoader, Region, SdkConfig}; +use aws_sdk_s3::{config::Credentials, types::ServerSideEncryption, Client as S3Client}; +use lancedb::Result; + +const CONFIG: &[(&str, &str)] = &[ + ("access_key_id", "ACCESS_KEY"), + ("secret_access_key", "SECRET_KEY"), + ("endpoint", "http://127.0.0.1:4566"), + ("allow_http", "true"), +]; + +async fn aws_config() -> SdkConfig { + let credentials = Credentials::new(CONFIG[0].1, CONFIG[1].1, None, None, "static"); + ConfigLoader::default() + .credentials_provider(credentials) + .endpoint_url(CONFIG[2].1) + .behavior_version(BehaviorVersion::latest()) + .region(Region::new("us-east-1")) + .load() + .await +} + +struct S3Bucket(String); + +impl S3Bucket { + async fn new(bucket: &str) -> Self { + let config = aws_config().await; + let client = S3Client::new(&config); + + // In case it wasn't deleted earlier + Self::delete_bucket(client.clone(), bucket).await; + + client.create_bucket().bucket(bucket).send().await.unwrap(); + 
+ Self(bucket.to_string()) + } + + async fn delete_bucket(client: S3Client, bucket: &str) { + // Before we delete the bucket, we need to delete all objects in it + let res = client + .list_objects_v2() + .bucket(bucket) + .send() + .await + .map_err(|err| err.into_service_error()); + match res { + Err(e) if e.is_no_such_bucket() => return, + Err(e) => panic!("Failed to list objects in bucket: {}", e), + _ => {} + } + let objects = res.unwrap().contents.unwrap_or_default(); + for object in objects { + client + .delete_object() + .bucket(bucket) + .key(object.key.unwrap()) + .send() + .await + .unwrap(); + } + client.delete_bucket().bucket(bucket).send().await.unwrap(); + } +} + +impl Drop for S3Bucket { + fn drop(&mut self) { + let bucket_name = self.0.clone(); + tokio::task::spawn(async move { + let config = aws_config().await; + let client = S3Client::new(&config); + Self::delete_bucket(client, &bucket_name).await; + }); + } +} + +fn test_data() -> RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Utf8, false), + ])); + RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["a", "b", "c"])), + ], + ) + .unwrap() +} + +#[tokio::test] +async fn test_minio_lifecycle() -> Result<()> { + // test create, update, drop, list on localstack minio + let bucket = S3Bucket::new("test-bucket").await; + let uri = format!("s3://{}", bucket.0); + + let db = lancedb::connect(&uri) + .storage_options(CONFIG.iter().cloned()) + .execute() + .await?; + + let data = test_data(); + let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema()); + + let table = db.create_table("test_table", data).execute().await?; + + let row_count = table.count_rows(None).await?; + assert_eq!(row_count, 3); + + let table_names = db.table_names().execute().await?; + assert_eq!(table_names, vec!["test_table"]); + + // Re-open the table + let table 
= db.open_table("test_table").execute().await?; + let row_count = table.count_rows(None).await?; + assert_eq!(row_count, 3); + + let data = test_data(); + let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema()); + table.add(data).execute().await?; + + db.drop_table("test_table").await?; + + Ok(()) +} + +struct KMSKey(String); + +impl KMSKey { + async fn new() -> Self { + let config = aws_config().await; + let client = aws_sdk_kms::Client::new(&config); + let key = client + .create_key() + .description("test key") + .send() + .await + .unwrap() + .key_metadata + .unwrap() + .key_id; + Self(key) + } +} + +impl Drop for KMSKey { + fn drop(&mut self) { + let key_id = self.0.clone(); + tokio::task::spawn(async move { + let config = aws_config().await; + let client = aws_sdk_kms::Client::new(&config); + client + .schedule_key_deletion() + .key_id(&key_id) + .send() + .await + .unwrap(); + }); + } +} + +async fn validate_objects_encrypted(bucket: &str, path: &str, kms_key_id: &str) { + // Get S3 client + let config = aws_config().await; + let client = S3Client::new(&config); + + // list the objects are the path + let objects = client + .list_objects_v2() + .bucket(bucket) + .prefix(path) + .send() + .await + .unwrap() + .contents + .unwrap(); + + let mut errors = vec![]; + let mut correctly_encrypted = vec![]; + + // For each object, call head + for object in &objects { + let head = client + .head_object() + .bucket(bucket) + .key(object.key().unwrap()) + .send() + .await + .unwrap(); + + // Verify the object is encrypted + if head.server_side_encryption() != Some(&ServerSideEncryption::AwsKms) { + errors.push(format!("Object {} is not encrypted", object.key().unwrap())); + continue; + } + if !(head + .ssekms_key_id() + .map(|arn| arn.ends_with(kms_key_id)) + .unwrap_or(false)) + { + errors.push(format!( + "Object {} has wrong key id: {:?}, vs expected: {}", + object.key().unwrap(), + head.ssekms_key_id(), + kms_key_id + )); + continue; + } + 
correctly_encrypted.push(object.key().unwrap().to_string()); + } + + if !errors.is_empty() { + panic!( + "{} of {} correctly encrypted: {:?}\n{} of {} not correct: {:?}", + correctly_encrypted.len(), + objects.len(), + correctly_encrypted, + errors.len(), + objects.len(), + errors + ); + } +} + +#[tokio::test] +async fn test_encryption() -> Result<()> { + // test encryption on localstack minio + let bucket = S3Bucket::new("test-encryption").await; + let key = KMSKey::new().await; + + let uri = format!("s3://{}", bucket.0); + let db = lancedb::connect(&uri) + .storage_options(CONFIG.iter().cloned()) + .execute() + .await?; + + // Create a table with encryption + let data = test_data(); + let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema()); + + let mut builder = db.create_table("test_table", data); + for (key, value) in CONFIG { + builder = builder.storage_option(*key, *value); + } + let table = builder + .storage_option("aws_server_side_encryption", "aws:kms") + .storage_option("aws_sse_kms_key_id", &key.0) + .execute() + .await?; + validate_objects_encrypted(&bucket.0, "test_table", &key.0).await; + + table.delete("a = 1").await?; + validate_objects_encrypted(&bucket.0, "test_table", &key.0).await; + + // Test we can set encryption at the connection level. 
+ let db = lancedb::connect(&uri) + .storage_options(CONFIG.iter().cloned()) + .storage_option("aws_server_side_encryption", "aws:kms") + .storage_option("aws_sse_kms_key_id", &key.0) + .execute() + .await?; + + let table = db.open_table("test_table").execute().await?; + + let data = test_data(); + let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema()); + table.add(data).execute().await?; + validate_objects_encrypted(&bucket.0, "test_table", &key.0).await; + + Ok(()) +} From a6aa67baed47b497ef1988556a68a67a8dcc76c2 Mon Sep 17 00:00:00 2001 From: Raghav Dixit <34462078+raghavdixit99@users.noreply.github.com> Date: Wed, 10 Apr 2024 13:17:14 -0400 Subject: [PATCH 03/19] python: Bug fixes / tests (#1210) closes #1194 #1172 #1124 #1208 @wjones127 : `if query_type != "fts":` is needed because both fts and vector search create `LanceQueryBuilder` which has `vector_column_name` as a required attribute. --- python/pyproject.toml | 1 + python/python/lancedb/embeddings/bedrock.py | 3 +++ python/python/lancedb/embeddings/gemini_text.py | 3 +++ python/python/lancedb/embeddings/imagebind.py | 11 +++++++++-- .../python/lancedb/embeddings/transformers.py | 10 ++++++++-- python/python/lancedb/table.py | 12 +++++++++++- python/python/tests/test_db.py | 17 +++++++++++++++++ 7 files changed, 52 insertions(+), 5 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index f457f262..f598f08b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -57,6 +57,7 @@ tests = [ "duckdb", "pytz", "polars>=0.19", + "tantivy" ] dev = ["ruff", "pre-commit"] docs = [ diff --git a/python/python/lancedb/embeddings/bedrock.py b/python/python/lancedb/embeddings/bedrock.py index 8b0ccbc2..dab926a9 100644 --- a/python/python/lancedb/embeddings/bedrock.py +++ b/python/python/lancedb/embeddings/bedrock.py @@ -78,6 +78,9 @@ class BedRockText(TextEmbeddingFunction): class Config: keep_untouched = (cached_property,) + else: + model_config = dict() + 
model_config["ignored_types"] = (cached_property,) def ndims(self): # return len(self._generate_embedding("test")) diff --git a/python/python/lancedb/embeddings/gemini_text.py b/python/python/lancedb/embeddings/gemini_text.py index bdbd304c..e3a9b96d 100644 --- a/python/python/lancedb/embeddings/gemini_text.py +++ b/python/python/lancedb/embeddings/gemini_text.py @@ -94,6 +94,9 @@ class GeminiText(TextEmbeddingFunction): class Config: keep_untouched = (cached_property,) + else: + model_config = dict() + model_config["ignored_types"] = (cached_property,) def ndims(self): # TODO: fix hardcoding diff --git a/python/python/lancedb/embeddings/imagebind.py b/python/python/lancedb/embeddings/imagebind.py index 209a134b..634b1487 100644 --- a/python/python/lancedb/embeddings/imagebind.py +++ b/python/python/lancedb/embeddings/imagebind.py @@ -22,6 +22,8 @@ from .base import EmbeddingFunction from .registry import register from .utils import AUDIO, IMAGES, TEXT +from lancedb.pydantic import PYDANTIC_VERSION + @register("imagebind") class ImageBindEmbeddings(EmbeddingFunction): @@ -38,8 +40,13 @@ class ImageBindEmbeddings(EmbeddingFunction): device: str = "cpu" normalize: bool = False - class Config: - keep_untouched = (cached_property,) + if PYDANTIC_VERSION < (2, 0): # Pydantic 1.x compat + + class Config: + keep_untouched = (cached_property,) + else: + model_config = dict() + model_config["ignored_types"] = (cached_property,) def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/python/python/lancedb/embeddings/transformers.py b/python/python/lancedb/embeddings/transformers.py index f796bd2e..02696c4f 100644 --- a/python/python/lancedb/embeddings/transformers.py +++ b/python/python/lancedb/embeddings/transformers.py @@ -17,6 +17,7 @@ from typing import List, Any import numpy as np from pydantic import PrivateAttr +from lancedb.pydantic import PYDANTIC_VERSION from ..util import attempt_import_or_raise from .base import EmbeddingFunction @@ 
-53,8 +54,13 @@ class TransformersEmbeddingFunction(EmbeddingFunction): self._tokenizer = transformers.AutoTokenizer.from_pretrained(self.name) self._model = transformers.AutoModel.from_pretrained(self.name) - class Config: - keep_untouched = (cached_property,) + if PYDANTIC_VERSION < (2, 0): # Pydantic 1.x compat + + class Config: + keep_untouched = (cached_property,) + else: + model_config = dict() + model_config["ignored_types"] = (cached_property,) def ndims(self): self._ndims = self._model.config.hidden_size diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index 1928a3a9..58d0d0bf 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -95,6 +95,9 @@ def _sanitize_data( data.data.to_batches(), schema, metadata, on_bad_vectors, fill_value ) + if isinstance(data, LanceModel): + raise ValueError("Cannot add a single LanceModel to a table. Use a list.") + if isinstance(data, list): # convert to list of dict if data is a bunch of LanceModels if isinstance(data[0], LanceModel): @@ -1403,7 +1406,14 @@ class LanceTable(Table): vector and the returned vector. 
""" if vector_column_name is None and query is not None: - vector_column_name = inf_vector_column_query(self.schema) + try: + vector_column_name = inf_vector_column_query(self.schema) + except Exception as e: + if query_type == "fts": + vector_column_name = "" + else: + raise e + return LanceQueryBuilder.create( self, query, diff --git a/python/python/tests/test_db.py b/python/python/tests/test_db.py index fc4420ba..82b90c0a 100644 --- a/python/python/tests/test_db.py +++ b/python/python/tests/test_db.py @@ -28,13 +28,25 @@ def test_basic(tmp_path): assert db.uri == str(tmp_path) assert db.table_names() == [] + class SimpleModel(LanceModel): + item: str + price: float + vector: Vector(2) + table = db.create_table( "test", data=[ {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}, ], + schema=SimpleModel, ) + + with pytest.raises( + ValueError, match="Cannot add a single LanceModel to a table. Use a list." + ): + table.add(SimpleModel(item="baz", price=30.0, vector=[1.0, 2.0])) + rs = table.search([100, 100]).limit(1).to_pandas() assert len(rs) == 1 assert rs["item"].iloc[0] == "bar" @@ -43,6 +55,11 @@ def test_basic(tmp_path): assert len(rs) == 1 assert rs["item"].iloc[0] == "foo" + table.create_fts_index(["item"]) + rs = table.search("bar", query_type="fts").to_pandas() + assert len(rs) == 1 + assert rs["item"].iloc[0] == "bar" + assert db.table_names() == ["test"] assert "test" in db assert len(db) == 1 From e4945abb1a6cce4ee24bafced23b03a418bf44e3 Mon Sep 17 00:00:00 2001 From: Lance Release Date: Wed, 10 Apr 2024 17:39:52 +0000 Subject: [PATCH 04/19] =?UTF-8?q?Bump=20version:=200.4.16=20=E2=86=92=200.?= =?UTF-8?q?4.17?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- node/package.json | 12 ++++++------ nodejs/npm/darwin-arm64/package.json | 2 +- nodejs/npm/darwin-x64/package.json | 2 +- nodejs/npm/linux-arm64-gnu/package.json | 2 +- 
nodejs/npm/linux-x64-gnu/package.json | 2 +- nodejs/package.json | 12 ++++++------ rust/ffi/node/Cargo.toml | 2 +- rust/lancedb/Cargo.toml | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index df270b59..5de9f0d4 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.16 +current_version = 0.4.17 commit = True message = Bump version: {current_version} → {new_version} tag = True diff --git a/node/package.json b/node/package.json index f497cb5a..50edfb27 100644 --- a/node/package.json +++ b/node/package.json @@ -1,6 +1,6 @@ { "name": "vectordb", - "version": "0.4.16", + "version": "0.4.17", "description": " Serverless, low-latency vector database for AI applications", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -88,10 +88,10 @@ } }, "optionalDependencies": { - "@lancedb/vectordb-darwin-arm64": "0.4.16", - "@lancedb/vectordb-darwin-x64": "0.4.16", - "@lancedb/vectordb-linux-arm64-gnu": "0.4.16", - "@lancedb/vectordb-linux-x64-gnu": "0.4.16", - "@lancedb/vectordb-win32-x64-msvc": "0.4.16" + "@lancedb/vectordb-darwin-arm64": "0.4.17", + "@lancedb/vectordb-darwin-x64": "0.4.17", + "@lancedb/vectordb-linux-arm64-gnu": "0.4.17", + "@lancedb/vectordb-linux-x64-gnu": "0.4.17", + "@lancedb/vectordb-win32-x64-msvc": "0.4.17" } } diff --git a/nodejs/npm/darwin-arm64/package.json b/nodejs/npm/darwin-arm64/package.json index 59e2675e..5f031bec 100644 --- a/nodejs/npm/darwin-arm64/package.json +++ b/nodejs/npm/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-darwin-arm64", - "version": "0.4.16", + "version": "0.4.17", "os": [ "darwin" ], diff --git a/nodejs/npm/darwin-x64/package.json b/nodejs/npm/darwin-x64/package.json index 2b4c3783..986274e1 100644 --- a/nodejs/npm/darwin-x64/package.json +++ b/nodejs/npm/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-darwin-x64", - "version": "0.4.16", + "version": "0.4.17", "os": [ 
"darwin" ], diff --git a/nodejs/npm/linux-arm64-gnu/package.json b/nodejs/npm/linux-arm64-gnu/package.json index 0b14248e..072b4954 100644 --- a/nodejs/npm/linux-arm64-gnu/package.json +++ b/nodejs/npm/linux-arm64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-arm64-gnu", - "version": "0.4.16", + "version": "0.4.17", "os": [ "linux" ], diff --git a/nodejs/npm/linux-x64-gnu/package.json b/nodejs/npm/linux-x64-gnu/package.json index ffff0e07..c68bf9db 100644 --- a/nodejs/npm/linux-x64-gnu/package.json +++ b/nodejs/npm/linux-x64-gnu/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb-linux-x64-gnu", - "version": "0.4.16", + "version": "0.4.17", "os": [ "linux" ], diff --git a/nodejs/package.json b/nodejs/package.json index c9e06faf..dc5172df 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -1,6 +1,6 @@ { "name": "@lancedb/lancedb", - "version": "0.4.16", + "version": "0.4.17", "main": "./dist/index.js", "types": "./dist/index.d.ts", "napi": { @@ -70,11 +70,11 @@ "version": "napi version" }, "optionalDependencies": { - "@lancedb/lancedb-darwin-arm64": "0.4.16", - "@lancedb/lancedb-darwin-x64": "0.4.16", - "@lancedb/lancedb-linux-arm64-gnu": "0.4.16", - "@lancedb/lancedb-linux-x64-gnu": "0.4.16", - "@lancedb/lancedb-win32-x64-msvc": "0.4.16" + "@lancedb/lancedb-darwin-arm64": "0.4.17", + "@lancedb/lancedb-darwin-x64": "0.4.17", + "@lancedb/lancedb-linux-arm64-gnu": "0.4.17", + "@lancedb/lancedb-linux-x64-gnu": "0.4.17", + "@lancedb/lancedb-win32-x64-msvc": "0.4.17" }, "dependencies": { "openai": "^4.29.2", diff --git a/rust/ffi/node/Cargo.toml b/rust/ffi/node/Cargo.toml index 4714a6d1..81df8fe0 100644 --- a/rust/ffi/node/Cargo.toml +++ b/rust/ffi/node/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lancedb-node" -version = "0.4.16" +version = "0.4.17" description = "Serverless, low-latency vector database for AI applications" license.workspace = true edition.workspace = true diff --git a/rust/lancedb/Cargo.toml 
b/rust/lancedb/Cargo.toml index 7c6c5487..517fcd03 100644 --- a/rust/lancedb/Cargo.toml +++ b/rust/lancedb/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lancedb" -version = "0.4.16" +version = "0.4.17" edition.workspace = true description = "LanceDB: A serverless, low-latency vector database for AI applications" license.workspace = true From 09cfab6d006a550d0c4d1048b9555155ff99ff6a Mon Sep 17 00:00:00 2001 From: Lance Release Date: Wed, 10 Apr 2024 17:40:03 +0000 Subject: [PATCH 05/19] Updating package-lock.json --- node/package-lock.json | 74 ++++-------------------------------------- 1 file changed, 7 insertions(+), 67 deletions(-) diff --git a/node/package-lock.json b/node/package-lock.json index b55fcec5..721f3657 100644 --- a/node/package-lock.json +++ b/node/package-lock.json @@ -1,12 +1,12 @@ { "name": "vectordb", - "version": "0.4.16", + "version": "0.4.17", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "vectordb", - "version": "0.4.16", + "version": "0.4.17", "cpu": [ "x64", "arm64" @@ -52,11 +52,11 @@ "uuid": "^9.0.0" }, "optionalDependencies": { - "@lancedb/vectordb-darwin-arm64": "0.4.16", - "@lancedb/vectordb-darwin-x64": "0.4.16", - "@lancedb/vectordb-linux-arm64-gnu": "0.4.16", - "@lancedb/vectordb-linux-x64-gnu": "0.4.16", - "@lancedb/vectordb-win32-x64-msvc": "0.4.16" + "@lancedb/vectordb-darwin-arm64": "0.4.17", + "@lancedb/vectordb-darwin-x64": "0.4.17", + "@lancedb/vectordb-linux-arm64-gnu": "0.4.17", + "@lancedb/vectordb-linux-x64-gnu": "0.4.17", + "@lancedb/vectordb-win32-x64-msvc": "0.4.17" }, "peerDependencies": { "@apache-arrow/ts": "^14.0.2", @@ -333,66 +333,6 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, - "node_modules/@lancedb/vectordb-darwin-arm64": { - "version": "0.4.16", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.16.tgz", - "integrity": "sha512-RtuizzrZIVDYQ4ZZIMQRHGuV0DvOV93lyvivJJBLP1zCORMHEtEduaVbFE/+H0OCo0oqPsKXEpbc0nUEXKQqRg==", - "cpu": [ - 
"arm64" - ], - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@lancedb/vectordb-darwin-x64": { - "version": "0.4.16", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.16.tgz", - "integrity": "sha512-bxuh0scgVzAryZScRiTS3Z6hMZA3ekBTataDrlEJ/ddPTcAm14oQb8qrQu3mjWsYPMxlFHpSLMAJSU9SkWZbgg==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@lancedb/vectordb-linux-arm64-gnu": { - "version": "0.4.16", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.16.tgz", - "integrity": "sha512-howoWlHsOwDHm3jl1BC1a1NZ/MJR4J98jSLLfzmmQu071fj5IrZmKv1RyCYNWLm3KHxl+5XVkWxJOghb9x0ByQ==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@lancedb/vectordb-linux-x64-gnu": { - "version": "0.4.16", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.16.tgz", - "integrity": "sha512-oKx97pP8fnh+pm1mSVZ2+1VPqgT073iHT5nt+3wg7HP8A9XMGlCpdDHM/vC2NNCjbb9j64I5Tq/2x7s33bUfaw==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@lancedb/vectordb-win32-x64-msvc": { - "version": "0.4.16", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.16.tgz", - "integrity": "sha512-klG2HHeQ/CuLVFF3ZKJ61BIurEjqtTBxFh0CXL5aCG+pbA55IfzDDyhpGk2yCldZcF/XuNIufyRAqhJPlQzuVg==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "win32" - ] - }, "node_modules/@neon-rs/cli": { "version": "0.0.160", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", From cf06b653d44f82892ac84b4136f3703779812146 Mon Sep 17 00:00:00 2001 From: Lance Release Date: Wed, 10 Apr 2024 17:51:45 +0000 Subject: [PATCH 06/19] =?UTF-8?q?[python]=20Bump=20version:=200.6.7=20?= =?UTF-8?q?=E2=86=92=200.6.8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- python/.bumpversion.cfg | 2 +- python/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/.bumpversion.cfg b/python/.bumpversion.cfg index a4e5ccf0..5c1e81e7 100644 --- a/python/.bumpversion.cfg +++ b/python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.7 +current_version = 0.6.8 commit = True message = [python] Bump version: {current_version} → {new_version} tag = True diff --git a/python/pyproject.toml b/python/pyproject.toml index f598f08b..4338373a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lancedb" -version = "0.6.7" +version = "0.6.8" dependencies = [ "deprecation", "pylance==0.10.10", From 8ea06fe7f3dfa5164129bb18f626ee56a32dc471 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 10 Apr 2024 13:09:39 -0700 Subject: [PATCH 07/19] ci: fix failures in release scripts (#1215) * Python release has been running when we create a Node release. https://github.com/lancedb/lancedb/actions/runs/8635662585 * Rust is missing new enough compilers to check the kernels feature https://github.com/lancedb/lancedb/actions/runs/8635662578 --- .github/workflows/cargo-publish.yml | 3 +++ .github/workflows/pypi-publish.yml | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/.github/workflows/cargo-publish.yml b/.github/workflows/cargo-publish.yml index 9e4dfd35..50cdd125 100644 --- a/.github/workflows/cargo-publish.yml +++ b/.github/workflows/cargo-publish.yml @@ -8,6 +8,9 @@ env: # This env var is used by Swatinem/rust-cache@v2 for the cache # key, so we set it to make sure it is always consistent. CARGO_TERM_COLOR: always + # Up-to-date compilers needed for fp16kernels. 
+ CC: gcc-12 + CXX: g++-12 jobs: build: diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index aacd5848..c913003d 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -6,6 +6,8 @@ on: jobs: linux: + # Only runs on tags that matches the python-make-release action + if: startsWith(github.ref, 'refs/tags/python-v') name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }} timeout-minutes: 60 strategy: @@ -44,6 +46,8 @@ jobs: token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} repo: "pypi" mac: + # Only runs on tags that matches the python-make-release action + if: startsWith(github.ref, 'refs/tags/python-v') timeout-minutes: 60 runs-on: ${{ matrix.config.runner }} strategy: @@ -76,6 +80,8 @@ jobs: token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }} repo: "pypi" windows: + # Only runs on tags that matches the python-make-release action + if: startsWith(github.ref, 'refs/tags/python-v') timeout-minutes: 60 runs-on: windows-latest strategy: From 44c03ebef3a3ae9b53abd3ec22fd062850371a37 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 11 Apr 2024 15:20:00 +0530 Subject: [PATCH 08/19] docs : Update Reranking docs (#1213) --- docs/mkdocs.yml | 26 ++++--- docs/requirements.txt | 3 +- docs/src/reranking/cohere.md | 75 ++++++++++++++++++++ docs/src/reranking/colbert.md | 71 +++++++++++++++++++ docs/src/reranking/cross_encoder.md | 70 +++++++++++++++++++ docs/src/reranking/custom_reranker.md | 88 ++++++++++++++++++++++++ docs/src/reranking/index.md | 60 ++++++++++++++++ docs/src/reranking/linear_combination.md | 52 ++++++++++++++ docs/src/reranking/openai.md | 73 ++++++++++++++++++++ docs/test/md_testing.py | 1 + 10 files changed, 507 insertions(+), 12 deletions(-) create mode 100644 docs/src/reranking/cohere.md create mode 100644 docs/src/reranking/colbert.md create mode 100644 docs/src/reranking/cross_encoder.md create mode 100644 docs/src/reranking/custom_reranker.md create mode 
100644 docs/src/reranking/index.md create mode 100644 docs/src/reranking/linear_combination.md create mode 100644 docs/src/reranking/openai.md diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 4be33361..33f6c85f 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -57,16 +57,6 @@ plugins: - https://arrow.apache.org/docs/objects.inv - https://pandas.pydata.org/docs/objects.inv - mkdocs-jupyter - - ultralytics: - verbose: True - enabled: True - default_image: "assets/lancedb_and_lance.png" # Default image for all pages - add_image: True # Automatically add meta image - add_keywords: True # Add page keywords in the header tag - add_share_buttons: True # Add social share buttons - add_authors: False # Display page authors - add_desc: False - add_dates: False markdown_extensions: - admonition @@ -104,6 +94,14 @@ nav: - Overview: hybrid_search/hybrid_search.md - Comparing Rerankers: hybrid_search/eval.md - Airbnb financial data example: notebooks/hybrid_search.ipynb + - Reranking: + - Quickstart: reranking/index.md + - Cohere Reranker: reranking/cohere.md + - Linear Combination Reranker: reranking/linear_combination.md + - Cross Encoder Reranker: reranking/cross_encoder.md + - ColBERT Reranker: reranking/colbert.md + - OpenAI Reranker: reranking/openai.md + - Building Custom Rerankers: reranking/custom_reranker.md - Filtering: sql.md - Versioning & Reproducibility: notebooks/reproducibility.ipynb - Configuring Storage: guides/storage.md @@ -170,6 +168,14 @@ nav: - Overview: hybrid_search/hybrid_search.md - Comparing Rerankers: hybrid_search/eval.md - Airbnb financial data example: notebooks/hybrid_search.ipynb + - Reranking: + - Quickstart: reranking/index.md + - Cohere Reranker: reranking/cohere.md + - Linear Combination Reranker: reranking/linear_combination.md + - Cross Encoder Reranker: reranking/cross_encoder.md + - ColBERT Reranker: reranking/colbert.md + - OpenAI Reranker: reranking/openai.md + - Building Custom Rerankers: reranking/custom_reranker.md - 
Filtering: sql.md - Versioning & Reproducibility: notebooks/reproducibility.ipynb - Configuring Storage: guides/storage.md diff --git a/docs/requirements.txt b/docs/requirements.txt index 7f34591e..e5b8bbd3 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,5 +2,4 @@ mkdocs==1.5.3 mkdocs-jupyter==0.24.1 mkdocs-material==9.5.3 mkdocstrings[python]==0.20.0 -pydantic -mkdocs-ultralytics-plugin==0.0.44 \ No newline at end of file +pydantic \ No newline at end of file diff --git a/docs/src/reranking/cohere.md b/docs/src/reranking/cohere.md new file mode 100644 index 00000000..50b72e56 --- /dev/null +++ b/docs/src/reranking/cohere.md @@ -0,0 +1,75 @@ +# Cohere Reranker + +This re-ranker uses the [Cohere](https://cohere.ai/) API to rerank the search results. You can use this re-ranker by passing `CohereReranker()` to the `rerank()` method. Note that you'll either need to set the `COHERE_API_KEY` environment variable or pass the `api_key` argument to use this re-ranker. + + +!!! 
note + Supported Query Types: Hybrid, Vector, FTS + + +```python +import numpy +import lancedb +from lancedb.embeddings import get_registry +from lancedb.pydantic import LanceModel, Vector +from lancedb.rerankers import CohereReranker + +embedder = get_registry().get("sentence-transformers").create() +db = lancedb.connect("~/.lancedb") + +class Schema(LanceModel): + text: str = embedder.SourceField() + vector: Vector(embedder.ndims()) = embedder.VectorField() + +data = [ + {"text": "hello world"}, + {"text": "goodbye world"} + ] +tbl = db.create_table("test", schema=Schema, mode="overwrite") +tbl.add(data) +reranker = CohereReranker(api_key="key") + +# Run vector search with a reranker +result = tbl.search("hello").rerank(reranker=reranker).to_list() + +# Run FTS search with a reranker +result = tbl.search("hello", query_type="fts").rerank(reranker=reranker).to_list() + +# Run hybrid search with a reranker +tbl.create_fts_index("text", replace=True) +result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list() + +``` + +Accepted Arguments +---------------- +| Argument | Type | Default | Description | +| --- | --- | --- | --- | +| `model_name` | `str` | `"rerank-english-v2.0"` | The name of the reranker model to use. Available cohere models are: rerank-english-v2.0, rerank-multilingual-v2.0 | +| `column` | `str` | `"text"` | The name of the column to use as input to the cross encoder model. | +| `top_n` | `str` | `None` | The number of results to return. If None, will return all results. | +| `api_key` | `str` | `None` | The API key for the Cohere API. If not provided, the `COHERE_API_KEY` environment variable is used. | +| `return_score` | str | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score. 
If "all" is supported, will return relevance score along with the vector and/or fts scores depending on query type | + + + +## Supported Scores for each query type +You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type: + +### Hybrid Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ❌ Not Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_relevance_score`) | + +### Vector Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ✅ Supported | Returns have vector(`_distance`) along with Hybrid Search score(`_relevance_score`) | + +### FTS Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ✅ Supported | Returns have FTS(`score`) along with Hybrid Search score(`_relevance_score`) | \ No newline at end of file diff --git a/docs/src/reranking/colbert.md b/docs/src/reranking/colbert.md new file mode 100644 index 00000000..ace5a9a8 --- /dev/null +++ b/docs/src/reranking/colbert.md @@ -0,0 +1,71 @@ +# ColBERT Reranker + +This re-ranker uses ColBERT model to rerank the search results. You can use this re-ranker by passing `ColbertReranker()` to the `rerank()` method. +!!! 
note + Supported Query Types: Hybrid, Vector, FTS + + +```python +import numpy +import lancedb +from lancedb.embeddings import get_registry +from lancedb.pydantic import LanceModel, Vector +from lancedb.rerankers import ColbertReranker + +embedder = get_registry().get("sentence-transformers").create() +db = lancedb.connect("~/.lancedb") + +class Schema(LanceModel): + text: str = embedder.SourceField() + vector: Vector(embedder.ndims()) = embedder.VectorField() + +data = [ + {"text": "hello world"}, + {"text": "goodbye world"} + ] +tbl = db.create_table("test", schema=Schema, mode="overwrite") +tbl.add(data) +reranker = ColbertReranker() + +# Run vector search with a reranker +result = tbl.search("hello").rerank(reranker=reranker).to_list() + +# Run FTS search with a reranker +result = tbl.search("hello", query_type="fts").rerank(reranker=reranker).to_list() + +# Run hybrid search with a reranker +tbl.create_fts_index("text", replace=True) +result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list() + +``` + +Accepted Arguments +---------------- +| Argument | Type | Default | Description | +| --- | --- | --- | --- | +| `model_name` | `str` | `"colbert-ir/colbertv2.0"` | The name of the reranker model to use.| +| `column` | `str` | `"text"` | The name of the column to use as input to the cross encoder model. | +| `device` | `str` | `None` | The device to use for the cross encoder model. If None, will use "cuda" if available, otherwise "cpu". | +| `return_score` | str | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score. If "all" is supported, will return relevance score along with the vector and/or fts scores depending on query type | + + +## Supported Scores for each query type +You can specify the type of scores you want the reranker to return. 
The following are the supported scores for each query type: + +### Hybrid Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ❌ Not Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_relevance_score`) | + +### Vector Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ✅ Supported | Returns have vector(`_distance`) along with Hybrid Search score(`_relevance_score`) | + +### FTS Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ✅ Supported | Returns have FTS(`score`) along with Hybrid Search score(`_relevance_score`) | \ No newline at end of file diff --git a/docs/src/reranking/cross_encoder.md b/docs/src/reranking/cross_encoder.md new file mode 100644 index 00000000..d6e3c54f --- /dev/null +++ b/docs/src/reranking/cross_encoder.md @@ -0,0 +1,70 @@ +# Cross Encoder Reranker + +This re-ranker uses Cross Encoder models from sentence-transformers to rerank the search results. You can use this re-ranker by passing `CrossEncoderReranker()` to the `rerank()` method. +!!! 
note + Supported Query Types: Hybrid, Vector, FTS + + +```python +import numpy +import lancedb +from lancedb.embeddings import get_registry +from lancedb.pydantic import LanceModel, Vector +from lancedb.rerankers import CrossEncoderReranker + +embedder = get_registry().get("sentence-transformers").create() +db = lancedb.connect("~/.lancedb") + +class Schema(LanceModel): + text: str = embedder.SourceField() + vector: Vector(embedder.ndims()) = embedder.VectorField() + +data = [ + {"text": "hello world"}, + {"text": "goodbye world"} + ] +tbl = db.create_table("test", schema=Schema, mode="overwrite") +tbl.add(data) +reranker = CrossEncoderReranker() + +# Run vector search with a reranker +result = tbl.search("hello").rerank(reranker=reranker).to_list() + +# Run FTS search with a reranker +result = tbl.search("hello", query_type="fts").rerank(reranker=reranker).to_list() + +# Run hybrid search with a reranker +tbl.create_fts_index("text", replace=True) +result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list() + +``` + +Accepted Arguments +---------------- +| Argument | Type | Default | Description | +| --- | --- | --- | --- | +| `model_name` | `str` | `""cross-encoder/ms-marco-TinyBERT-L-6"` | The name of the reranker model to use.| +| `column` | `str` | `"text"` | The name of the column to use as input to the cross encoder model. | +| `device` | `str` | `None` | The device to use for the cross encoder model. If None, will use "cuda" if available, otherwise "cpu". | +| `return_score` | str | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score. If "all" is supported, will return relevance score along with the vector and/or fts scores depending on query type | + +## Supported Scores for each query type +You can specify the type of scores you want the reranker to return. 
The following are the supported scores for each query type: + +### Hybrid Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ❌ Not Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_relevance_score`) | + +### Vector Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ✅ Supported | Returns have vector(`_distance`) along with Hybrid Search score(`_relevance_score`) | + +### FTS Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ✅ Supported | Returns have FTS(`score`) along with Hybrid Search score(`_relevance_score`) | \ No newline at end of file diff --git a/docs/src/reranking/custom_reranker.md b/docs/src/reranking/custom_reranker.md new file mode 100644 index 00000000..0fc00eb4 --- /dev/null +++ b/docs/src/reranking/custom_reranker.md @@ -0,0 +1,88 @@ +## Building Custom Rerankers +You can build your own custom reranker by subclassing the `Reranker` class and implementing the `rerank_hybrid()` method. Optionally, you can also implement the `rerank_vector()` and `rerank_fts()` methods if you want to support reranking for vector and FTS search separately. +Here's an example of a custom reranker that combines the results of semantic and full-text search using a linear combination of the scores. + +The `Reranker` base interface comes with a `merge_results()` method that can be used to combine the results of semantic and full-text search. This is a vanilla merging algorithm that simply concatenates the results and removes the duplicates without taking the scores into consideration. It only keeps the first copy of the row encountered. 
This works well in cases that don't require the scores of semantic and full-text search to combine the results. If you want to use the scores or want to support `return_score="all"`, you'll need to implement your own merging algorithm. + +```python + +from lancedb.rerankers import Reranker +import pyarrow as pa + +class MyReranker(Reranker): + def __init__(self, param1, param2, ..., return_score="relevance"): + super().__init__(return_score) + self.param1 = param1 + self.param2 = param2 + + def rerank_hybrid(self, query: str, vector_results: pa.Table, fts_results: pa.Table): + # Use the built-in merging function + combined_result = self.merge_results(vector_results, fts_results) + + # Do something with the combined results + # ... + + # Return the combined results + return combined_result + + def rerank_vector(self, query: str, vector_results: pa.Table): + # Do something with the vector results + # ... + + # Return the vector results + return vector_results + + def rerank_fts(self, query: str, fts_results: pa.Table): + # Do something with the FTS results + # ... + + # Return the FTS results + return fts_results + +``` + +### Example of a Custom Reranker +For the sake of simplicity let's build custom reranker that just enchances the Cohere Reranker by accepting a filter query, and accept other CohereReranker params as kwags. 
+ +```python + +from typing import List, Union +import pandas as pd +from lancedb.rerankers import CohereReranker + +class ModifiedCohereReranker(CohereReranker): + def __init__(self, filters: Union[str, List[str]], **kwargs): + super().__init__(**kwargs) + filters = filters if isinstance(filters, list) else [filters] + self.filters = filters + + def rerank_hybrid(self, query: str, vector_results: pa.Table, fts_results: pa.Table)-> pa.Table: + combined_result = super().rerank_hybrid(query, vector_results, fts_results) + df = combined_result.to_pandas() + for filter in self.filters: + df = df.query("not text.str.contains(@filter)") + + return pa.Table.from_pandas(df) + + def rerank_vector(self, query: str, vector_results: pa.Table)-> pa.Table: + vector_results = super().rerank_vector(query, vector_results) + df = vector_results.to_pandas() + for filter in self.filters: + df = df.query("not text.str.contains(@filter)") + + return pa.Table.from_pandas(df) + + def rerank_fts(self, query: str, fts_results: pa.Table)-> pa.Table: + fts_results = super().rerank_fts(query, fts_results) + df = fts_results.to_pandas() + for filter in self.filters: + df = df.query("not text.str.contains(@filter)") + + return pa.Table.from_pandas(df) + +``` + +!!! tip + The `vector_results` and `fts_results` are pyarrow tables. Lean more about pyarrow tables [here](https://arrow.apache.org/docs/python). It can be convered to other data types like pandas dataframe, pydict, pylist etc. + + For example, You can convert them to pandas dataframes using `to_pandas()` method and perform any operations you want. After you are done, you can convert the dataframe back to pyarrow table using `pa.Table.from_pandas()` method and return it. 
\ No newline at end of file diff --git a/docs/src/reranking/index.md b/docs/src/reranking/index.md new file mode 100644 index 00000000..20199524 --- /dev/null +++ b/docs/src/reranking/index.md @@ -0,0 +1,60 @@ +Reranking is the process of reordering a list of items based on some criteria. In the context of search, reranking is used to reorder the search results returned by a search engine based on some criteria. This can be useful when the initial ranking of the search results is not satisfactory or when the user has provided additional information that can be used to improve the ranking of the search results. + +LanceDB comes with some built-in rerankers. Some of the rerankers that are available in LanceDB are: + +| Reranker | Description | Supported Query Types | +| --- | --- | --- | +| `LinearCombinationReranker` | Reranks search results based on a linear combination of FTS and vector search scores | Hybrid | +| `CohereReranker` | Uses cohere Reranker API to rerank results | Vector, FTS, Hybrid | +| `CrossEncoderReranker` | Uses a cross-encoder model to rerank search results | Vector, FTS, Hybrid | +| `ColbertReranker` | Uses a colbert model to rerank search results | Vector, FTS, Hybrid | +| `OpenaiReranker`(Experimental) | Uses OpenAI's chat model to rerank search results | Vector, FTS, Hybrid | + + +## Using a Reranker +Using rerankers is optional for vector and FTS. However, for hybrid search, rerankers are required. To use a reranker, you need to create an instance of the reranker and pass it to the `rerank` method of the query builder. 
+ +```python +import numpy +import lancedb +from lancedb.embeddings import get_registry +from lancedb.pydantic import LanceModel, Vector +from lancedb.rerankers import CohereReranker + +embedder = get_registry().get("sentence-transformers").create() +db = lancedb.connect("~/.lancedb") + +class Schema(LanceModel): + text: str = embedder.SourceField() + vector: Vector(embedder.ndims()) = embedder.VectorField() + +data = [ + {"text": "hello world"}, + {"text": "goodbye world"} + ] +tbl = db.create_table("test", data) +reranker = CohereReranker(api_key="your_api_key") + +# Run vector search with a reranker +result = tbl.query("hello").rerank(reranker).to_list() + +# Run FTS search with a reranker +result = tbl.query("hello", query_type="fts").rerank(reranker).to_list() + +# Run hybrid search with a reranker +tbl.create_fts_index("text") +result = tbl.query("hello", query_type="hybrid").rerank(reranker).to_list() +``` + +## Available Rerankers +LanceDB comes with some built-in rerankers. Here are some of the rerankers that are available in LanceDB: + +- [Cohere Reranker](./cohere.md) +- [Cross Encoder Reranker](./cross_encoder.md) +- [ColBERT Reranker](./colbert.md) +- [OpenAI Reranker](./openai.md) +- [Linear Combination Reranker](./linear_combination.md) + +## Creating Custom Rerankers + +LanceDB also you to create custom rerankers by extending the base `Reranker` class. The custom reranker should implement the `rerank` method that takes a list of search results and returns a reranked list of search results. This is covered in more detail in the [Creating Custom Rerankers](./custom_reranker.md) section. \ No newline at end of file diff --git a/docs/src/reranking/linear_combination.md b/docs/src/reranking/linear_combination.md new file mode 100644 index 00000000..4a27907c --- /dev/null +++ b/docs/src/reranking/linear_combination.md @@ -0,0 +1,52 @@ +# Linear Combination Reranker + +This is the default re-ranker used by LanceDB hybrid search. 
It combines the results of semantic and full-text search using a linear combination of the scores. The weights for the linear combination can be specified. It defaults to 0.7, i.e, 70% weight for semantic search and 30% weight for full-text search. + +!!! note + Supported Query Types: Hybrid + + +```python +import numpy +import lancedb +from lancedb.embeddings import get_registry +from lancedb.pydantic import LanceModel, Vector +from lancedb.rerankers import LinearCombinationReranker + +embedder = get_registry().get("sentence-transformers").create() +db = lancedb.connect("~/.lancedb") + +class Schema(LanceModel): + text: str = embedder.SourceField() + vector: Vector(embedder.ndims()) = embedder.VectorField() + +data = [ + {"text": "hello world"}, + {"text": "goodbye world"} + ] +tbl = db.create_table("test", schema=Schema, mode="overwrite") +tbl.add(data) +reranker = LinearCombinationReranker() + +# Run hybrid search with a reranker +tbl.create_fts_index("text", replace=True) +result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list() + +``` + +Accepted Arguments +---------------- +| Argument | Type | Default | Description | +| --- | --- | --- | --- | +| `weight` | `float` | `0.7` | The weight to use for the semantic search score. The weight for the full-text search score is `1 - weights`. | +| `return_score` | str | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score. If "all", will return all scores from the vector and FTS search along with the relevance score. | + + +## Supported Scores for each query type +You can specify the type of scores you want the reranker to return. 
The following are the supported scores for each query type: + +### Hybrid Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ✅ Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_distance`) | \ No newline at end of file diff --git a/docs/src/reranking/openai.md b/docs/src/reranking/openai.md new file mode 100644 index 00000000..ec935910 --- /dev/null +++ b/docs/src/reranking/openai.md @@ -0,0 +1,73 @@ +# OpenAI Reranker (Experimental) + +This re-ranker uses OpenAI chat model to rerank the search results. You can use this re-ranker by passing `OpenAI()` to the `rerank()` method. +!!! note + Supported Query Types: Hybrid, Vector, FTS + +!!! warning + This re-ranker is experimental. OpenAI doesn't have a dedicated reranking model, so we are using the chat model for reranking. + +```python +import numpy +import lancedb +from lancedb.embeddings import get_registry +from lancedb.pydantic import LanceModel, Vector +from lancedb.rerankers import OpenaiReranker + +embedder = get_registry().get("sentence-transformers").create() +db = lancedb.connect("~/.lancedb") + +class Schema(LanceModel): + text: str = embedder.SourceField() + vector: Vector(embedder.ndims()) = embedder.VectorField() + +data = [ + {"text": "hello world"}, + {"text": "goodbye world"} + ] +tbl = db.create_table("test", schema=Schema, mode="overwrite") +tbl.add(data) +reranker = OpenaiReranker() + +# Run vector search with a reranker +result = tbl.search("hello").rerank(reranker=reranker).to_list() + +# Run FTS search with a reranker +result = tbl.search("hello", query_type="fts").rerank(reranker=reranker).to_list() + +# Run hybrid search with a reranker +tbl.create_fts_index("text", replace=True) +result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list() + +``` + +Accepted Arguments +---------------- +| Argument | Type | Default 
| Description | +| --- | --- | --- | --- | +| `model_name` | `str` | `"gpt-4-turbo-preview"` | The name of the reranker model to use.| +| `column` | `str` | `"text"` | The name of the column to use as input to the cross encoder model. | +| `return_score` | str | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score. If "all" is supported, will return relevance score along with the vector and/or fts scores depending on query type | +| `api_key` | str | `None` | The API key to use. If None, will use the OPENAI_API_KEY environment variable. + + +## Supported Scores for each query type +You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type: + +### Hybrid Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ❌ Not Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_relevance_score`) | + +### Vector Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ✅ Supported | Returns have vector(`_distance`) along with Hybrid Search score(`_relevance_score`) | + +### FTS Search +|`return_score`| Status | Description | +| --- | --- | --- | +| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column | +| `all` | ✅ Supported | Returns have FTS(`score`) along with Hybrid Search score(`_relevance_score`) | \ No newline at end of file diff --git a/docs/test/md_testing.py b/docs/test/md_testing.py index 305e3668..eef77bcd 100644 --- a/docs/test/md_testing.py +++ b/docs/test/md_testing.py @@ -15,6 +15,7 @@ excluded_globs = [ "../src/ann_indexes.md", "../src/basic.md", "../src/hybrid_search/hybrid_search.md", + "../src/reranking/*.md", ] python_prefix = "py" From 
5d8c91256c0fc807ceabad55bf0ba746270004f0 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 11 Apr 2024 15:20:29 +0530 Subject: [PATCH 09/19] fix(python): Update to latest cohere reranking api (#1212) Fixes https://github.com/lancedb/lancedb/issues/1196 Cohere introduced a breaking change in their reranker API starting version 5.0.0. More context in discussion here https://github.com/cohere-ai/cohere-python/issues/446 --- python/python/lancedb/rerankers/cohere.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python/python/lancedb/rerankers/cohere.py b/python/python/lancedb/rerankers/cohere.py index a1ccb060..373e76b8 100644 --- a/python/python/lancedb/rerankers/cohere.py +++ b/python/python/lancedb/rerankers/cohere.py @@ -1,4 +1,5 @@ import os +import semver from functools import cached_property from typing import Union @@ -42,6 +43,14 @@ class CohereReranker(Reranker): @cached_property def _client(self): cohere = attempt_import_or_raise("cohere") + # ensure version is at least 0.5.0 + if ( + hasattr(cohere, "__version__") + and semver.compare(cohere.__version__, "5.0.0") < 0 + ): + raise ValueError( + f"cohere version must be at least 0.5.0, found {cohere.__version__}" + ) if os.environ.get("COHERE_API_KEY") is None and self.api_key is None: raise ValueError( "COHERE_API_KEY not set. 
Either set it in your environment or \ @@ -51,11 +60,14 @@ class CohereReranker(Reranker): def _rerank(self, result_set: pa.Table, query: str): docs = result_set[self.column].to_pylist() - results = self._client.rerank( + response = self._client.rerank( query=query, documents=docs, top_n=self.top_n, model=self.model_name, + ) + results = ( + response.results ) # returns list (text, idx, relevance) attributes sorted descending by score indices, scores = list( zip(*[(result.index, result.relevance_score) for result in results]) From d155e82723c7d90cb52e70966aa470eb7de0888f Mon Sep 17 00:00:00 2001 From: Prashanth Rao <35005448+prrao87@users.noreply.github.com> Date: Thu, 11 Apr 2024 06:02:08 -0400 Subject: [PATCH 10/19] [docs] Fix broken links and clarify language in integrations docs (#1209) This PR does the following: - Fixes broken/outdated URLs - Adds clarity to the way DuckDB/LanceDB integration works via Arrow --- docs/mkdocs.yml | 7 ++++--- docs/src/python/duckdb.md | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 33f6c85f..c4d3857c 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -118,9 +118,10 @@ nav: - Pandas and PyArrow: python/pandas_and_pyarrow.md - Polars: python/polars_arrow.md - DuckDB: python/duckdb.md - - LangChain 🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html - - LangChain JS/TS 🔗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb - - LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html + - LangChain: + - LangChain 🔗: https://python.langchain.com/docs/integrations/vectorstores/lancedb/ + - LangChain JS/TS 🔗: https://js.langchain.com/docs/integrations/vectorstores/lancedb + - LlamaIndex 🦙: https://docs.llamaindex.ai/en/stable/examples/vector_stores/LanceDBIndexDemo/ - Pydantic: python/pydantic.md - Voxel51: integrations/voxel51.md - PromptTools: 
integrations/prompttools.md diff --git a/docs/src/python/duckdb.md b/docs/src/python/duckdb.md index 6b301b71..08ef66c5 100644 --- a/docs/src/python/duckdb.md +++ b/docs/src/python/duckdb.md @@ -24,7 +24,8 @@ data = [ table = db.create_table("pd_table", data=data) ``` -To query the table, first call `to_lance` to convert the table to a "dataset", which is an object that can be queried by DuckDB. Then all you need to do is reference that dataset by the same name in your SQL query. +The `to_lance` method converts the LanceDB table to a `LanceDataset`, which is accessible to DuckDB through the Arrow compatibility layer. +To query the resulting Lance dataset in DuckDB, all you need to do is reference the dataset by the same name in your SQL query. ```python import duckdb From b039765d50bea25c9d9fe6f922e686b4ed4f7709 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Thu, 11 Apr 2024 17:30:45 +0530 Subject: [PATCH 11/19] docs : Embedding functions quickstart and minor fixes (#1217) --- .../embeddings/default_embedding_functions.md | 37 +++++++---- docs/src/embeddings/index.md | 62 ++++++++++++++++++- python/pyproject.toml | 1 - 3 files changed, 87 insertions(+), 13 deletions(-) diff --git a/docs/src/embeddings/default_embedding_functions.md b/docs/src/embeddings/default_embedding_functions.md index 67422025..d04be52e 100644 --- a/docs/src/embeddings/default_embedding_functions.md +++ b/docs/src/embeddings/default_embedding_functions.md @@ -154,9 +154,12 @@ Allows you to set parameters when registering a `sentence-transformers` object. !!! 
note "BAAI Embeddings example" Here is an example that uses BAAI embedding model from the HuggingFace Hub [supported models](https://huggingface.co/models?library=sentence-transformers) ```python + import lancedb + from lancedb.pydantic import LanceModel, Vector + from lancedb.embeddings import get_registry + db = lancedb.connect("/tmp/db") - registry = EmbeddingFunctionRegistry.get_instance() - model = registry.get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu") + model = get_registry.get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu") class Words(LanceModel): text: str = model.SourceField() @@ -165,7 +168,7 @@ Allows you to set parameters when registering a `sentence-transformers` object. table = db.create_table("words", schema=Words) table.add( [ - {"text": "hello world"} + {"text": "hello world"}, {"text": "goodbye world"} ] ) @@ -213,18 +216,21 @@ LanceDB registers the OpenAI embeddings function in the registry by default, as ```python +import lancedb +from lancedb.pydantic import LanceModel, Vector +from lancedb.embeddings import get_registry + db = lancedb.connect("/tmp/db") -registry = EmbeddingFunctionRegistry.get_instance() -func = registry.get("openai").create() +func = get_registry().get("openai").create(name="text-embedding-ada-002") class Words(LanceModel): text: str = func.SourceField() vector: Vector(func.ndims()) = func.VectorField() -table = db.create_table("words", schema=Words) +table = db.create_table("words", schema=Words, mode="overwrite") table.add( [ - {"text": "hello world"} + {"text": "hello world"}, {"text": "goodbye world"} ] ) @@ -353,6 +359,10 @@ Supported parameters (to be passed in `create` method) are: Usage Example: ```python +import lancedb +from lancedb.pydantic import LanceModel, Vector +from lancedb.embeddings import get_registry + model = get_registry().get("bedrock-text").create() class TextModel(LanceModel): @@ -387,10 +397,12 @@ This embedding function supports 
ingesting images as both bytes and urls. You ca LanceDB supports ingesting images directly from accessible links. ```python +import lancedb +from lancedb.pydantic import LanceModel, Vector +from lancedb.embeddings import get_registry db = lancedb.connect(tmp_path) -registry = EmbeddingFunctionRegistry.get_instance() -func = registry.get("open-clip").create() +func = get_registry().get("open-clip").create() class Images(LanceModel): label: str @@ -465,9 +477,12 @@ This function is registered as `imagebind` and supports Audio, Video and Text mo Below is an example demonstrating how the API works: ```python +import lancedb +from lancedb.pydantic import LanceModel, Vector +from lancedb.embeddings import get_registry + db = lancedb.connect(tmp_path) -registry = EmbeddingFunctionRegistry.get_instance() -func = registry.get("imagebind").create() +func = get_registry().get("imagebind").create() class ImageBindModel(LanceModel): text: str diff --git a/docs/src/embeddings/index.md b/docs/src/embeddings/index.md index bd4bbbf9..0752cabe 100644 --- a/docs/src/embeddings/index.md +++ b/docs/src/embeddings/index.md @@ -11,4 +11,64 @@ LanceDB supports 3 methods of working with embeddings. that extends the default embedding functions. For python users, there is also a legacy [with_embeddings API](./legacy.md). -It is retained for compatibility and will be removed in a future version. \ No newline at end of file +It is retained for compatibility and will be removed in a future version. + +## Quickstart + +To get started with embeddings, you can use the built-in embedding functions. + +### OpenAI Embedding function
LanceDB registers the OpenAI embeddings function in the registry as `openai`. You can pass any supported model name to the `create`. By default it uses `"text-embedding-ada-002"`. 
+ +```python +import lancedb +from lancedb.pydantic import LanceModel, Vector +from lancedb.embeddings import get_registry + +db = lancedb.connect("/tmp/db") +func = get_registry().get("openai").create(name="text-embedding-ada-002") + +class Words(LanceModel): + text: str = func.SourceField() + vector: Vector(func.ndims()) = func.VectorField() + +table = db.create_table("words", schema=Words, mode="overwrite") +table.add( + [ + {"text": "hello world"}, + {"text": "goodbye world"} + ] + ) + +query = "greetings" +actual = table.search(query).limit(1).to_pydantic(Words)[0] +print(actual.text) +``` + +### Sentence Transformers Embedding function +LanceDB registers the Sentence Transformers embeddings function in the registry as `sentence-transformers`. You can pass any supported model name to the `create`. By default it uses `"sentence-transformers/paraphrase-MiniLM-L6-v2"`. + +```python +import lancedb +from lancedb.pydantic import LanceModel, Vector +from lancedb.embeddings import get_registry + +db = lancedb.connect("/tmp/db") +model = get_registry().get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu") + +class Words(LanceModel): + text: str = model.SourceField() + vector: Vector(model.ndims()) = model.VectorField() + +table = db.create_table("words", schema=Words) +table.add( + [ + {"text": "hello world"}, + {"text": "goodbye world"} + ] +) + +query = "greetings" +actual = table.search(query).limit(1).to_pydantic(Words)[0] +print(actual.text) +``` \ No newline at end of file diff --git a/python/pyproject.toml b/python/pyproject.toml index 4338373a..29a8b800 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -65,7 +65,6 @@ docs = [ "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]", - "mkdocs-ultralytics-plugin==0.0.44", ] clip = ["torch", "pillow", "open-clip"] embeddings = [ From deb947ddbd91ac67854c46bf37bc1d44bf7d45a1 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Thu, 11 Apr 2024 14:58:51 -0700 Subject: 
[PATCH 12/19] doc: fix typo, broken links (#1218) --- docs/mkdocs.yml | 4 ++-- python/python/lancedb/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index c4d3857c..e6a28a33 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -193,8 +193,8 @@ nav: - Pandas and PyArrow: python/pandas_and_pyarrow.md - Polars: python/polars_arrow.md - DuckDB: python/duckdb.md - - LangChain 🦜️🔗↗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html - - LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb + - LangChain 🦜️🔗↗: https://python.langchain.com/docs/integrations/vectorstores/lancedb + - LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/integrations/vectorstores/lancedb - LlamaIndex 🦙↗: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html - Pydantic: python/pydantic.md - Voxel51: integrations/voxel51.md diff --git a/python/python/lancedb/__init__.py b/python/python/lancedb/__init__.py index 99d66f58..6c542852 100644 --- a/python/python/lancedb/__init__.py +++ b/python/python/lancedb/__init__.py @@ -83,7 +83,7 @@ def connect( >>> db = lancedb.connect("s3://my-bucket/lancedb") - Connect to LancdDB cloud: + Connect to LanceDB cloud: >>> db = lancedb.connect("db://my_database", api_key="ldb_...") From 1d0dd9a8b84971d2403d2e0915a2bdee98c2a697 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Fri, 12 Apr 2024 15:08:39 -0700 Subject: [PATCH 13/19] feat: bump lance version from 0.10.10 to 0.10.12 (#1219) --- Cargo.toml | 8 ++++---- python/pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f1689478..326e3899 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"] categories = ["database-implementations"] [workspace.dependencies] -lance = { "version" = "=0.10.10", 
"features" = ["dynamodb"] } -lance-index = { "version" = "=0.10.10" } -lance-linalg = { "version" = "=0.10.10" } -lance-testing = { "version" = "=0.10.10" } +lance = { "version" = "=0.10.12", "features" = ["dynamodb"] } +lance-index = { "version" = "=0.10.12" } +lance-linalg = { "version" = "=0.10.12" } +lance-testing = { "version" = "=0.10.12" } # Note that this one does not include pyarrow arrow = { version = "50.0", optional = false } arrow-array = "50.0" diff --git a/python/pyproject.toml b/python/pyproject.toml index 29a8b800..b2e80263 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -3,7 +3,7 @@ name = "lancedb" version = "0.6.8" dependencies = [ "deprecation", - "pylance==0.10.10", + "pylance==0.10.12", "ratelimiter~=1.0", "requests>=2.31.0", "retry>=0.9.2", From 7e023c1ef2f13de541daea1f99834f7fc3598ea6 Mon Sep 17 00:00:00 2001 From: Lance Release Date: Fri, 12 Apr 2024 22:09:12 +0000 Subject: [PATCH 14/19] =?UTF-8?q?[python]=20Bump=20version:=200.6.8=20?= =?UTF-8?q?=E2=86=92=200.6.9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/.bumpversion.cfg | 2 +- python/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/.bumpversion.cfg b/python/.bumpversion.cfg index 5c1e81e7..09bbca81 100644 --- a/python/.bumpversion.cfg +++ b/python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.8 +current_version = 0.6.9 commit = True message = [python] Bump version: {current_version} → {new_version} tag = True diff --git a/python/pyproject.toml b/python/pyproject.toml index b2e80263..e4f04513 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lancedb" -version = "0.6.8" +version = "0.6.9" dependencies = [ "deprecation", "pylance==0.10.12", From c7fbc4aaeec314b48402e8d901bc113bee483f72 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Sat, 13 Apr 2024 15:02:57 -0700 Subject: [PATCH 15/19] docs: fix 
minor typo (#1220) --- node/src/index.ts | 2 +- nodejs/lancedb/connection.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/node/src/index.ts b/node/src/index.ts index fe6656d4..f018e3fb 100644 --- a/node/src/index.ts +++ b/node/src/index.ts @@ -163,7 +163,7 @@ export interface CreateTableOptions { /** * Connect to a LanceDB instance at the given URI. * - * Accpeted formats: + * Accepted formats: * * - `/path/to/database` - local database * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage diff --git a/nodejs/lancedb/connection.ts b/nodejs/lancedb/connection.ts index 66502f35..8bc8b7f5 100644 --- a/nodejs/lancedb/connection.ts +++ b/nodejs/lancedb/connection.ts @@ -20,7 +20,7 @@ import { Table as ArrowTable, Schema } from "apache-arrow"; /** * Connect to a LanceDB instance at the given URI. * - * Accpeted formats: + * Accepted formats: * * - `/path/to/database` - local database * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage From 2e7ab373dc1d2d623200834ac6a204172e2045c8 Mon Sep 17 00:00:00 2001 From: Bert Date: Wed, 17 Apr 2024 09:29:10 -0400 Subject: [PATCH 16/19] fix: update lance to 0.10.13 (#1226) --- Cargo.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 326e3899..6e5592d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"] categories = ["database-implementations"] [workspace.dependencies] -lance = { "version" = "=0.10.12", "features" = ["dynamodb"] } -lance-index = { "version" = "=0.10.12" } -lance-linalg = { "version" = "=0.10.12" } -lance-testing = { "version" = "=0.10.12" } +lance = { "version" = "=0.10.13", "features" = ["dynamodb"] } +lance-index = { "version" = "=0.10.13" } +lance-linalg = { "version" = "=0.10.13" } +lance-testing = { "version" = "=0.10.13" } # Note that this one does not include pyarrow 
arrow = { version = "50.0", optional = false } arrow-array = "50.0" From 1e5ccb161415cb578c65657a816a1f3fb7b9be8a Mon Sep 17 00:00:00 2001 From: Rob Meng Date: Fri, 19 Apr 2024 10:31:39 -0400 Subject: [PATCH 17/19] chore: upgrade lance to 0.10.15 (#1229) --- Cargo.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6e5592d8..c867f684 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"] categories = ["database-implementations"] [workspace.dependencies] -lance = { "version" = "=0.10.13", "features" = ["dynamodb"] } -lance-index = { "version" = "=0.10.13" } -lance-linalg = { "version" = "=0.10.13" } -lance-testing = { "version" = "=0.10.13" } +lance = { "version" = "=0.10.15", "features" = ["dynamodb"] } +lance-index = { "version" = "=0.10.15" } +lance-linalg = { "version" = "=0.10.15" } +lance-testing = { "version" = "=0.10.15" } # Note that this one does not include pyarrow arrow = { version = "50.0", optional = false } arrow-array = "50.0" From c1a7d6547347d1b84df982f7412331352a548508 Mon Sep 17 00:00:00 2001 From: Alex Kohler Date: Fri, 19 Apr 2024 21:55:16 -0400 Subject: [PATCH 18/19] chore: fix get_registry call in baai embeddings example (#1230) --- docs/src/embeddings/default_embedding_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/embeddings/default_embedding_functions.md b/docs/src/embeddings/default_embedding_functions.md index d04be52e..be05d9fa 100644 --- a/docs/src/embeddings/default_embedding_functions.md +++ b/docs/src/embeddings/default_embedding_functions.md @@ -159,7 +159,7 @@ Allows you to set parameters when registering a `sentence-transformers` object. 
from lancedb.embeddings import get_registry db = lancedb.connect("/tmp/db") - model = get_registry.get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu") + model = get_registry().get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu") class Words(LanceModel): text: str = model.SourceField() From 431f94e5644936e67298e81285c76d8fd1858d29 Mon Sep 17 00:00:00 2001 From: Lance Release Date: Mon, 22 Apr 2024 17:42:24 +0000 Subject: [PATCH 19/19] =?UTF-8?q?[python]=20Bump=20version:=200.6.9=20?= =?UTF-8?q?=E2=86=92=200.6.10?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/.bumpversion.cfg | 2 +- python/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/.bumpversion.cfg b/python/.bumpversion.cfg index 09bbca81..e839dc99 100644 --- a/python/.bumpversion.cfg +++ b/python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.9 +current_version = 0.6.10 commit = True message = [python] Bump version: {current_version} → {new_version} tag = True diff --git a/python/pyproject.toml b/python/pyproject.toml index e4f04513..b45a2da7 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lancedb" -version = "0.6.9" +version = "0.6.10" dependencies = [ "deprecation", "pylance==0.10.12",