[python] Bump version: 0.6.8 → 0.6.9

feat: bump lance version from 0.10.10 to 0.10.12 (#1219 )
doc: fix typo, broken links (#1218 )
2025-12-23 13:29:57 +00:00 · 2024-04-12 22:09:12 +00:00 · 2024-04-12 15:08:39 -07:00 · 2024-04-11 14:58:51 -07:00 · 2024-04-11 17:30:45 +05:30 · 2024-04-11 15:32:08 +05:30
79 changed files with 4072 additions and 1470 deletions
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.16
+current_version = 0.4.17
 commit = True
 message = Bump version: {current_version} → {new_version}
 tag = True
--- a/.github/workflows/cargo-publish.yml
+++ b/.github/workflows/cargo-publish.yml
@@ -8,6 +8,9 @@ env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
  # key, so we set it to make sure it is always consistent.
  CARGO_TERM_COLOR: always
+  # Up-to-date compilers needed for fp16kernels.
+  CC: gcc-12
+  CXX: g++-12

 jobs:
  build:
--- a/.github/workflows/node.yml
+++ b/.github/workflows/node.yml
@@ -107,6 +107,7 @@ jobs:
      AWS_ENDPOINT: http://localhost:4566
      # this one is for dynamodb
      DYNAMODB_ENDPOINT: http://localhost:4566
+      ALLOW_HTTP: true
    steps:
    - uses: actions/checkout@v4
      with:
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -28,6 +28,10 @@ jobs:
      run:
        shell: bash
        working-directory: nodejs
+    env:
+      # Need up-to-date compilers for kernels
+      CC: gcc-12
+      CXX: g++-12
    steps:
    - uses: actions/checkout@v4
      with:
@@ -81,7 +85,12 @@ jobs:
      run: |
        npm ci
        npm run build
+    - name: Setup localstack
+      working-directory: .
+      run: docker compose up --detach --wait
    - name: Test
+      env:
+        S3_TEST: "1"
      run: npm run test
  macos:
    timeout-minutes: 30
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -6,6 +6,8 @@ on:

 jobs:
  linux:
+    # Only runs on tags that match the python-make-release action
+    if: startsWith(github.ref, 'refs/tags/python-v')
    name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
    timeout-minutes: 60
    strategy:
@@ -44,6 +46,8 @@ jobs:
          token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
          repo: "pypi"
  mac:
+    # Only runs on tags that match the python-make-release action
+    if: startsWith(github.ref, 'refs/tags/python-v')
    timeout-minutes: 60
    runs-on: ${{ matrix.config.runner }}
    strategy:
@@ -76,6 +80,8 @@ jobs:
          token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
          repo: "pypi"
  windows:
+    # Only runs on tags that match the python-make-release action
+    if: startsWith(github.ref, 'refs/tags/python-v')
    timeout-minutes: 60
    runs-on: windows-latest
    strategy:
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -99,6 +99,8 @@ jobs:
          workspaces: python
      - uses: ./.github/workflows/build_linux_wheel
      - uses: ./.github/workflows/run_tests
+        with:
+          integration: true
      # Make sure wheels are not included in the Rust cache
      - name: Delete wheels
        run: rm -rf target/wheels
@@ -190,4 +192,4 @@ jobs:
          pip install -e .[tests]
          pip install tantivy
      - name: Run tests
-        run: pytest -m "not slow" -x -v --durations=30 python/tests
+        run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests
--- a/.github/workflows/run_tests/action.yml
+++ b/.github/workflows/run_tests/action.yml
@@ -5,6 +5,10 @@ inputs:
  python-minor-version:
    required: true
    description: "8 9 10 11 12"
+  integration:
+    required: false
+    description: "Run integration tests"
+    default: "false"
 runs:
  using: "composite"
  steps:
@@ -12,6 +16,16 @@ runs:
      shell: bash
      run: |
        pip3 install $(ls target/wheels/lancedb-*.whl)[tests,dev]
-    - name: pytest
+    - name: Setup localstack for integration tests
+      if: ${{ inputs.integration == 'true' }}
      shell: bash
+      working-directory: .
+      run: docker compose up --detach --wait
+    - name: pytest (with integration)
+      shell: bash
+      if: ${{ inputs.integration == 'true' }}
      run: pytest -m "not slow" -x -v --durations=30 python/python/tests
+    - name: pytest (no integration tests)
+      shell: bash
+      if: ${{ inputs.integration != 'true' }}
+      run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/python/tests
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -76,6 +76,9 @@ jobs:
          sudo apt install -y protobuf-compiler libssl-dev
    - name: Build
      run: cargo build --all-features
+    - name: Start S3 integration test environment
+      working-directory: .
+      run: docker compose up --detach --wait
    - name: Run tests
      run: cargo test --all-features
    - name: Run examples
@@ -105,7 +108,8 @@ jobs:
      - name: Build
        run: cargo build --all-features
      - name: Run tests
-        run: cargo test --all-features
+        # Run with everything except the integration tests.
+        run: cargo test --features remote,fp16kernels
  windows:
    runs-on: windows-2022
    steps:
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
 categories = ["database-implementations"]

 [workspace.dependencies]
-lance = { "version" = "=0.10.9", "features" = ["dynamodb"] }
-lance-index = { "version" = "=0.10.9" }
-lance-linalg = { "version" = "=0.10.9" }
-lance-testing = { "version" = "=0.10.9" }
+lance = { "version" = "=0.10.12", "features" = ["dynamodb"] }
+lance-index = { "version" = "=0.10.12" }
+lance-linalg = { "version" = "=0.10.12" }
+lance-testing = { "version" = "=0.10.12" }
 # Note that this one does not include pyarrow
 arrow = { version = "50.0", optional = false }
 arrow-array = "50.0"
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,18 +1,18 @@
 version: "3.9"
 services:
  localstack:
-    image: localstack/localstack:0.14
+    image: localstack/localstack:3.3
    ports:
      - 4566:4566
    environment:
-      - SERVICES=s3,dynamodb
+      - SERVICES=s3,dynamodb,kms
      - DEBUG=1
      - LS_LOG=trace
      - DOCKER_HOST=unix:///var/run/docker.sock
      - AWS_ACCESS_KEY_ID=ACCESSKEY
      - AWS_SECRET_ACCESS_KEY=SECRETKEY
    healthcheck:
-      test: [ "CMD", "curl", "-f", "http://localhost:4566/health" ]
+      test: [ "CMD", "curl", "-s", "http://localhost:4566/_localstack/health" ]
      interval: 5s
      retries: 3
      start_period: 10s
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -57,16 +57,6 @@ plugins:
            - https://arrow.apache.org/docs/objects.inv
            - https://pandas.pydata.org/docs/objects.inv
  - mkdocs-jupyter
-  - ultralytics:
-      verbose: True
-      enabled: True
-      default_image: "assets/lancedb_and_lance.png" # Default image for all pages
-      add_image: True # Automatically add meta image
-      add_keywords: True # Add page keywords in the header tag
-      add_share_buttons: True # Add social share buttons
-      add_authors: False # Display page authors
-      add_desc: False
-      add_dates: False

 markdown_extensions:
  - admonition
@@ -104,6 +94,14 @@ nav:
              - Overview: hybrid_search/hybrid_search.md
              - Comparing Rerankers: hybrid_search/eval.md
              - Airbnb financial data example: notebooks/hybrid_search.ipynb
+          - Reranking:
+              - Quickstart: reranking/index.md
+              - Cohere Reranker: reranking/cohere.md
+              - Linear Combination Reranker: reranking/linear_combination.md
+              - Cross Encoder Reranker: reranking/cross_encoder.md
+              - ColBERT Reranker: reranking/colbert.md
+              - OpenAI Reranker: reranking/openai.md
+              - Building Custom Rerankers: reranking/custom_reranker.md
          - Filtering: sql.md
          - Versioning & Reproducibility: notebooks/reproducibility.ipynb
          - Configuring Storage: guides/storage.md
@@ -120,9 +118,10 @@ nav:
          - Pandas and PyArrow: python/pandas_and_pyarrow.md
          - Polars: python/polars_arrow.md
          - DuckDB: python/duckdb.md
-          - LangChain 🔗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
-          - LangChain JS/TS 🔗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb
-          - LlamaIndex 🦙: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
+          - LangChain:
+            - LangChain 🔗: https://python.langchain.com/docs/integrations/vectorstores/lancedb/
+            - LangChain JS/TS 🔗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
+          - LlamaIndex 🦙: https://docs.llamaindex.ai/en/stable/examples/vector_stores/LanceDBIndexDemo/
          - Pydantic: python/pydantic.md
          - Voxel51: integrations/voxel51.md
          - PromptTools: integrations/prompttools.md
@@ -143,7 +142,6 @@ nav:
              - TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
          - 🦀 Rust:
              - Overview: examples/examples_rust.md
-      - 🔧 CLI & Config: cli_config.md
      - 💭 FAQs: faq.md
      - ⚙️ API reference:
          - 🐍 Python: python/python.md
@@ -171,6 +169,14 @@ nav:
          - Overview: hybrid_search/hybrid_search.md
          - Comparing Rerankers: hybrid_search/eval.md
          - Airbnb financial data example: notebooks/hybrid_search.ipynb
+      - Reranking:
+          - Quickstart: reranking/index.md
+          - Cohere Reranker: reranking/cohere.md
+          - Linear Combination Reranker: reranking/linear_combination.md
+          - Cross Encoder Reranker: reranking/cross_encoder.md
+          - ColBERT Reranker: reranking/colbert.md
+          - OpenAI Reranker: reranking/openai.md
+          - Building Custom Rerankers: reranking/custom_reranker.md
      - Filtering: sql.md
      - Versioning & Reproducibility: notebooks/reproducibility.ipynb
      - Configuring Storage: guides/storage.md
@@ -187,8 +193,8 @@ nav:
      - Pandas and PyArrow: python/pandas_and_pyarrow.md
      - Polars: python/polars_arrow.md
      - DuckDB: python/duckdb.md
-      - LangChain 🦜️🔗↗: https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/lancedb.html
-      - LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/lancedb
+      - LangChain 🦜️🔗↗: https://python.langchain.com/docs/integrations/vectorstores/lancedb
+      - LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
      - LlamaIndex 🦙↗: https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html
      - Pydantic: python/pydantic.md
      - Voxel51: integrations/voxel51.md
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -3,4 +3,3 @@ mkdocs-jupyter==0.24.1
 mkdocs-material==9.5.3
 mkdocstrings[python]==0.20.0
 pydantic
-mkdocs-ultralytics-plugin==0.0.44
--- a/docs/src/cli_config.md
+++ b/docs/src/cli_config.md
@@ -1,51 +0,0 @@
-
-# CLI & Config
-
-## LanceDB CLI
-Once lanceDB is installed, you can access the CLI using `lancedb` command on the console.
-
-```
-lancedb
-```
-
-This lists out all the various command-line options available. You can get the usage or help for a particular command.
-
-```
-lancedb {command} --help
-```
-
-## LanceDB config
-LanceDB uses a global config file to store certain settings. These settings are configurable using the lanceDB cli.
-To view your config settings, you can use:
-
-```
-lancedb config
-```
-
-These config parameters can be tuned using the cli.
-
-```
-lancedb {config_name} --{argument}
-```
-
-## LanceDB Opt-in Diagnostics
-When enabled, LanceDB will send anonymous events to help us improve LanceDB. These diagnostics are used only for error reporting and no data is collected. Error & stats allow us to automate certain aspects of bug reporting, prioritization of fixes and feature requests.
-These diagnostics are opt-in and can be enabled or disabled using the `lancedb diagnostics` command. These are enabled by default.
-
-### Get usage help
-
-```
-lancedb diagnostics --help
-```
-
-### Disable diagnostics
-
-```
-lancedb diagnostics --disabled
-```
-
-### Enable diagnostics
-
-```
-lancedb diagnostics --enabled
-```
--- a/docs/src/embeddings/default_embedding_functions.md
+++ b/docs/src/embeddings/default_embedding_functions.md
@@ -154,9 +154,12 @@ Allows you to set parameters when registering a `sentence-transformers` object.
 !!! note "BAAI Embeddings example"
    Here is an example that uses BAAI embedding model from the HuggingFace Hub [supported models](https://huggingface.co/models?library=sentence-transformers)
    ```python
+    import lancedb
+    from lancedb.pydantic import LanceModel, Vector
+    from lancedb.embeddings import get_registry
+
    db = lancedb.connect("/tmp/db")
-    registry = EmbeddingFunctionRegistry.get_instance()
-    model = registry.get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu")
+    model = get_registry().get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu")

    class Words(LanceModel):
        text: str = model.SourceField()
@@ -165,7 +168,7 @@ Allows you to set parameters when registering a `sentence-transformers` object.
    table = db.create_table("words", schema=Words)
    table.add(
        [
-            {"text": "hello world"}
+            {"text": "hello world"},
            {"text": "goodbye world"}
        ]
    )
@@ -177,6 +180,32 @@ Allows you to set parameters when registering a `sentence-transformers` object.
 Visit sentence-transformers [HuggingFace HUB](https://huggingface.co/sentence-transformers) page for more information on the available models.


+### Huggingface embedding models
+We offer support for all Hugging Face models that can be loaded via the [transformers](https://huggingface.co/docs/transformers/en/index) library. The default model is `colbert-ir/colbertv2.0`, which also has its own dedicated alias: `registry.get("colbert")`.
+
+Example usage:
+```python
+import lancedb
+import pandas as pd
+
+from lancedb.embeddings import get_registry
+from lancedb.pydantic import LanceModel, Vector
+
+model = get_registry().get("huggingface").create(name='facebook/bart-base')
+
+class TextModel(LanceModel):
+    text: str = model.SourceField()
+    vector: Vector(model.ndims()) = model.VectorField()
+
+db = lancedb.connect("/tmp/db")
+table = db.create_table("greets", schema=TextModel)
+table.add(pd.DataFrame({"text": ["hi hello sayonara", "goodbye world"]}))
+query = "old greeting"
+actual = table.search(query).limit(1).to_pydantic(TextModel)[0]
+print(actual.text)
+```
+
+
 ### OpenAI embeddings
 LanceDB registers the OpenAI embeddings function in the registry by default, as `openai`. Below are the parameters that you can customize when creating the instances:

@@ -187,18 +216,21 @@ LanceDB registers the OpenAI embeddings function in the registry by default, as


 ```python
+import lancedb
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.embeddings import get_registry
+
 db = lancedb.connect("/tmp/db")
-registry = EmbeddingFunctionRegistry.get_instance()
-func = registry.get("openai").create()
+func = get_registry().get("openai").create(name="text-embedding-ada-002")

 class Words(LanceModel):
    text: str = func.SourceField()
    vector: Vector(func.ndims()) = func.VectorField()

-table = db.create_table("words", schema=Words)
+table = db.create_table("words", schema=Words, mode="overwrite")
 table.add(
    [
-        {"text": "hello world"}
+        {"text": "hello world"},
        {"text": "goodbye world"}
    ]
    )
@@ -327,6 +359,10 @@ Supported parameters (to be passed in `create` method) are:
 Usage Example:

 ```python
+import lancedb
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.embeddings import get_registry
+
 model = get_registry().get("bedrock-text").create()

 class TextModel(LanceModel):
@@ -361,10 +397,12 @@ This embedding function supports ingesting images as both bytes and urls. You ca
    LanceDB supports ingesting images directly from accessible links.

 ```python
+import lancedb
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.embeddings import get_registry

 db = lancedb.connect(tmp_path)
-registry = EmbeddingFunctionRegistry.get_instance()
-func = registry.get("open-clip").create()
+func = get_registry().get("open-clip").create()

 class Images(LanceModel):
    label: str
@@ -439,9 +477,12 @@ This function is registered as `imagebind` and supports Audio, Video and Text mo
 Below is an example demonstrating how the API works:

 ```python
+import lancedb
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.embeddings import get_registry
+
 db = lancedb.connect(tmp_path)
-registry = EmbeddingFunctionRegistry.get_instance()
-func = registry.get("imagebind").create()
+func = get_registry().get("imagebind").create()

 class ImageBindModel(LanceModel):
    text: str
--- a/docs/src/embeddings/index.md
+++ b/docs/src/embeddings/index.md
@@ -12,3 +12,63 @@ LanceDB supports 3 methods of working with embeddings.

 For python users, there is also a legacy [with_embeddings API](./legacy.md).
 It is retained for compatibility and will be removed in a future version.
+
+## Quickstart
+
+To get started with embeddings, you can use the built-in embedding functions.
+
+### OpenAI Embedding function
+LanceDB registers the OpenAI embeddings function in the registry as `openai`. You can pass any supported model name to the `create` method. By default it uses `"text-embedding-ada-002"`.
+
+```python
+import lancedb
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.embeddings import get_registry
+
+db = lancedb.connect("/tmp/db")
+func = get_registry().get("openai").create(name="text-embedding-ada-002")
+
+class Words(LanceModel):
+    text: str = func.SourceField()
+    vector: Vector(func.ndims()) = func.VectorField()
+
+table = db.create_table("words", schema=Words, mode="overwrite")
+table.add(
+    [
+        {"text": "hello world"},
+        {"text": "goodbye world"}
+    ]
+    )
+
+query = "greetings"
+actual = table.search(query).limit(1).to_pydantic(Words)[0]
+print(actual.text)
+```
+
+### Sentence Transformers Embedding function
+LanceDB registers the Sentence Transformers embeddings function in the registry as `sentence-transformers`. You can pass any supported model name to the `create` method. By default it uses `"sentence-transformers/paraphrase-MiniLM-L6-v2"`.
+
+```python
+import lancedb
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.embeddings import get_registry
+
+db = lancedb.connect("/tmp/db")
+model = get_registry().get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu")
+
+class Words(LanceModel):
+    text: str = model.SourceField()
+    vector: Vector(model.ndims()) = model.VectorField()
+
+table = db.create_table("words", schema=Words)
+table.add(
+    [
+        {"text": "hello world"},
+        {"text": "goodbye world"}
+    ]
+)
+
+query = "greetings"
+actual = table.search(query).limit(1).to_pydantic(Words)[0]
+print(actual.text)
+```
--- a/docs/src/guides/storage.md
+++ b/docs/src/guides/storage.md
@@ -55,18 +55,139 @@ LanceDB OSS supports object stores such as AWS S3 (and compatible stores), Azure
    const db = await lancedb.connect("az://bucket/path");
    ```

-In most cases, when running in the respective cloud and permissions are set up correctly, no additional configuration is required. When running outside of the respective cloud, authentication credentials must be provided using environment variables. In general, these environment variables are the same as those used by the respective cloud SDKs. The sections below describe the environment variables that can be used to configure each object store.
+In most cases, when running in the respective cloud and permissions are set up correctly, no additional configuration is required. When running outside of the respective cloud, authentication credentials must be provided. Credentials and other configuration options can be set in two ways: by setting environment variables, or by passing a `storage_options` object to the `connect` function. For example, to increase the request timeout to 60 seconds, you can set the `TIMEOUT` environment variable to `60s`:

-LanceDB OSS uses the [object-store](https://docs.rs/object_store/latest/object_store/) Rust crate for object store access. There are general environment variables that can be used to configure the object store, such as the request timeout and proxy configuration. See the [object_store ClientConfigKey](https://docs.rs/object_store/latest/object_store/enum.ClientConfigKey.html) doc for available configuration options. The environment variables that can be set are the snake-cased versions of these variable names. For example, to set `ProxyUrl` use the environment variable `PROXY_URL`. (Don't let the Rust docs intimidate you! We link to them so you can see an up-to-date list of the available options.)
+```bash
+export TIMEOUT=60s
+```
+
+!!! note "`storage_options` availability"
+
+    The `storage_options` parameter is only available in the Python *async* API and the JavaScript API.
+    It is not yet supported in the Python synchronous API.
+
+If you only want this to apply to one particular connection, you can pass the `storage_options` argument when opening the connection:
+
+=== "Python"
+
+    ```python
+    import lancedb
+    db = await lancedb.connect_async(
+        "s3://bucket/path",
+        storage_options={"timeout": "60s"}
+    )
+    ```
+
+=== "JavaScript"
+
+    ```javascript
+    const lancedb = require("lancedb");
+    const db = await lancedb.connect("s3://bucket/path",
+                                     {storageOptions: {timeout: "60s"}});
+    ```
+
+Getting even more specific, you can set the `timeout` for only a particular table:
+
+=== "Python"
+
+    <!-- skip-test -->
+    ```python
+    import lancedb
+    db = await lancedb.connect_async("s3://bucket/path")
+    table = await db.create_table(
+        "table",
+        [{"a": 1, "b": 2}],
+        storage_options={"timeout": "60s"}
+    )
+    ```
+
+=== "JavaScript"
+
+    <!-- skip-test -->
+    ```javascript
+    const lancedb = require("lancedb");
+    const db = await lancedb.connect("s3://bucket/path");
+    const table = await db.createTable(
+        "table",
+        [{ a: 1, b: 2}],
+        {storageOptions: {timeout: "60s"}}
+    );
+    ```
+
+!!! info "Storage option casing"
+
+    The storage option keys are case-insensitive. So `connect_timeout` and `CONNECT_TIMEOUT` are the same setting. Usually lowercase is used in the `storage_options` argument and uppercase is used for environment variables. In the `lancedb` Node package, the keys can also be provided in `camelCase` capitalization. For example, `connectTimeout` is equivalent to `connect_timeout`.
+
+### General configuration
+
+There are several options that can be set for all object stores, mostly related to network client configuration.
+
+<!-- from here: https://docs.rs/object_store/latest/object_store/enum.ClientConfigKey.html -->
+
+| Key                        | Description                                                                                      |
+|----------------------------|--------------------------------------------------------------------------------------------------|
+| `allow_http`               | Allow non-TLS, i.e. non-HTTPS connections. Default: `False`.                                      |
+| `allow_invalid_certificates`| Skip certificate validation on HTTPS connections. Default: `False`.                               |
+| `connect_timeout`          | Timeout for only the connect phase of a Client. Default: `5s`.                                    |
+| `timeout`                  | Timeout for the entire request, from connection until the response body has finished. Default: `30s`. |
+| `user_agent`               | User agent string to use in requests.                                                             |
+| `proxy_url`                | URL of a proxy server to use for requests. Default: `None`.                                       |
+| `proxy_ca_certificate`     | PEM-formatted CA certificate for proxy connections.                                                |
+| `proxy_excludes`           | List of hosts that bypass the proxy. This is a comma-separated list of domains and IP masks. Any subdomain of the provided domain will be bypassed. For example, `example.com, 192.168.1.0/24` would bypass `https://api.example.com`, `https://www.example.com`, and any IP in the range `192.168.1.0/24`. |


 ### AWS S3

-To configure credentials for AWS S3, you can use the `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` environment variables.
+To configure credentials for AWS S3, you can use the `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` keys. Region can also be set, but it is not mandatory when using AWS.
+These can be set as environment variables or passed in the `storage_options` parameter:
+
+=== "Python"
+
+    ```python
+    import lancedb
+    db = await lancedb.connect_async(
+        "s3://bucket/path",
+        storage_options={
+            "aws_access_key_id": "my-access-key",
+            "aws_secret_access_key": "my-secret-key",
+            "aws_session_token": "my-session-token",
+        }
+    )
+    ```
+
+=== "JavaScript"
+
+    ```javascript
+    const lancedb = require("lancedb");
+    const db = await lancedb.connect(
+        "s3://bucket/path",
+        {
+            storageOptions: {
+                awsAccessKeyId: "my-access-key",
+                awsSecretAccessKey: "my-secret-key",
+                awsSessionToken: "my-session-token",
+            }
+        }
+    );
+    ```

 Alternatively, if you are using AWS SSO, you can use the `AWS_PROFILE` and `AWS_DEFAULT_REGION` environment variables.

-You can see a full list of environment variables [here](https://docs.rs/object_store/latest/object_store/aws/struct.AmazonS3Builder.html#method.from_env).
+The following keys can be used as either environment variables or keys in the `storage_options` parameter:
+
+| Key                                | Description                                                                                          |
+|------------------------------------|------------------------------------------------------------------------------------------------------|
+| `aws_region` / `region`             | The AWS region the bucket is in. This can be automatically detected when using AWS S3, but must be specified for S3-compatible stores. |
+| `aws_access_key_id` / `access_key_id` | The AWS access key ID to use.                                                                       |
+| `aws_secret_access_key` / `secret_access_key` | The AWS secret access key to use.                                                               |
+| `aws_session_token` / `session_token` | The AWS session token to use.                                                                     |
+| `aws_endpoint` / `endpoint`         | The endpoint to use for S3-compatible stores.                                                       |
+| `aws_virtual_hosted_style_request` / `virtual_hosted_style_request` | Whether to use virtual hosted-style requests, where the bucket name is part of the endpoint. Meant to be used with `aws_endpoint`. Default: `False`. |
+| `aws_s3_express` / `s3_express`     | Whether to use S3 Express One Zone endpoints. Default: `False`. See more details below.             |
+| `aws_server_side_encryption`        | The server-side encryption algorithm to use. Must be one of `"AES256"`, `"aws:kms"`, or `"aws:kms:dsse"`. Default: `None`. |
+| `aws_sse_kms_key_id`                | The KMS key ID to use for server-side encryption. If set, `aws_server_side_encryption` must be `"aws:kms"` or `"aws:kms:dsse"`. |
+| `aws_sse_bucket_key_enabled`        | Whether to use bucket keys for server-side encryption.                                               |
+

 !!! tip "Automatic cleanup for failed writes"

@@ -146,22 +267,174 @@ For **read-only access**, LanceDB will need a policy such as:

 #### S3-compatible stores

-LanceDB can also connect to S3-compatible stores, such as MinIO. To do so, you must specify two environment variables: `AWS_ENDPOINT` and `AWS_DEFAULT_REGION`. `AWS_ENDPOINT` should be the URL of the S3-compatible store, and `AWS_DEFAULT_REGION` should be the region to use.
+LanceDB can also connect to S3-compatible stores, such as MinIO. To do so, you must specify both region and endpoint:
+
+=== "Python"
+
+    ```python
+    import lancedb
+    db = await lancedb.connect_async(
+        "s3://bucket/path",
+        storage_options={
+            "region": "us-east-1",
+            "endpoint": "http://minio:9000",
+        }
+    )
+    ```
+
+=== "JavaScript"
+
+    ```javascript
+    const lancedb = require("lancedb");
+    const db = await lancedb.connect(
+        "s3://bucket/path",
+        {
+            storageOptions: {
+                region: "us-east-1",
+                endpoint: "http://minio:9000",
+            }
+        }
+    );
+    ```
+
+This can also be done with the ``AWS_ENDPOINT`` and ``AWS_DEFAULT_REGION`` environment variables.
+
+#### S3 Express
+
+LanceDB supports [S3 Express One Zone](https://aws.amazon.com/s3/storage-classes/express-one-zone/) endpoints, but requires additional configuration. Also, S3 Express endpoints only support connecting from an EC2 instance within the same region.
+
+To configure LanceDB to use an S3 Express endpoint, you must set the storage option `s3_express`. The bucket name in your table URI should **include the suffix**.
+
+=== "Python"
+
+    ```python
+    import lancedb
+    db = await lancedb.connect_async(
+        "s3://my-bucket--use1-az4--x-s3/path",
+        storage_options={
+            "region": "us-east-1",
+            "s3_express": "true",
+        }
+    )
+    ```
+
+=== "JavaScript"
+
+    ```javascript
+    const lancedb = require("lancedb");
+    const db = await lancedb.connect(
+        "s3://my-bucket--use1-az4--x-s3/path",
+        {
+            storageOptions: {
+                region: "us-east-1",
+                s3Express: "true",
+            }
+        }
+    );
+    ```

-<!-- TODO: we should also document the use of S3 Express once we fully support it -->

 ### Google Cloud Storage

-GCS credentials are configured by setting the `GOOGLE_SERVICE_ACCOUNT` environment variable to the path of a JSON file containing the service account credentials. There are several aliases for this environment variable, documented [here](https://docs.rs/object_store/latest/object_store/gcp/struct.GoogleCloudStorageBuilder.html#method.from_env).
+GCS credentials are configured by setting the `GOOGLE_SERVICE_ACCOUNT` environment variable to the path of a JSON file containing the service account credentials. Alternatively, you can pass the path to the JSON file in the `storage_options`:
+
+=== "Python"
+
+    <!-- skip-test -->
+    ```python
+    import lancedb
+    db = await lancedb.connect_async(
+        "gs://my-bucket/my-database",
+        storage_options={
+            "service_account": "path/to/service-account.json",
+        }
+    )
+    ```
+
+=== "JavaScript"
+
+    ```javascript
+    const lancedb = require("lancedb");
+    const db = await lancedb.connect(
+        "gs://my-bucket/my-database",
+        {
+            storageOptions: {
+                serviceAccount: "path/to/service-account.json",
+            }
+        }
+    );
+    ```


 !!! info "HTTP/2 support"

    By default, GCS uses HTTP/1 for communication, as opposed to HTTP/2. This improves maximum throughput significantly. However, if you wish to use HTTP/2 for some reason, you can set the environment variable `HTTP1_ONLY` to `false`.

+
+The following keys can be used as either environment variables or keys in the `storage_options` parameter:
+<!-- source: https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html -->
+
+| Key                                   | Description                                  |
+|---------------------------------------|----------------------------------------------|
+| ``google_service_account`` / `service_account` | Path to the service account JSON file.       |
+| ``google_service_account_key``        | The serialized service account key.          |
+| ``google_application_credentials``    | Path to the application credentials.         |
+
+
 ### Azure Blob Storage

-Azure Blob Storage credentials can be configured by setting the `AZURE_STORAGE_ACCOUNT_NAME` and ``AZURE_STORAGE_ACCOUNT_KEY`` environment variables. The full list of environment variables that can be set are documented [here](https://docs.rs/object_store/latest/object_store/azure/struct.MicrosoftAzureBuilder.html#method.from_env).
+Azure Blob Storage credentials can be configured by setting the `AZURE_STORAGE_ACCOUNT_NAME` and `AZURE_STORAGE_ACCOUNT_KEY` environment variables. Alternatively, you can pass the account name and key in the `storage_options` parameter:

+=== "Python"
+
+    <!-- skip-test -->
+    ```python
+    import lancedb
+    db = await lancedb.connect_async(
+        "az://my-container/my-database",
+        storage_options={
+            "account_name": "some-account",
+            "account_key": "some-key",
+        }
+    )
+    ```
+
+=== "JavaScript"
+
+    ```javascript
+    const lancedb = require("lancedb");
+    const db = await lancedb.connect(
+        "az://my-container/my-database",
+        {
+            storageOptions: {
+                accountName: "some-account",
+                accountKey: "some-key",
+            }
+        }
+    );
+    ```
+
+These keys can be used as either environment variables or keys in the `storage_options` parameter:
+
+<!-- source: https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html -->
+
+| Key                                   | Description                                                                                      |
+|---------------------------------------|--------------------------------------------------------------------------------------------------|
+| ``azure_storage_account_name``        | The name of the azure storage account.                                                           |
+| ``azure_storage_account_key``         | The master key for accessing the storage account.                                                |
+| ``azure_client_id``                   | Service principal client id for authorizing requests.                                            |
+| ``azure_client_secret``               | Service principal client secret for authorizing requests.                                        |
+| ``azure_tenant_id``                   | Tenant id used in oauth flows.                                                                   |
+| ``azure_storage_sas_key``             | Shared access signature. The signature is expected to be percent-encoded, much like they are provided in the azure storage explorer or azure portal. |
+| ``azure_storage_token``               | Bearer token.                                                                                    |
+| ``azure_storage_use_emulator``        | Use object store with azurite storage emulator.                                                  |
+| ``azure_endpoint``                    | Override the endpoint used to communicate with blob storage.                                      |
+| ``azure_use_fabric_endpoint``         | Use object store with url scheme account.dfs.fabric.microsoft.com.                               |
+| ``azure_msi_endpoint``                | Endpoint to request an IMDS managed identity token.                                              |
+| ``azure_object_id``                   | Object id for use with managed identity authentication.                                          |
+| ``azure_msi_resource_id``             | Msi resource id for use with managed identity authentication.                                    |
+| ``azure_federated_token_file``        | File containing token for Azure AD workload identity federation.                                 |
+| ``azure_use_azure_cli``               | Use azure cli for acquiring access token.                                                        |
+| ``azure_disable_tagging``             | Disables tagging objects. This can be desirable if not supported by the backing store.           |

 <!-- TODO: demonstrate how to configure networked file systems for optimal performance -->
--- a/docs/src/js/modules.md
+++ b/docs/src/js/modules.md
@@ -142,6 +142,7 @@ rules are as follows:

 **`Example`**

+```ts
 import { fromTableToBuffer, makeArrowTable } from "../arrow";
 import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";

--- a/docs/src/python/duckdb.md
+++ b/docs/src/python/duckdb.md
@@ -24,7 +24,8 @@ data = [
 table = db.create_table("pd_table", data=data)
 ```

-To query the table, first call `to_lance` to convert the table to a "dataset", which is an object that can be queried by DuckDB. Then all you need to do is reference that dataset by the same name in your SQL query.
+The `to_lance` method converts the LanceDB table to a `LanceDataset`, which is accessible to DuckDB through the Arrow compatibility layer.
+To query the resulting Lance dataset in DuckDB, all you need to do is reference the dataset by the same name in your SQL query.

 ```python
 import duckdb
--- a/docs/src/reranking/cohere.md
+++ b/docs/src/reranking/cohere.md
@@ -0,0 +1,75 @@
+# Cohere Reranker
+
+This re-ranker uses the [Cohere](https://cohere.ai/) API to rerank the search results. You can use this re-ranker by passing `CohereReranker()` to the `rerank()` method. Note that you'll either need to set the `COHERE_API_KEY` environment variable or pass the `api_key` argument to use this re-ranker.
+
+
+!!! note
+    Supported Query Types: Hybrid, Vector, FTS
+
+
+```python
+import numpy
+import lancedb
+from lancedb.embeddings import get_registry
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.rerankers import CohereReranker
+
+embedder = get_registry().get("sentence-transformers").create()
+db = lancedb.connect("~/.lancedb")
+
+class Schema(LanceModel):
+    text: str = embedder.SourceField()
+    vector: Vector(embedder.ndims()) = embedder.VectorField()
+
+data = [
+    {"text": "hello world"},
+    {"text": "goodbye world"}
+    ]
+tbl = db.create_table("test", schema=Schema, mode="overwrite")
+tbl.add(data)
+reranker = CohereReranker(api_key="key")
+
+# Run vector search with a reranker
+result = tbl.search("hello").rerank(reranker=reranker).to_list() 
+
+# Run FTS search with a reranker
+result = tbl.search("hello", query_type="fts").rerank(reranker=reranker).to_list()
+
+# Run hybrid search with a reranker
+tbl.create_fts_index("text", replace=True)
+result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list()
+
+```
+
+Accepted Arguments
+----------------
+| Argument | Type | Default | Description |
+| --- | --- | --- | --- |
+| `model_name` | `str` | `"rerank-english-v2.0"` | The name of the reranker model to use. Available cohere models are: rerank-english-v2.0, rerank-multilingual-v2.0 |
+| `column` | `str` | `"text"` | The name of the column to use as input to the cross encoder model. |
+| `top_n` | `int` | `None` | The number of results to return. If None, will return all results. |
+| `api_key` | `str` | `None` | The API key for the Cohere API. If not provided, the `COHERE_API_KEY` environment variable is used. |
+| `return_score` | `str` | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score`. If "all" is supported, will return relevance score along with the vector and/or fts scores depending on query type |
+
+
+
+## Supported Scores for each query type
+You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type:
+
+### Hybrid Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ❌ Not Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
+
+### Vector Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ✅ Supported | Returns have vector(`_distance`) along with Hybrid Search score(`_relevance_score`) |
+
+### FTS Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ✅ Supported | Returns have FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
--- a/docs/src/reranking/colbert.md
+++ b/docs/src/reranking/colbert.md
@@ -0,0 +1,71 @@
+# ColBERT Reranker
+
+This re-ranker uses ColBERT model to rerank the search results. You can use this re-ranker by passing `ColbertReranker()` to the `rerank()` method. 
+!!! note
+    Supported Query Types: Hybrid, Vector, FTS
+
+
+```python
+import numpy
+import lancedb
+from lancedb.embeddings import get_registry
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.rerankers import ColbertReranker
+
+embedder = get_registry().get("sentence-transformers").create()
+db = lancedb.connect("~/.lancedb")
+
+class Schema(LanceModel):
+    text: str = embedder.SourceField()
+    vector: Vector(embedder.ndims()) = embedder.VectorField()
+
+data = [
+    {"text": "hello world"},
+    {"text": "goodbye world"}
+    ]
+tbl = db.create_table("test", schema=Schema, mode="overwrite")
+tbl.add(data)
+reranker = ColbertReranker()
+
+# Run vector search with a reranker
+result = tbl.search("hello").rerank(reranker=reranker).to_list() 
+
+# Run FTS search with a reranker
+result = tbl.search("hello", query_type="fts").rerank(reranker=reranker).to_list()
+
+# Run hybrid search with a reranker
+tbl.create_fts_index("text", replace=True)
+result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list()
+
+```
+
+Accepted Arguments
+----------------
+| Argument | Type | Default | Description |
+| --- | --- | --- | --- |
+| `model_name` | `str` | `"colbert-ir/colbertv2.0"` | The name of the reranker model to use.|
+| `column` | `str` | `"text"` | The name of the column to use as input to the cross encoder model. |
+| `device` | `str` | `None` | The device to use for the cross encoder model. If None, will use "cuda" if available, otherwise "cpu". |
+| `return_score` | `str` | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score`. If "all" is supported, will return relevance score along with the vector and/or fts scores depending on query type |
+
+
+## Supported Scores for each query type
+You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type:
+
+### Hybrid Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ❌ Not Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
+
+### Vector Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ✅ Supported | Returns have vector(`_distance`) along with Hybrid Search score(`_relevance_score`) |
+
+### FTS Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ✅ Supported | Returns have FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
--- a/docs/src/reranking/cross_encoder.md
+++ b/docs/src/reranking/cross_encoder.md
@@ -0,0 +1,70 @@
+# Cross Encoder Reranker
+
+This re-ranker uses Cross Encoder models from sentence-transformers to rerank the search results. You can use this re-ranker by passing `CrossEncoderReranker()` to the `rerank()` method. 
+!!! note
+    Supported Query Types: Hybrid, Vector, FTS
+
+
+```python
+import numpy
+import lancedb
+from lancedb.embeddings import get_registry
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.rerankers import CrossEncoderReranker
+
+embedder = get_registry().get("sentence-transformers").create()
+db = lancedb.connect("~/.lancedb")
+
+class Schema(LanceModel):
+    text: str = embedder.SourceField()
+    vector: Vector(embedder.ndims()) = embedder.VectorField()
+
+data = [
+    {"text": "hello world"},
+    {"text": "goodbye world"}
+    ]
+tbl = db.create_table("test", schema=Schema, mode="overwrite")
+tbl.add(data)
+reranker = CrossEncoderReranker()
+
+# Run vector search with a reranker
+result = tbl.search("hello").rerank(reranker=reranker).to_list() 
+
+# Run FTS search with a reranker
+result = tbl.search("hello", query_type="fts").rerank(reranker=reranker).to_list()
+
+# Run hybrid search with a reranker
+tbl.create_fts_index("text", replace=True)
+result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list()
+
+```
+
+Accepted Arguments
+----------------
+| Argument | Type | Default | Description |
+| --- | --- | --- | --- |
+| `model_name` | `str` | `"cross-encoder/ms-marco-TinyBERT-L-6"` | The name of the reranker model to use.|
+| `column` | `str` | `"text"` | The name of the column to use as input to the cross encoder model. |
+| `device` | `str` | `None` | The device to use for the cross encoder model. If None, will use "cuda" if available, otherwise "cpu". |
+| `return_score` | `str` | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score`. If "all" is supported, will return relevance score along with the vector and/or fts scores depending on query type |
+
+## Supported Scores for each query type
+You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type:
+
+### Hybrid Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ❌ Not Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
+
+### Vector Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ✅ Supported | Returns have vector(`_distance`) along with Hybrid Search score(`_relevance_score`) |
+
+### FTS Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ✅ Supported | Returns have FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
--- a/docs/src/reranking/custom_reranker.md
+++ b/docs/src/reranking/custom_reranker.md
@@ -0,0 +1,88 @@
+## Building Custom Rerankers
+You can build your own custom reranker by subclassing the `Reranker` class and implementing the `rerank_hybrid()` method. Optionally, you can also implement the `rerank_vector()` and `rerank_fts()` methods if you want to support reranking for vector and FTS search separately.
+Here's an example of a custom reranker that combines the results of semantic and full-text search using a linear combination of the scores.
+
+The `Reranker` base interface comes with a `merge_results()` method that can be used to combine the results of semantic and full-text search. This is a vanilla merging algorithm that simply concatenates the results and removes the duplicates without taking the scores into consideration. It only keeps the first copy of the row encountered. This works well in cases that don't require the scores of semantic and full-text search to combine the results. If you want to use the scores or want to support `return_score="all"`, you'll need to implement your own merging algorithm.
+
+```python
+
+from lancedb.rerankers import Reranker
+import pyarrow as pa
+
+class MyReranker(Reranker):
+    def __init__(self, param1, param2, ..., return_score="relevance"):
+        super().__init__(return_score)
+        self.param1 = param1
+        self.param2 = param2
+
+    def rerank_hybrid(self, query: str, vector_results: pa.Table, fts_results: pa.Table):
+        # Use the built-in merging function
+        combined_result = self.merge_results(vector_results, fts_results)
+
+        # Do something with the combined results
+        # ...
+
+        # Return the combined results
+        return combined_result
+
+    def rerank_vector(self, query: str, vector_results: pa.Table):
+        # Do something with the vector results
+        # ...
+
+        # Return the vector results
+        return vector_results
+
+    def rerank_fts(self, query: str, fts_results: pa.Table):
+        # Do something with the FTS results
+        # ...
+
+        # Return the FTS results
+        return fts_results
+
+```
+
+### Example of a Custom Reranker
+For the sake of simplicity, let's build a custom reranker that enhances the Cohere Reranker by accepting a filter query, and accepts the other CohereReranker params as kwargs.
+
+```python
+
+from typing import List, Union
+import pandas as pd
+import pyarrow as pa
+from lancedb.rerankers import CohereReranker
+
+class ModifiedCohereReranker(CohereReranker):
+    def __init__(self, filters: Union[str, List[str]], **kwargs):
+        super().__init__(**kwargs)
+        filters = filters if isinstance(filters, list) else [filters]
+        self.filters = filters
+
+    def rerank_hybrid(self, query: str, vector_results: pa.Table, fts_results: pa.Table)-> pa.Table:
+        combined_result = super().rerank_hybrid(query, vector_results, fts_results)
+        df = combined_result.to_pandas()
+        for filter in self.filters:
+            df = df.query("not text.str.contains(@filter)")
+
+        return pa.Table.from_pandas(df)
+
+    def rerank_vector(self, query: str, vector_results: pa.Table)-> pa.Table:
+        vector_results = super().rerank_vector(query, vector_results)
+        df = vector_results.to_pandas()
+        for filter in self.filters:
+            df = df.query("not text.str.contains(@filter)")
+
+        return pa.Table.from_pandas(df)
+
+    def rerank_fts(self, query: str, fts_results: pa.Table)-> pa.Table:
+        fts_results = super().rerank_fts(query, fts_results)
+        df = fts_results.to_pandas()
+        for filter in self.filters:
+            df = df.query("not text.str.contains(@filter)")
+
+        return pa.Table.from_pandas(df)
+
+```
+
+!!! tip
+    The `vector_results` and `fts_results` are pyarrow tables. Learn more about pyarrow tables [here](https://arrow.apache.org/docs/python). They can be converted to other data types like pandas dataframe, pydict, pylist etc.
+
+    For example, You can convert them to pandas dataframes using `to_pandas()` method and perform any operations you want. After you are done, you can convert the dataframe back to pyarrow table using `pa.Table.from_pandas()` method and return it.
--- a/docs/src/reranking/index.md
+++ b/docs/src/reranking/index.md
@@ -0,0 +1,60 @@
+Reranking is the process of reordering a list of items based on some criteria. In the context of search, reranking is used to reorder the search results returned by a search engine based on some criteria. This can be useful when the initial ranking of the search results is not satisfactory or when the user has provided additional information that can be used to improve the ranking of the search results.
+
+LanceDB comes with some built-in rerankers. Some of the rerankers that are available in LanceDB are:
+
+| Reranker | Description | Supported Query Types |
+| --- | --- | --- |
+| `LinearCombinationReranker` | Reranks search results based on a linear combination of FTS and vector search scores | Hybrid |
+| `CohereReranker` | Uses cohere Reranker API to rerank results | Vector, FTS, Hybrid |
+| `CrossEncoderReranker` | Uses a cross-encoder model to rerank search results | Vector, FTS, Hybrid |
+| `ColbertReranker` | Uses a colbert model to rerank search results | Vector, FTS, Hybrid |
+| `OpenaiReranker`(Experimental) | Uses OpenAI's chat model to rerank search results | Vector, FTS, Hybrid |
+
+
+## Using a Reranker
+Using rerankers is optional for vector and FTS. However, for hybrid search, rerankers are required. To use a reranker, you need to create an instance of the reranker and pass it to the `rerank` method of the query builder.
+
+```python
+import numpy
+import lancedb
+from lancedb.embeddings import get_registry
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.rerankers import CohereReranker
+
+embedder = get_registry().get("sentence-transformers").create()
+db = lancedb.connect("~/.lancedb")
+
+class Schema(LanceModel):
+    text: str = embedder.SourceField()
+    vector: Vector(embedder.ndims()) = embedder.VectorField()
+
+data = [
+    {"text": "hello world"},
+    {"text": "goodbye world"}
+    ]
+tbl = db.create_table("test", schema=Schema, mode="overwrite")
+tbl.add(data)
+reranker = CohereReranker(api_key="your_api_key")
+
+# Run vector search with a reranker
+result = tbl.search("hello").rerank(reranker).to_list()
+
+# Run FTS search with a reranker
+result = tbl.search("hello", query_type="fts").rerank(reranker).to_list()
+
+# Run hybrid search with a reranker
+tbl.create_fts_index("text")
+result = tbl.search("hello", query_type="hybrid").rerank(reranker).to_list()
+```
+
+## Available Rerankers
+LanceDB comes with some built-in rerankers. Here are some of the rerankers that are available in LanceDB:
+
+- [Cohere Reranker](./cohere.md)
+- [Cross Encoder Reranker](./cross_encoder.md)
+- [ColBERT Reranker](./colbert.md)
+- [OpenAI Reranker](./openai.md)
+- [Linear Combination Reranker](./linear_combination.md)
+
+## Creating Custom Rerankers
+
+LanceDB also allows you to create custom rerankers by extending the base `Reranker` class. The custom reranker should implement the `rerank_hybrid()` method, which takes the vector and full-text search results and returns a reranked table of results. This is covered in more detail in the [Creating Custom Rerankers](./custom_reranker.md) section.
--- a/docs/src/reranking/linear_combination.md
+++ b/docs/src/reranking/linear_combination.md
@@ -0,0 +1,52 @@
+# Linear Combination Reranker
+
+This is the default re-ranker used by LanceDB hybrid search. It combines the results of semantic and full-text search using a linear combination of the scores. The weights for the linear combination can be specified. It defaults to 0.7, i.e., 70% weight for semantic search and 30% weight for full-text search.
+
+!!! note
+    Supported Query Types: Hybrid
+
+
+```python
+import numpy
+import lancedb
+from lancedb.embeddings import get_registry
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.rerankers import LinearCombinationReranker
+
+embedder = get_registry().get("sentence-transformers").create()
+db = lancedb.connect("~/.lancedb")
+
+class Schema(LanceModel):
+    text: str = embedder.SourceField()
+    vector: Vector(embedder.ndims()) = embedder.VectorField()
+
+data = [
+    {"text": "hello world"},
+    {"text": "goodbye world"}
+    ]
+tbl = db.create_table("test", schema=Schema, mode="overwrite")
+tbl.add(data)
+reranker = LinearCombinationReranker()
+
+# Run hybrid search with a reranker
+tbl.create_fts_index("text", replace=True)
+result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list()
+
+```
+
+Accepted Arguments
+----------------
+| Argument | Type | Default | Description |
+| --- | --- | --- | --- |
+| `weight` | `float` | `0.7` | The weight to use for the semantic search score. The weight for the full-text search score is `1 - weight`. |
+| `return_score` | `str` | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score`. If "all", will return all scores from the vector and FTS search along with the relevance score. |
+
+
+## Supported Scores for each query type
+You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type:
+
+### Hybrid Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ✅ Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
--- a/docs/src/reranking/openai.md
+++ b/docs/src/reranking/openai.md
@@ -0,0 +1,73 @@
+# OpenAI Reranker (Experimental)
+
+This re-ranker uses an OpenAI chat model to rerank the search results. You can use this re-ranker by passing `OpenaiReranker()` to the `rerank()` method.
+!!! note
+    Supported Query Types: Hybrid, Vector, FTS
+
+!!! warning
+    This re-ranker is experimental. OpenAI doesn't have a dedicated reranking model, so we are using the chat model for reranking. 
+
+```python
+import numpy
+import lancedb
+from lancedb.embeddings import get_registry
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.rerankers import OpenaiReranker
+
+embedder = get_registry().get("sentence-transformers").create()
+db = lancedb.connect("~/.lancedb")
+
+class Schema(LanceModel):
+    text: str = embedder.SourceField()
+    vector: Vector(embedder.ndims()) = embedder.VectorField()
+
+data = [
+    {"text": "hello world"},
+    {"text": "goodbye world"}
+    ]
+tbl = db.create_table("test", schema=Schema, mode="overwrite")
+tbl.add(data)
+reranker = OpenaiReranker()
+
+# Run vector search with a reranker
+result = tbl.search("hello").rerank(reranker=reranker).to_list() 
+
+# Run FTS search with a reranker
+result = tbl.search("hello", query_type="fts").rerank(reranker=reranker).to_list()
+
+# Run hybrid search with a reranker
+tbl.create_fts_index("text", replace=True)
+result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list()
+
+```
+
+Accepted Arguments
+----------------
+| Argument | Type | Default | Description |
+| --- | --- | --- | --- |
+| `model_name` | `str` | `"gpt-4-turbo-preview"` | The name of the reranker model to use.|
+| `column` | `str` | `"text"` | The name of the column to use as input to the cross encoder model. |
+| `return_score` | `str` | `"relevance"` | Options are "relevance" or "all". The type of score to return. If "relevance", will return only the `_relevance_score`. If "all" is supported, will return relevance score along with the vector and/or fts scores depending on query type |
+| `api_key` | `str` | `None` | The API key to use. If None, will use the `OPENAI_API_KEY` environment variable. |
+
+
+## Supported Scores for each query type
+You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type:
+
+### Hybrid Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ❌ Not Supported | Returns have vector(`_distance`) and FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
+
+### Vector Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ✅ Supported | Returns have vector(`_distance`) along with Hybrid Search score(`_relevance_score`) |
+
+### FTS Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returns only have the `_relevance_score` column |
+| `all` | ✅ Supported | Returns have FTS(`score`) along with Hybrid Search score(`_relevance_score`) |
--- a/docs/test/md_testing.py
+++ b/docs/test/md_testing.py
@@ -1,5 +1,5 @@
 import glob
-from typing import Iterator
+from typing import Iterator, List
 from pathlib import Path

 glob_string = "../src/**/*.md"
@@ -15,6 +15,7 @@ excluded_globs = [
    "../src/ann_indexes.md",
    "../src/basic.md",
    "../src/hybrid_search/hybrid_search.md",
+    "../src/reranking/*.md",
 ]

 python_prefix = "py"
@@ -50,11 +51,24 @@ def yield_lines(lines: Iterator[str], prefix: str, suffix: str):
                yield line[strip_length:]


+def wrap_async(lines: List[str]) -> List[str]:
+    # Indent all the lines
+    lines = ["    " + line for line in lines]
+    # Put all lines in `async def main():`
+    lines = ["async def main():\n"] + lines
+    # Put `import asyncio\n asyncio.run(main())` at the end
+    lines = lines + ["\n", "import asyncio\n", "asyncio.run(main())\n"]
+    return lines
+
+
 for file in filter(lambda file: file not in excluded_files, files):
    with open(file, "r") as f:
        lines = list(yield_lines(iter(f), "```", "```"))

    if len(lines) > 0:
+        if any("await" in line for line in lines):
+            lines = wrap_async(lines)
+
        print(lines)
        out_path = (
            Path(python_folder)
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.4.16",
+  "version": "0.4.17",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.4.16",
+      "version": "0.4.17",
      "cpu": [
        "x64",
        "arm64"
@@ -52,11 +52,11 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.4.16",
-        "@lancedb/vectordb-darwin-x64": "0.4.16",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.4.16",
-        "@lancedb/vectordb-linux-x64-gnu": "0.4.16",
-        "@lancedb/vectordb-win32-x64-msvc": "0.4.16"
+        "@lancedb/vectordb-darwin-arm64": "0.4.17",
+        "@lancedb/vectordb-darwin-x64": "0.4.17",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.4.17",
+        "@lancedb/vectordb-linux-x64-gnu": "0.4.17",
+        "@lancedb/vectordb-win32-x64-msvc": "0.4.17"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.4.16",
+  "version": "0.4.17",
  "description": " Serverless, low-latency vector database for AI applications",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -88,10 +88,10 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-arm64": "0.4.16",
-    "@lancedb/vectordb-darwin-x64": "0.4.16",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.4.16",
-    "@lancedb/vectordb-linux-x64-gnu": "0.4.16",
-    "@lancedb/vectordb-win32-x64-msvc": "0.4.16"
+    "@lancedb/vectordb-darwin-arm64": "0.4.17",
+    "@lancedb/vectordb-darwin-x64": "0.4.17",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.4.17",
+    "@lancedb/vectordb-linux-x64-gnu": "0.4.17",
+    "@lancedb/vectordb-win32-x64-msvc": "0.4.17"
  }
 }
--- a/node/src/index.ts
+++ b/node/src/index.ts
@@ -78,12 +78,25 @@ export interface ConnectionOptions {
  /** User provided AWS credentials.
   *
   * If not provided, LanceDB will use the default credentials provider chain.
+   *
+   * @deprecated Pass `aws_access_key_id`, `aws_secret_access_key`, and `aws_session_token`
+   * through `storageOptions` instead.
   */
  awsCredentials?: AwsCredentials

-  /** AWS region to connect to. Default is {@link defaultAwsRegion}. */
+  /** AWS region to connect to. Default is {@link defaultAwsRegion}
+   *
+   * @deprecated Pass `region` through `storageOptions` instead.
+   */
  awsRegion?: string

+  /**
+   * User provided options for object storage. For example, S3 credentials or request timeouts.
+   *
+   * The various options are described at https://lancedb.github.io/lancedb/guides/storage/
+   */
+  storageOptions?: Record<string, string>
+
  /**
   * API key for the remote connections
   *
@@ -176,7 +189,6 @@ export async function connect (
  if (typeof arg === 'string') {
    opts = { uri: arg }
  } else {
-    // opts = { uri: arg.uri, awsCredentials = arg.awsCredentials }
    const keys = Object.keys(arg)
    if (keys.length === 1 && keys[0] === 'uri' && typeof arg.uri === 'string') {
      opts = { uri: arg.uri }
@@ -198,12 +210,26 @@ export async function connect (
    // Remote connection
    return new RemoteConnection(opts)
  }
+
+  const storageOptions = opts.storageOptions ?? {};
+  if (opts.awsCredentials?.accessKeyId !== undefined) {
+    storageOptions.aws_access_key_id = opts.awsCredentials.accessKeyId
+  }
+  if (opts.awsCredentials?.secretKey !== undefined) {
+    storageOptions.aws_secret_access_key = opts.awsCredentials.secretKey
+  }
+  if (opts.awsCredentials?.sessionToken !== undefined) {
+    storageOptions.aws_session_token = opts.awsCredentials.sessionToken
+  }
+  if (opts.awsRegion !== undefined) {
+    storageOptions.region = opts.awsRegion
+  }
+  // It's a pain to pass a record to Rust, so we convert it to an array of key-value pairs
+  const storageOptionsArr = Object.entries(storageOptions);
+
  const db = await databaseNew(
    opts.uri,
-    opts.awsCredentials?.accessKeyId,
-    opts.awsCredentials?.secretKey,
-    opts.awsCredentials?.sessionToken,
-    opts.awsRegion,
+    storageOptionsArr,
    opts.readConsistencyInterval
  )
  return new LocalConnection(db, opts)
@@ -720,7 +746,6 @@ export class LocalConnection implements Connection {
    const tbl = await databaseOpenTable.call(
      this._db,
      name,
-      ...getAwsArgs(this._options())
    )
    if (embeddings !== undefined) {
      return new LocalTable(tbl, name, this._options(), embeddings)
--- a/node/src/remote/client.ts
+++ b/node/src/remote/client.ts
@@ -111,6 +111,10 @@ async function decodeErrorData(
  if (responseType === 'arraybuffer') {
      return new TextDecoder().decode(errorData)
  } else {
+    if (typeof errorData === 'object') {
+      return JSON.stringify(errorData)
+    }
+
    return errorData
  }
 }
--- a/node/src/remote/index.ts
+++ b/node/src/remote/index.ts
@@ -38,7 +38,7 @@ import {
  fromRecordsToStreamBuffer,
  fromTableToStreamBuffer
 } from '../arrow'
-import { toSQL } from '../util'
+import { toSQL, TTLCache } from '../util'
 import { type HttpMiddleware } from '../middleware'

 /**
@@ -47,6 +47,7 @@ import { type HttpMiddleware } from '../middleware'
 export class RemoteConnection implements Connection {
  private _client: HttpLancedbClient
  private readonly _dbName: string
+  private readonly _tableCache = new TTLCache(300_000)

  constructor (opts: ConnectionOptions) {
    if (!opts.uri.startsWith('db://')) {
@@ -89,6 +90,9 @@ export class RemoteConnection implements Connection {
      page_token: pageToken
    })
    const body = await response.body()
+    for (const table of body.tables) {
+      this._tableCache.set(table, true)
+    }
    return body.tables
  }

@@ -101,6 +105,12 @@ export class RemoteConnection implements Connection {
    name: string,
    embeddings?: EmbeddingFunction<T>
  ): Promise<Table<T>> {
+      // check if the table exists
+      if (this._tableCache.get(name) === undefined) {
+        await this._client.post(`/v1/table/${encodeURIComponent(name)}/describe/`)
+        this._tableCache.set(name, true)
+      }
+
    if (embeddings !== undefined) {
      return new RemoteTable(this._client, name, embeddings)
    } else {
@@ -169,6 +179,7 @@ export class RemoteConnection implements Connection {
      )
    }

+    this._tableCache.set(tableName, true)
    if (embeddings === undefined) {
      return new RemoteTable(this._client, tableName)
    } else {
@@ -178,6 +189,7 @@ export class RemoteConnection implements Connection {

  async dropTable (name: string): Promise<void> {
    await this._client.post(`/v1/table/${encodeURIComponent(name)}/drop/`)
+    this._tableCache.delete(name)
  }

  withMiddleware (middleware: HttpMiddleware): Connection {
--- a/node/src/test/test.ts
+++ b/node/src/test/test.ts
@@ -42,6 +42,7 @@ import {
  Float16,
  Int64
 } from 'apache-arrow'
+import type { RemoteRequest, RemoteResponse } from '../middleware'

 const expect = chai.expect
 const assert = chai.assert
@@ -74,6 +75,19 @@ describe('LanceDB client', function () {
      assert.equal(con.uri, uri)
    })

+    it('should accept custom storage options', async function () {
+      const uri = await createTestDB()
+      const storageOptions = {
+        region: 'us-west-2',
+        timeout: '30s'
+      };
+      const con = await lancedb.connect({
+        uri,
+        storageOptions
+      })
+      assert.equal(con.uri, uri)
+    })
+
    it('should return the existing table names', async function () {
      const uri = await createTestDB()
      const con = await lancedb.connect(uri)
@@ -913,7 +927,22 @@ describe('Remote LanceDB client', function () {
      }

      // Search
-      const table = await con.openTable('vectors')
+      const table = await con.withMiddleware(new (class {
+        async onRemoteRequest(req: RemoteRequest, next: (req: RemoteRequest) => Promise<RemoteResponse>) {
+          // intercept call to check if the table exists and make the call succeed
+          if (req.uri.endsWith('/describe/')) {
+            return {
+              status: 200,
+              statusText: 'OK',
+              headers: new Map(),
+              body: async () => ({})
+            }
+          }
+
+          return await next(req)
+        }
+      })()).openTable('vectors')
+
      try {
        await table.search([0.1, 0.3]).execute()
      } catch (err) {
--- a/node/src/util.ts
+++ b/node/src/util.ts
@@ -42,3 +42,36 @@ export function toSQL (value: Literal): string {
  // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
  throw new Error(`Unsupported value type: ${typeof value} value: (${value})`)
 }
+
+export class TTLCache { // string-keyed cache whose entries expire `ttl` ms after being set
+  private readonly cache: Map<string, { value: any, expires: number }> // `expires` is a Date.now()-based deadline
+
+  /**
+   * @param ttl Time to live in milliseconds, counted from the moment an entry is set
+   */
+  constructor (private readonly ttl: number) {
+    this.cache = new Map()
+  }
+
+  get (key: string): any | undefined { // stored value, or undefined when missing or expired
+    const entry = this.cache.get(key)
+    if (entry === undefined) {
+      return undefined
+    }
+
+    if (entry.expires < Date.now()) { // past its deadline: evict lazily on read
+      this.cache.delete(key)
+      return undefined
+    }
+
+    return entry.value
+  }
+
+  set (key: string, value: any): void { // overwriting a key restarts its time to live
+    this.cache.set(key, { value, expires: Date.now() + this.ttl })
+  }
+
+  delete (key: string): void { // no-op when the key is absent
+    this.cache.delete(key)
+  }
+}
--- a/nodejs/test/s3_integration.test.ts
+++ b/nodejs/test/s3_integration.test.ts
@@ -0,0 +1,219 @@
+// Copyright 2024 Lance Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/* eslint-disable @typescript-eslint/naming-convention */
+
+import { connect } from "../dist";
+import {
+  CreateBucketCommand,
+  DeleteBucketCommand,
+  DeleteObjectCommand,
+  HeadObjectCommand,
+  ListObjectsV2Command,
+  S3Client,
+} from "@aws-sdk/client-s3";
+import {
+  CreateKeyCommand,
+  ScheduleKeyDeletionCommand,
+  KMSClient,
+} from "@aws-sdk/client-kms";
+
+// Skip these tests unless the S3_TEST environment variable is set (to any non-empty value)
+const maybeDescribe = process.env.S3_TEST ? describe : describe.skip;
+
+// Storage options in camelCase form; also reused below to configure the AWS SDK test clients
+const CONFIG = {
+  allowHttp: "true", // a string, not a boolean: storage option values are strings
+  awsAccessKeyId: "ACCESSKEY",
+  awsSecretAccessKey: "SECRETKEY",
+  awsEndpoint: "http://127.0.0.1:4566", // NOTE(review): presumably the local localstack endpoint - confirm
+  awsRegion: "us-east-1",
+};
+
+/** Test helper that creates, empties and deletes an S3 bucket on the endpoint in CONFIG. */
+class S3Bucket {
+  name: string;
+  constructor(name: string) {
+    this.name = name;
+  }
+
+  static s3Client() { // client bound to the endpoint and static credentials in CONFIG
+    return new S3Client({
+      region: CONFIG.awsRegion,
+      credentials: {
+        accessKeyId: CONFIG.awsAccessKeyId,
+        secretAccessKey: CONFIG.awsSecretAccessKey,
+      },
+      endpoint: CONFIG.awsEndpoint,
+    });
+  }
+
+  public static async create(name: string): Promise<S3Bucket> {
+    const client = this.s3Client();
+    // Delete the bucket if it already exists
+    try {
+      await this.deleteBucket(client, name);
+    } catch (e) {
+      // It's fine if the bucket doesn't exist
+    }
+    await client.send(new CreateBucketCommand({ Bucket: name }));
+    return new S3Bucket(name);
+  }
+
+  public async delete() { // empties and removes this bucket
+    const client = S3Bucket.s3Client();
+    await S3Bucket.deleteBucket(client, this.name);
+  }
+
+  static async deleteBucket(client: S3Client, name: string) {
+    // Must delete all objects before we can delete the bucket (only one listing page is handled)
+    const objects = await client.send(
+      new ListObjectsV2Command({ Bucket: name }),
+    );
+    for (const object of objects.Contents ?? []) {
+      await client.send(
+        new DeleteObjectCommand({ Bucket: name, Key: object.Key }),
+      );
+    }
+
+    await client.send(new DeleteBucketCommand({ Bucket: name }));
+  }
+
+  /** Assert every object under `path` is SSE-KMS encrypted with `keyId`; fails if none exist. */
+  public async assertAllEncrypted(path: string, keyId: string) {
+    const client = S3Bucket.s3Client();
+    const objects = await client.send(
+      new ListObjectsV2Command({ Bucket: this.name, Prefix: path }),
+    );
+    const contents = objects.Contents ?? [];
+    expect(contents.length).toBeGreaterThan(0); // never pass vacuously
+    for (const object of contents) {
+      const metadata = await client.send(
+        new HeadObjectCommand({ Bucket: this.name, Key: object.Key }),
+      );
+      expect(metadata.ServerSideEncryption).toBe("aws:kms");
+      expect(metadata.SSEKMSKeyId).toContain(keyId);
+    }
+  }
+}
+
+class KmsKey { // test helper owning one KMS key created on the endpoint in CONFIG
+  keyId: string;
+  constructor(keyId: string) {
+    this.keyId = keyId;
+  }
+
+  static kmsClient() { // client bound to the endpoint and static credentials in CONFIG
+    return new KMSClient({
+      region: CONFIG.awsRegion,
+      credentials: {
+        accessKeyId: CONFIG.awsAccessKeyId,
+        secretAccessKey: CONFIG.awsSecretAccessKey,
+      },
+      endpoint: CONFIG.awsEndpoint,
+    });
+  }
+
+  public static async create(): Promise<KmsKey> { // creates a key with default settings
+    const client = this.kmsClient();
+    const key = await client.send(new CreateKeyCommand({}));
+    const keyId = key?.KeyMetadata?.KeyId;
+    if (!keyId) { // the response carried no key id, so there is nothing to wrap
+      throw new Error("Failed to create KMS key");
+    }
+    return new KmsKey(keyId);
+  }
+
+  public async delete() { // schedules the key for deletion rather than deleting it directly
+    const client = KmsKey.kmsClient();
+    await client.send(new ScheduleKeyDeletionCommand({ KeyId: this.keyId }));
+  }
+}
+
+maybeDescribe("storage_options", () => { // suite is skipped unless S3_TEST is set
+  let bucket: S3Bucket;
+  let kmsKey: KmsKey;
+  beforeAll(async () => { // one bucket and one KMS key shared by every test below
+    bucket = await S3Bucket.create("lancedb");
+    kmsKey = await KmsKey.create();
+  });
+  afterAll(async () => {
+    await kmsKey.delete();
+    await bucket.delete();
+  });
+
+  it("can be used to configure auth and endpoints", async () => {
+    const uri = `s3://${bucket.name}/test`;
+    const db = await connect(uri, { storageOptions: CONFIG }); // camelCase keys are accepted
+
+    let table = await db.createTable("test", [{ a: 1, b: 2 }]);
+
+    let rowCount = await table.countRows();
+    expect(rowCount).toBe(1);
+
+    let tableNames = await db.tableNames();
+    expect(tableNames).toEqual(["test"]);
+
+    table = await db.openTable("test"); // reopen without table-level storage options
+    rowCount = await table.countRows();
+    expect(rowCount).toBe(1);
+
+    await table.add([
+      { a: 2, b: 3 },
+      { a: 3, b: 4 },
+    ]);
+    rowCount = await table.countRows();
+    expect(rowCount).toBe(3);
+
+    await db.dropTable("test");
+
+    tableNames = await db.tableNames();
+    expect(tableNames).toEqual([]);
+  });
+
+  it("can configure encryption at connection and table level", async () => {
+    const uri = `s3://${bucket.name}/test`;
+    let db = await connect(uri, { storageOptions: CONFIG });
+
+    let table = await db.createTable("table1", [{ a: 1, b: 2 }], {
+      storageOptions: { // encryption settings supplied for this table only
+        awsServerSideEncryption: "aws:kms",
+        awsSseKmsKeyId: kmsKey.keyId,
+      },
+    });
+
+    let rowCount = await table.countRows();
+    expect(rowCount).toBe(1);
+
+    await table.add([{ a: 2, b: 3 }]);
+
+    await bucket.assertAllEncrypted("test/table1.lance", kmsKey.keyId);
+
+    // Now with encryption settings at connection level
+    db = await connect(uri, {
+      storageOptions: {
+        ...CONFIG,
+        awsServerSideEncryption: "aws:kms",
+        awsSseKmsKeyId: kmsKey.keyId,
+      },
+    });
+    table = await db.createTable("table2", [{ a: 1, b: 2 }]); // no table-level options this time
+    rowCount = await table.countRows();
+    expect(rowCount).toBe(1);
+
+    await table.add([{ a: 2, b: 3 }]);
+
+    await bucket.assertAllEncrypted("test/table2.lance", kmsKey.keyId);
+  });
+});
--- a/nodejs/lancedb/connection.ts
+++ b/nodejs/lancedb/connection.ts
@@ -13,10 +13,32 @@
 // limitations under the License.

 import { fromTableToBuffer, makeArrowTable, makeEmptyTable } from "./arrow";
-import { Connection as LanceDbConnection } from "./native";
+import { ConnectionOptions, Connection as LanceDbConnection } from "./native";
 import { Table } from "./table";
 import { Table as ArrowTable, Schema } from "apache-arrow";

+/**
+ * Connect to a LanceDB instance at the given URI.
+ *
+ * Accepted formats:
+ *
+ * - `/path/to/database` - local database
+ * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
+ * - `db://host:port` - remote database (LanceDB cloud)
+ * @param {string} uri - The uri of the database; a uri starting with `db://` connects to a remote database.
+ * @param {Partial<ConnectionOptions>} [opts] - Connection options; `storageOptions` keys are normalised to snake case.
+ * @see {@link ConnectionOptions} for more details on the URI format.
+ */
+export async function connect(
+  uri: string,
+  opts?: Partial<ConnectionOptions>,
+): Promise<Connection> {
+  const storageOptions = cleanseStorageOptions(opts?.storageOptions);
+  opts = { ...opts, storageOptions }; // copy: never mutate the caller's object
+  const nativeConn = await LanceDbConnection.new(uri, opts);
+  return new Connection(nativeConn);
+}
+
 export interface CreateTableOptions {
  /**
   * The mode to use when creating the table.
@@ -33,6 +55,28 @@ export interface CreateTableOptions {
   * then no error will be raised.
   */
  existOk: boolean;
+
+  /**
+   * Configuration for object storage.
+   *
+   * Options already set on the connection will be inherited by the table,
+   * but can be overridden here.
+   *
+   * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
+   */
+  storageOptions?: Record<string, string>;
+}
+
+export interface OpenTableOptions {
+  /**
+   * Configuration for object storage.
+   *
+   * Options already set on the connection will be inherited by the table,
+   * but can be overridden here.
+   *
+   * The available options are described at https://lancedb.github.io/lancedb/guides/storage/
+   */
+  storageOptions?: Record<string, string>;
 }

 export interface TableNamesOptions {
@@ -109,8 +153,14 @@ export class Connection {
   * Open a table in the database.
   * @param {string} name - The name of the table
   */
-  async openTable(name: string): Promise<Table> {
-    const innerTable = await this.inner.openTable(name);
+  async openTable(
+    name: string,
+    options?: Partial<OpenTableOptions>,
+  ): Promise<Table> {
+    const innerTable = await this.inner.openTable(
+      name,
+      cleanseStorageOptions(options?.storageOptions),
+    );
    return new Table(innerTable);
  }

@@ -139,7 +189,12 @@ export class Connection {
      table = makeArrowTable(data);
    }
    const buf = await fromTableToBuffer(table);
-    const innerTable = await this.inner.createTable(name, buf, mode);
+    const innerTable = await this.inner.createTable(
+      name,
+      buf,
+      mode,
+      cleanseStorageOptions(options?.storageOptions),
+    );
    return new Table(innerTable);
  }

@@ -162,7 +217,12 @@ export class Connection {

    const table = makeEmptyTable(schema);
    const buf = await fromTableToBuffer(table);
-    const innerTable = await this.inner.createEmptyTable(name, buf, mode);
+    const innerTable = await this.inner.createEmptyTable(
+      name,
+      buf,
+      mode,
+      cleanseStorageOptions(options?.storageOptions),
+    );
    return new Table(innerTable);
  }

@@ -174,3 +234,43 @@ export class Connection {
    return this.inner.dropTable(name);
  }
 }
+
+/**
+ * Return a copy of the storage options with every key in snake case (undefined in, undefined out).
+ */
+function cleanseStorageOptions(
+  options?: Record<string, string>,
+): Record<string, string> | undefined {
+  if (options === undefined) {
+    return undefined;
+  }
+  const result: Record<string, string> = {};
+  for (const [key, value] of Object.entries(options)) {
+    if (value !== undefined) { // entries whose value is undefined are dropped from the copy
+      const newKey = camelToSnakeCase(key);
+      result[newKey] = value;
+    }
+  }
+  return result;
+}
+
+/**
+ * Convert a camelCase string to snake_case (only ASCII A-Z are rewritten). A string that contains
+ * an underscore, or that upper-casing leaves unchanged, is assumed done and returned unchanged.
+ */
+function camelToSnakeCase(camel: string): string {
+  if (camel.includes("_")) {
+    // Assume if there is at least one underscore, it is already snake case
+    return camel;
+  }
+  if (camel.toLocaleUpperCase() === camel) {
+    // Upper-casing changes nothing (e.g. "REGION"): treat the key as final and leave it untouched
+    return camel;
+  }
+
+  let result = camel.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
+  if (result.startsWith("_")) { // a leading capital (e.g. "Region") yields "_region"; strip it
+    result = result.slice(1);
+  }
+  return result;
+}
--- a/nodejs/lancedb/index.ts
+++ b/nodejs/lancedb/index.ts
@@ -12,12 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-import { Connection } from "./connection";
-import {
-  Connection as LanceDbConnection,
-  ConnectionOptions,
-} from "./native.js";
-
 export {
  WriteOptions,
  WriteMode,
@@ -32,6 +26,7 @@ export {
  VectorColumnOptions,
 } from "./arrow";
 export {
+  connect,
  Connection,
  CreateTableOptions,
  TableNamesOptions,
@@ -46,24 +41,3 @@ export {
 export { Index, IndexOptions, IvfPqOptions } from "./indices";
 export { Table, AddDataOptions, IndexConfig, UpdateOptions } from "./table";
 export * as embedding from "./embedding";
-
-/**
- * Connect to a LanceDB instance at the given URI.
- *
- * Accpeted formats:
- *
- * - `/path/to/database` - local database
- * - `s3://bucket/path/to/database` or `gs://bucket/path/to/database` - database on cloud storage
- * - `db://host:port` - remote database (LanceDB cloud)
- * @param {string} uri - The uri of the database. If the database uri starts
- * with `db://` then it connects to a remote database.
- * @see {@link ConnectionOptions} for more details on the URI format.
- */
-export async function connect(
-  uri: string,
-  opts?: Partial<ConnectionOptions>,
-): Promise<Connection> {
-  opts = opts ?? {};
-  const nativeConn = await LanceDbConnection.new(uri, opts);
-  return new Connection(nativeConn);
-}
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.4.16",
+  "version": "0.4.17",
  "os": [
    "darwin"
  ],
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-darwin-x64",
-  "version": "0.4.16",
+  "version": "0.4.17",
  "os": [
    "darwin"
  ],
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-linux-arm64-gnu",
-  "version": "0.4.16",
+  "version": "0.4.17",
  "os": [
    "linux"
  ],
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-linux-x64-gnu",
-  "version": "0.4.16",
+  "version": "0.4.17",
  "os": [
    "linux"
  ],
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.4.16",
+  "version": "0.4.17",
  "main": "./dist/index.js",
  "types": "./dist/index.d.ts",
  "napi": {
@@ -18,6 +18,8 @@
  },
  "license": "Apache 2.0",
  "devDependencies": {
+    "@aws-sdk/client-s3": "^3.33.0",
+    "@aws-sdk/client-kms": "^3.33.0",
    "@napi-rs/cli": "^2.18.0",
    "@types/jest": "^29.1.2",
    "@types/tmp": "^0.2.6",
@@ -63,15 +65,16 @@
    "lint": "eslint lancedb && eslint __test__",
    "prepublishOnly": "napi prepublish -t npm",
    "test": "npm run build && jest --verbose",
+    "integration": "S3_TEST=1 npm run test",
    "universal": "napi universal",
    "version": "napi version"
  },
  "optionalDependencies": {
-    "@lancedb/lancedb-darwin-arm64": "0.4.16",
-    "@lancedb/lancedb-darwin-x64": "0.4.16",
-    "@lancedb/lancedb-linux-arm64-gnu": "0.4.16",
-    "@lancedb/lancedb-linux-x64-gnu": "0.4.16",
-    "@lancedb/lancedb-win32-x64-msvc": "0.4.16"
+    "@lancedb/lancedb-darwin-arm64": "0.4.17",
+    "@lancedb/lancedb-darwin-x64": "0.4.17",
+    "@lancedb/lancedb-linux-arm64-gnu": "0.4.17",
+    "@lancedb/lancedb-linux-x64-gnu": "0.4.17",
+    "@lancedb/lancedb-win32-x64-msvc": "0.4.17"
  },
  "dependencies": {
    "openai": "^4.29.2",
--- a/nodejs/src/connection.rs
+++ b/nodejs/src/connection.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::collections::HashMap;
+
 use napi::bindgen_prelude::*;
 use napi_derive::*;

@@ -64,6 +66,11 @@ impl Connection {
            builder =
                builder.read_consistency_interval(std::time::Duration::from_secs_f64(interval));
        }
+        if let Some(storage_options) = options.storage_options {
+            for (key, value) in storage_options {
+                builder = builder.storage_option(key, value);
+            }
+        }
        Ok(Self::inner_new(
            builder
                .execute()
@@ -118,14 +125,18 @@ impl Connection {
        name: String,
        buf: Buffer,
        mode: String,
+        storage_options: Option<HashMap<String, String>>,
    ) -> napi::Result<Table> {
        let batches = ipc_file_to_batches(buf.to_vec())
            .map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
        let mode = Self::parse_create_mode_str(&mode)?;
-        let tbl = self
-            .get_inner()?
-            .create_table(&name, batches)
-            .mode(mode)
+        let mut builder = self.get_inner()?.create_table(&name, batches).mode(mode);
+        if let Some(storage_options) = storage_options {
+            for (key, value) in storage_options {
+                builder = builder.storage_option(key, value);
+            }
+        }
+        let tbl = builder
            .execute()
            .await
            .map_err(|e| napi::Error::from_reason(format!("{}", e)))?;
@@ -138,15 +149,22 @@ impl Connection {
        name: String,
        schema_buf: Buffer,
        mode: String,
+        storage_options: Option<HashMap<String, String>>,
    ) -> napi::Result<Table> {
        let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| {
            napi::Error::from_reason(format!("Failed to marshal schema from JS to Rust: {}", e))
        })?;
        let mode = Self::parse_create_mode_str(&mode)?;
-        let tbl = self
+        let mut builder = self
            .get_inner()?
            .create_empty_table(&name, schema)
-            .mode(mode)
+            .mode(mode);
+        if let Some(storage_options) = storage_options {
+            for (key, value) in storage_options {
+                builder = builder.storage_option(key, value);
+            }
+        }
+        let tbl = builder
            .execute()
            .await
            .map_err(|e| napi::Error::from_reason(format!("{}", e)))?;
@@ -154,10 +172,18 @@ impl Connection {
    }

    #[napi]
-    pub async fn open_table(&self, name: String) -> napi::Result<Table> {
-        let tbl = self
-            .get_inner()?
-            .open_table(&name)
+    pub async fn open_table(
+        &self,
+        name: String,
+        storage_options: Option<HashMap<String, String>>,
+    ) -> napi::Result<Table> {
+        let mut builder = self.get_inner()?.open_table(&name);
+        if let Some(storage_options) = storage_options {
+            for (key, value) in storage_options {
+                builder = builder.storage_option(key, value);
+            }
+        }
+        let tbl = builder
            .execute()
            .await
            .map_err(|e| napi::Error::from_reason(format!("{}", e)))?;
--- a/nodejs/src/lib.rs
+++ b/nodejs/src/lib.rs
@@ -12,7 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use connection::Connection;
+use std::collections::HashMap;
+
 use napi_derive::*;

 mod connection;
@@ -38,6 +39,10 @@ pub struct ConnectionOptions {
    /// Note: this consistency only applies to read operations. Write operations are
    /// always consistent.
    pub read_consistency_interval: Option<f64>,
+    /// (For LanceDB OSS only): configuration for object storage.
+    ///
+    /// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
+    pub storage_options: Option<HashMap<String, String>>,
 }

 /// Write mode for writing a table.
@@ -54,7 +59,7 @@ pub struct WriteOptions {
    pub mode: Option<WriteMode>,
 }

-#[napi]
-pub async fn connect(uri: String, options: ConnectionOptions) -> napi::Result<Connection> {
-    Connection::new(uri, options).await
+#[napi(object)]
+pub struct OpenTableOptions {
+    pub storage_options: Option<HashMap<String, String>>,
 }
--- a/python/.bumpversion.cfg
+++ b/python/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.6.7
+current_version = 0.6.9
 commit = True
 message = [python] Bump version: {current_version} → {new_version}
 tag = True
--- a/python/README.md
+++ b/python/README.md
@@ -41,7 +41,7 @@ To build the python package you can use maturin:
 ```bash
 # This will build the rust bindings and place them in the appropriate place
 # in your venv or conda environment
-matruin develop
+maturin develop
 ```

 To run the unit tests:
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,19 +1,17 @@
 [project]
 name = "lancedb"
-version = "0.6.7"
+version = "0.6.9"
 dependencies = [
    "deprecation",
-    "pylance==0.10.9",
+    "pylance==0.10.12",
    "ratelimiter~=1.0",
+    "requests>=2.31.0",
    "retry>=0.9.2",
    "tqdm>=4.27.0",
    "pydantic>=1.10",
    "attrs>=21.3.0",
    "semver>=3.0",
    "cachetools",
-    "pyyaml>=6.0",
-    "click>=8.1.7",
-    "requests>=2.31.0",
    "overrides>=0.7",
 ]
 description = "lancedb"
@@ -51,6 +49,7 @@ repository = "https://github.com/lancedb/lancedb"
 [project.optional-dependencies]
 tests = [
    "aiohttp",
+    "boto3",
    "pandas>=1.4",
    "pytest",
    "pytest-mock",
@@ -58,6 +57,7 @@ tests = [
    "duckdb",
    "pytz",
    "polars>=0.19",
+    "tantivy"
 ]
 dev = ["ruff", "pre-commit"]
 docs = [
@@ -65,7 +65,6 @@ docs = [
    "mkdocs-jupyter",
    "mkdocs-material",
    "mkdocstrings[python]",
-    "mkdocs-ultralytics-plugin==0.0.44",
 ]
 clip = ["torch", "pillow", "open-clip"]
 embeddings = [
@@ -88,19 +87,17 @@ azure = ["adlfs>=2024.2.0"]
 python-source = "python"
 module-name = "lancedb._lancedb"

-[project.scripts]
-lancedb = "lancedb.cli.cli:cli"
-
 [build-system]
 requires = ["maturin>=1.4"]
 build-backend = "maturin"

 [tool.ruff.lint]
-select = ["F", "E", "W", "I", "G", "TCH", "PERF"]
+select = ["F", "E", "W", "G", "TCH", "PERF"]

 [tool.pytest.ini_options]
 addopts = "--strict-markers --ignore-glob=lancedb/embeddings/*.py"
 markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "asyncio",
+    "s3_test"
 ]
--- a/python/python/lancedb/init.py
+++ b/python/python/lancedb/init.py
@@ -15,7 +15,7 @@ import importlib.metadata
 import os
 from concurrent.futures import ThreadPoolExecutor
 from datetime import timedelta
-from typing import Optional, Union
+from typing import Dict, Optional, Union

 __version__ = importlib.metadata.version("lancedb")

@@ -25,7 +25,6 @@ from .db import AsyncConnection, DBConnection, LanceDBConnection
 from .remote.db import RemoteDBConnection
 from .schema import vector
 from .table import AsyncTable
-from .utils import sentry_log


 def connect(
@@ -84,7 +83,7 @@ def connect(

    >>> db = lancedb.connect("s3://my-bucket/lancedb")

-    Connect to LancdDB cloud:
+    Connect to LanceDB cloud:

    >>> db = lancedb.connect("db://my_database", api_key="ldb_...")

@@ -119,6 +118,7 @@ async def connect_async(
    host_override: Optional[str] = None,
    read_consistency_interval: Optional[timedelta] = None,
    request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
+    storage_options: Optional[Dict[str, str]] = None,
 ) -> AsyncConnection:
    """Connect to a LanceDB database.

@@ -145,6 +145,9 @@ async def connect_async(
        the last check, then the table will be checked for updates. Note: this
        consistency only applies to read operations. Write operations are
        always consistent.
+    storage_options: dict, optional
+        Additional options for the storage backend. See available options at
+        https://lancedb.github.io/lancedb/guides/storage/

    Examples
    --------
@@ -173,6 +176,7 @@ async def connect_async(
            region,
            host_override,
            read_consistency_interval_secs,
+            storage_options,
        )
    )

@@ -184,7 +188,6 @@ __all__ = [
    "AsyncTable",
    "URI",
    "sanitize_uri",
-    "sentry_log",
    "vector",
    "DBConnection",
    "LanceDBConnection",
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -19,10 +19,18 @@ class Connection(object):
        self, start_after: Optional[str], limit: Optional[int]
    ) -> list[str]: ...
    async def create_table(
-        self, name: str, mode: str, data: pa.RecordBatchReader
+        self,
+        name: str,
+        mode: str,
+        data: pa.RecordBatchReader,
+        storage_options: Optional[Dict[str, str]] = None,
    ) -> Table: ...
    async def create_empty_table(
-        self, name: str, mode: str, schema: pa.Schema
+        self,
+        name: str,
+        mode: str,
+        schema: pa.Schema,
+        storage_options: Optional[Dict[str, str]] = None,
    ) -> Table: ...

 class Table:
--- a/python/python/lancedb/cli/init.py
+++ b/python/python/lancedb/cli/init.py
@@ -1,12 +0,0 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
--- a/python/python/lancedb/cli/cli.py
+++ b/python/python/lancedb/cli/cli.py
@@ -1,47 +0,0 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-import click
-
-from lancedb.utils import CONFIG
-
-
-@click.group()
-@click.version_option(help="LanceDB command line interface entry point")
-def cli():
-    "LanceDB command line interface"
-
-
-diagnostics_help = """
-Enable or disable LanceDB diagnostics. When enabled, LanceDB will send anonymous events
-to help us improve LanceDB. These diagnostics are used only for error reporting and no
-data is collected. You can find more about diagnosis on our docs:
-https://lancedb.github.io/lancedb/cli_config/
-"""
-
-
-@cli.command(help=diagnostics_help)
-@click.option("--enabled/--disabled", default=True)
-def diagnostics(enabled):
-    CONFIG.update({"diagnostics": True if enabled else False})
-    click.echo("LanceDB diagnostics is %s" % ("enabled" if enabled else "disabled"))
-
-
-@cli.command(help="Show current LanceDB configuration")
-def config():
-    # TODO: pretty print as table with colors and formatting
-    click.echo("Current LanceDB configuration:")
-    cfg = CONFIG.copy()
-    cfg.pop("uuid")  # Don't show uuid as it is not configurable
-    for item, amount in cfg.items():
-        click.echo("{} ({})".format(item, amount))
--- a/python/python/lancedb/db.py
+++ b/python/python/lancedb/db.py
@@ -18,14 +18,13 @@ import inspect
 import os
 from abc import abstractmethod
 from pathlib import Path
-from typing import TYPE_CHECKING, Iterable, List, Literal, Optional, Union
+from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union

 import pyarrow as pa
 from overrides import EnforceOverrides, override
 from pyarrow import fs

 from lancedb.common import data_to_reader, validate_schema
-from lancedb.utils.events import register_event

 from ._lancedb import connect as lancedb_connect
 from .pydantic import LanceModel
@@ -534,6 +533,7 @@ class AsyncConnection(object):
        exist_ok: Optional[bool] = None,
        on_bad_vectors: Optional[str] = None,
        fill_value: Optional[float] = None,
+        storage_options: Optional[Dict[str, str]] = None,
    ) -> AsyncTable:
        """Create an [AsyncTable][lancedb.table.AsyncTable] in the database.

@@ -571,6 +571,12 @@ class AsyncConnection(object):
            One of "error", "drop", "fill".
        fill_value: float
            The value to use when filling vectors. Only used if on_bad_vectors="fill".
+        storage_options: dict, optional
+            Additional options for the storage backend. Options already set on the
+            connection will be inherited by the table, but can be overridden here.
+            See available options at
+            https://lancedb.github.io/lancedb/guides/storage/
+

        Returns
        -------
@@ -730,32 +736,40 @@ class AsyncConnection(object):
            mode = "exist_ok"

        if data is None:
-            new_table = await self._inner.create_empty_table(name, mode, schema)
+            new_table = await self._inner.create_empty_table(
+                name, mode, schema, storage_options=storage_options
+            )
        else:
            data = data_to_reader(data, schema)
            new_table = await self._inner.create_table(
                name,
                mode,
                data,
+                storage_options=storage_options,
            )

-        register_event("create_table")
        return AsyncTable(new_table)

-    async def open_table(self, name: str) -> Table:
+    async def open_table(
+        self, name: str, storage_options: Optional[Dict[str, str]] = None
+    ) -> Table:
        """Open a Lance Table in the database.

        Parameters
        ----------
        name: str
            The name of the table.
+        storage_options: dict, optional
+            Additional options for the storage backend. Options already set on the
+            connection will be inherited by the table, but can be overridden here.
+            See available options at
+            https://lancedb.github.io/lancedb/guides/storage/

        Returns
        -------
        A LanceTable object representing the table.
        """
-        table = await self._inner.open_table(name)
-        register_event("open_table")
+        table = await self._inner.open_table(name, storage_options)
        return AsyncTable(table)

    async def drop_table(self, name: str):
--- a/python/python/lancedb/embeddings/__init__.py
+++ b/python/python/lancedb/embeddings/__init__.py
@@ -10,7 +10,6 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-
 # ruff: noqa: F401
 from .base import EmbeddingFunction, EmbeddingFunctionConfig, TextEmbeddingFunction
 from .bedrock import BedRockText
@@ -21,4 +20,7 @@ from .open_clip import OpenClipEmbeddings
 from .openai import OpenAIEmbeddings
 from .registry import EmbeddingFunctionRegistry, get_registry
 from .sentence_transformers import SentenceTransformerEmbeddings
+from .gte import GteEmbeddings
+from .transformers import TransformersEmbeddingFunction, ColbertEmbeddings
+from .imagebind import ImageBindEmbeddings
 from .utils import with_embeddings
--- a/python/python/lancedb/embeddings/bedrock.py
+++ b/python/python/lancedb/embeddings/bedrock.py
@@ -78,6 +78,9 @@ class BedRockText(TextEmbeddingFunction):

        class Config:
            keep_untouched = (cached_property,)
+    else:
+        model_config = dict()
+        model_config["ignored_types"] = (cached_property,)

    def ndims(self):
        # return len(self._generate_embedding("test"))
--- a/python/python/lancedb/embeddings/gemini_text.py
+++ b/python/python/lancedb/embeddings/gemini_text.py
@@ -94,6 +94,9 @@ class GeminiText(TextEmbeddingFunction):

        class Config:
            keep_untouched = (cached_property,)
+    else:
+        model_config = dict()
+        model_config["ignored_types"] = (cached_property,)

    def ndims(self):
        # TODO: fix hardcoding
--- a/python/python/lancedb/embeddings/imagebind.py
+++ b/python/python/lancedb/embeddings/imagebind.py
@@ -22,6 +22,8 @@ from .base import EmbeddingFunction
 from .registry import register
 from .utils import AUDIO, IMAGES, TEXT

+from lancedb.pydantic import PYDANTIC_VERSION
+

@register("imagebind")
 class ImageBindEmbeddings(EmbeddingFunction):
@@ -38,6 +40,14 @@ class ImageBindEmbeddings(EmbeddingFunction):
    device: str = "cpu"
    normalize: bool = False

+    if PYDANTIC_VERSION < (2, 0):  # Pydantic 1.x compat
+
+        class Config:
+            keep_untouched = (cached_property,)
+    else:
+        model_config = dict()
+        model_config["ignored_types"] = (cached_property,)
+
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._ndims = 1024
--- a/python/python/lancedb/embeddings/transformers.py
+++ b/python/python/lancedb/embeddings/transformers.py
@@ -0,0 +1,106 @@
+#  Copyright (c) 2023. LanceDB Developers
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from functools import cached_property
+from typing import List, Any
+
+import numpy as np
+
+from pydantic import PrivateAttr
+from lancedb.pydantic import PYDANTIC_VERSION
+
+from ..util import attempt_import_or_raise
+from .base import EmbeddingFunction
+from .registry import register
+from .utils import TEXT
+
+
+@register("huggingface")
+class TransformersEmbeddingFunction(EmbeddingFunction):
+    """
+    An embedding function that can use any model from the transformers library.
+
+    Parameters:
+    ----------
+    name : str
+        The name of the model to use. This should be a model name that can be loaded
+        by transformers.AutoModel.from_pretrained. For example, "bert-base-uncased".
+        default: "colbert-ir/colbertv2.0"
+
+    To install the required package, run:
+        `pip install transformers`
+    You may also need to install PyTorch: `https://pytorch.org/get-started/locally/`
+
+    """
+
+    name: str = "colbert-ir/colbertv2.0"
+    _tokenizer: Any = PrivateAttr()
+    _model: Any = PrivateAttr()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._ndims = None
+        transformers = attempt_import_or_raise("transformers")
+        self._tokenizer = transformers.AutoTokenizer.from_pretrained(self.name)
+        self._model = transformers.AutoModel.from_pretrained(self.name)
+
+    if PYDANTIC_VERSION < (2, 0):  # Pydantic 1.x compat
+
+        class Config:
+            keep_untouched = (cached_property,)
+    else:
+        model_config = dict()
+        model_config["ignored_types"] = (cached_property,)
+
+    def ndims(self):
+        self._ndims = self._model.config.hidden_size
+        return self._ndims
+
+    def compute_query_embeddings(self, query: str, *args, **kwargs) -> List[np.array]:
+        return self.compute_source_embeddings(query)
+
+    def compute_source_embeddings(self, texts: TEXT, *args, **kwargs) -> List[np.array]:
+        texts = self.sanitize_input(texts)
+        embedding = []
+        for text in texts:
+            encoding = self._tokenizer(
+                text, return_tensors="pt", padding=True, truncation=True
+            )
+            emb = self._model(**encoding).last_hidden_state.mean(dim=1).squeeze()
+            embedding.append(emb.detach().numpy())
+
+        return embedding
+
+
+@register("colbert")
+class ColbertEmbeddings(TransformersEmbeddingFunction):
+    """
+    An embedding function that uses the colbert model from the huggingface library.
+
+    Parameters:
+    ----------
+    name : str
+        The name of the model to use. This should be a model name that can be loaded
+        by transformers.AutoModel.from_pretrained. For example, "bert-base-uncased".
+        default: "colbert-ir/colbertv2.0"
+
+    To install the required package, run:
+        `pip install transformers`
+    You may also need to install PyTorch: `https://pytorch.org/get-started/locally/`
+
+    """
+
+    name: str = "colbert-ir/colbertv2.0"
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
--- a/python/python/lancedb/embeddings/utils.py
+++ b/python/python/lancedb/embeddings/utils.py
@@ -19,15 +19,14 @@ import sys
 import time
 import urllib.error
 import weakref
+import logging
 from typing import Callable, List, Union
-
 import numpy as np
 import pyarrow as pa
 from lance.vector import vec_to_table
 from retry import retry

 from ..util import deprecated, safe_import_pandas
-from ..utils.general import LOGGER

 pd = safe_import_pandas()

@@ -256,7 +255,7 @@ def retry_with_exponential_backoff(
                    )

                delay *= exponential_base * (1 + jitter * random.random())
-                LOGGER.info(f"Retrying in {delay:.2f} seconds due to {e}")
+                logging.info("Retrying in %s seconds...", delay)
                time.sleep(delay)

    return wrapper
@@ -277,5 +276,5 @@ def url_retrieve(url: str):


 def api_key_not_found_help(provider):
-    LOGGER.error(f"Could not find API key for {provider}.")
+    logging.error("Could not find API key for %s", provider)
    raise ValueError(f"Please set the {provider.upper()}_API_KEY environment variable.")
--- a/python/python/lancedb/remote/db.py
+++ b/python/python/lancedb/remote/db.py
@@ -18,6 +18,7 @@ from concurrent.futures import ThreadPoolExecutor
 from typing import Iterable, List, Optional, Union
 from urllib.parse import urlparse

+from cachetools import TTLCache
 import pyarrow as pa
 from overrides import override

@@ -29,7 +30,6 @@ from ..table import Table, _sanitize_data
 from ..util import validate_table_name
 from .arrow import to_ipc_binary
 from .client import ARROW_STREAM_CONTENT_TYPE, RestfulLanceDBClient
-from .errors import LanceDBClientError


 class RemoteDBConnection(DBConnection):
@@ -60,6 +60,7 @@ class RemoteDBConnection(DBConnection):
            read_timeout=read_timeout,
        )
        self._request_thread_pool = request_thread_pool
+        self._table_cache = TTLCache(maxsize=10000, ttl=300)

    def __repr__(self) -> str:
        return f"RemoteConnect(name={self.db_name})"
@@ -89,6 +90,7 @@ class RemoteDBConnection(DBConnection):
            else:
                break
            for item in result:
+                self._table_cache[item] = True
                yield item

    @override
@@ -109,16 +111,10 @@ class RemoteDBConnection(DBConnection):
        self._client.mount_retry_adapter_for_table(name)

        # check if table exists
-        try:
+        if self._table_cache.get(name) is None:
            self._client.post(f"/v1/table/{name}/describe/")
-        except LanceDBClientError as err:
-            if str(err).startswith("Not found"):
-                logging.error(
-                    "Table %s does not exist. Please first call "
-                    "db.create_table(%s, data).",
-                    name,
-                    name,
-                )
+            self._table_cache[name] = True
+
        return RemoteTable(self, name)

    @override
@@ -267,6 +263,7 @@ class RemoteDBConnection(DBConnection):
            content_type=ARROW_STREAM_CONTENT_TYPE,
        )

+        self._table_cache[name] = True
        return RemoteTable(self, name)

    @override
@@ -282,6 +279,7 @@ class RemoteDBConnection(DBConnection):
        self._client.post(
            f"/v1/table/{name}/drop/",
        )
+        self._table_cache.pop(name)

    async def close(self):
        """Close the connection to the database."""
--- a/python/python/lancedb/rerankers/cohere.py
+++ b/python/python/lancedb/rerankers/cohere.py
@@ -1,4 +1,5 @@
 import os
+import semver
 from functools import cached_property
 from typing import Union

@@ -42,6 +43,14 @@ class CohereReranker(Reranker):
    @cached_property
    def _client(self):
        cohere = attempt_import_or_raise("cohere")
+        # ensure version is at least 5.0.0
+        if (
+            hasattr(cohere, "__version__")
+            and semver.compare(cohere.__version__, "5.0.0") < 0
+        ):
+            raise ValueError(
+                f"cohere version must be at least 5.0.0, found {cohere.__version__}"
+            )
        if os.environ.get("COHERE_API_KEY") is None and self.api_key is None:
            raise ValueError(
                "COHERE_API_KEY not set. Either set it in your environment or \
@@ -51,11 +60,14 @@ class CohereReranker(Reranker):

    def _rerank(self, result_set: pa.Table, query: str):
        docs = result_set[self.column].to_pylist()
-        results = self._client.rerank(
+        response = self._client.rerank(
            query=query,
            documents=docs,
            top_n=self.top_n,
            model=self.model_name,
+        )
+        results = (
+            response.results
        )  # returns list (text, idx, relevance) attributes sorted descending by score
        indices, scores = list(
            zip(*[(result.index, result.relevance_score) for result in results])
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -53,7 +53,6 @@ from .util import (
    safe_import_polars,
    value_to_sql,
 )
-from .utils.events import register_event

 if TYPE_CHECKING:
    import PIL
@@ -96,6 +95,9 @@ def _sanitize_data(
                data.data.to_batches(), schema, metadata, on_bad_vectors, fill_value
            )

+    if isinstance(data, LanceModel):
+        raise ValueError("Cannot add a single LanceModel to a table. Use a list.")
+
    if isinstance(data, list):
        # convert to list of dict if data is a bunch of LanceModels
        if isinstance(data[0], LanceModel):
@@ -907,7 +909,6 @@ class LanceTable(Table):
                f"Table {name} does not exist."
                f"Please first call db.create_table({name}, data)"
            )
-        register_event("open_table")

        return tbl

@@ -1151,7 +1152,6 @@ class LanceTable(Table):
            accelerator=accelerator,
            index_cache_size=index_cache_size,
        )
-        register_event("create_index")

    def create_scalar_index(self, column: str, *, replace: bool = True):
        self._dataset_mut.create_scalar_index(
@@ -1211,7 +1211,6 @@ class LanceTable(Table):
            ordering_fields=ordering_field_names,
            writer_heap_size=writer_heap_size,
        )
-        register_event("create_fts_index")

    def _get_fts_index_path(self):
        return join_uri(self._dataset_uri, "_indices", "tantivy")
@@ -1259,7 +1258,6 @@ class LanceTable(Table):
        self._ref.dataset = lance.write_dataset(
            data, self._dataset_uri, schema=self.schema, mode=mode
        )
-        register_event("add")

    def merge(
        self,
@@ -1322,7 +1320,6 @@ class LanceTable(Table):
        self._ref.dataset = self._dataset_mut.merge(
            other_table, left_on=left_on, right_on=right_on, schema=schema
        )
-        register_event("merge")

    @cached_property
    def embedding_functions(self) -> dict:
@@ -1409,8 +1406,14 @@ class LanceTable(Table):
            vector and the returned vector.
        """
        if vector_column_name is None and query is not None:
+            try:
                vector_column_name = inf_vector_column_query(self.schema)
-        register_event("search_table")
+            except Exception as e:
+                if query_type == "fts":
+                    vector_column_name = ""
+                else:
+                    raise e
+
        return LanceQueryBuilder.create(
            self,
            query,
@@ -1537,7 +1540,6 @@ class LanceTable(Table):
        if data is not None:
            new_table.add(data)

-        register_event("create_table")
        return new_table

    def delete(self, where: str):
@@ -1596,7 +1598,6 @@ class LanceTable(Table):
            values_sql = {k: value_to_sql(v) for k, v in values.items()}

        self._dataset_mut.update(values_sql, where)
-        register_event("update")

    def _execute_query(
        self, query: Query, batch_size: Optional[int] = None
@@ -2113,7 +2114,6 @@ class AsyncTable:
        if isinstance(data, pa.Table):
            data = pa.RecordBatchReader.from_batches(data.schema, data.to_batches())
        await self._inner.add(data, mode)
-        register_event("add")

    def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
        """
--- a/python/python/lancedb/utils/__init__.py
+++ b/python/python/lancedb/utils/__init__.py
@@ -1,15 +0,0 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-from .config import Config
-
-CONFIG = Config()
--- a/python/python/lancedb/utils/config.py
+++ b/python/python/lancedb/utils/config.py
@@ -1,118 +0,0 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-import copy
-import hashlib
-import os
-import platform
-import uuid
-from pathlib import Path
-
-from .general import LOGGER, is_dir_writeable, yaml_load, yaml_save
-
-
-def get_user_config_dir(sub_dir="lancedb"):
-    """
-    Get the user config directory.
-
-    Args:
-        sub_dir (str): The name of the subdirectory to create.
-
-    Returns:
-        (Path): The path to the user config directory.
-    """
-    # Return the appropriate config directory for each operating system
-    if platform.system() == "Windows":
-        path = Path.home() / "AppData" / "Roaming" / sub_dir
-    elif platform.system() == "Darwin":
-        path = Path.home() / "Library" / "Application Support" / sub_dir
-    elif platform.system() == "Linux":
-        path = Path.home() / ".config" / sub_dir
-    else:
-        raise ValueError(f"Unsupported operating system: {platform.system()}")
-
-    # GCP and AWS lambda fix, only /tmp is writeable
-    if not is_dir_writeable(path.parent):
-        LOGGER.warning(
-            f"WARNING ⚠️ user config directory '{path}' is not writeable, defaulting "
-            "to '/tmp' or CWD. Alternatively you can define a LANCEDB_CONFIG_DIR "
-            "environment variable for this path."
-        )
-        path = (
-            Path("/tmp") / sub_dir
-            if is_dir_writeable("/tmp")
-            else Path().cwd() / sub_dir
-        )
-
-    # Create the subdirectory if it does not exist
-    path.mkdir(parents=True, exist_ok=True)
-
-    return path
-
-
-USER_CONFIG_DIR = Path(os.getenv("LANCEDB_CONFIG_DIR") or get_user_config_dir())
-CONFIG_FILE = USER_CONFIG_DIR / "config.yaml"
-
-
-class Config(dict):
-    """
-    Manages lancedb config stored in a YAML file.
-
-    Args:
-        file (str | Path): Path to the lancedb config YAML file. Default is
-        USER_CONFIG_DIR / 'config.yaml'.
-    """
-
-    def __init__(self, file=CONFIG_FILE):
-        self.file = Path(file)
-        self.defaults = {  # Default global config values
-            "diagnostics": True,
-            "uuid": hashlib.sha256(str(uuid.getnode()).encode()).hexdigest(),
-        }
-
-        super().__init__(copy.deepcopy(self.defaults))
-
-        if not self.file.exists():
-            self.save()
-
-        self.load()
-        correct_keys = self.keys() == self.defaults.keys()
-        correct_types = all(
-            type(a) is type(b) for a, b in zip(self.values(), self.defaults.values())
-        )
-        if not (correct_keys and correct_types):
-            LOGGER.warning(
-                "WARNING ⚠️ LanceDB settings reset to default values. This may be due "
-                "to a possible problem with your settings or a recent package update. "
-                f"\nView settings & usage with 'lancedb settings' or at '{self.file}'"
-            )
-            self.reset()
-
-    def load(self):
-        """Loads settings from the YAML file."""
-        super().update(yaml_load(self.file))
-
-    def save(self):
-        """Saves the current settings to the YAML file."""
-        yaml_save(self.file, dict(self))
-
-    def update(self, *args, **kwargs):
-        """Updates a setting value in the current settings."""
-        super().update(*args, **kwargs)
-        self.save()
-
-    def reset(self):
-        """Resets the settings to default and saves them."""
-        self.clear()
-        self.update(self.defaults)
-        self.save()
--- a/python/python/lancedb/utils/events.py
+++ b/python/python/lancedb/utils/events.py
@@ -1,171 +0,0 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-import datetime
-import importlib.metadata
-import platform
-import random
-import sys
-import time
-
-from lancedb.utils import CONFIG
-from lancedb.utils.general import TryExcept
-
-from .general import (
-    PLATFORMS,
-    get_git_origin_url,
-    is_git_dir,
-    is_github_actions_ci,
-    is_online,
-    is_pip_package,
-    is_pytest_running,
-    threaded_request,
-)
-
-
-class _Events:
-    """
-    A class for collecting anonymous event analytics. Event analytics are enabled when
-    ``diagnostics=True`` in config and disabled when ``diagnostics=False``.
-
-    You can enable or disable diagnostics by running ``lancedb diagnostics --enabled``
-    or ``lancedb diagnostics --disabled``.
-
-    Attributes
-    ----------
-    url : str
-        The URL to send anonymous events.
-    rate_limit : float
-        The rate limit in seconds for sending events.
-    metadata : dict
-        A dictionary containing metadata about the environment.
-    enabled : bool
-        A flag to enable or disable Events based on certain conditions.
-    """
-
-    _instance = None
-
-    url = "https://app.posthog.com/capture/"
-    headers = {"Content-Type": "application/json"}
-    api_key = "phc_oENDjGgHtmIDrV6puUiFem2RB4JA8gGWulfdulmMdZP"
-    # This api-key is write only and is safe to expose in the codebase.
-
-    def __init__(self):
-        """
-        Initializes the Events object with default values for events, rate_limit,
-        and metadata.
-        """
-        self.events = []  # events list
-        self.throttled_event_names = ["search_table"]
-        self.throttled_events = set()
-        self.max_events = 5  # max events to store in memory
-        self.rate_limit = 60.0 * 60.0  # rate limit (seconds)
-        self.time = 0.0
-
-        if is_git_dir():
-            install = "git"
-        elif is_pip_package():
-            install = "pip"
-        else:
-            install = "other"
-        self.metadata = {
-            "cli": sys.argv[0],
-            "install": install,
-            "python": ".".join(platform.python_version_tuple()[:2]),
-            "version": importlib.metadata.version("lancedb"),
-            "platforms": PLATFORMS,
-            "session_id": round(random.random() * 1e15),
-            # TODO: In future we might be interested in this metric
-            # 'engagement_time_msec': 1000
-        }
-
-        TESTS_RUNNING = is_pytest_running() or is_github_actions_ci()
-        ONLINE = is_online()
-        self.enabled = (
-            CONFIG["diagnostics"]
-            and not TESTS_RUNNING
-            and ONLINE
-            and (
-                is_pip_package()
-                or get_git_origin_url() == "https://github.com/lancedb/lancedb.git"
-            )
-        )
-
-    def __call__(self, event_name, params={}):
-        """
-        Attempts to add a new event to the events list and send events if the rate
-        limit is reached.
-
-        Args
-        ----
-        event_name : str
-            The name of the event to be logged.
-        params : dict, optional
-            A dictionary of additional parameters to be logged with the event.
-        """
-        ### NOTE: We might need a way to tag a session with a label to check usage
-        ### from a source. Setting label should be exposed to the user.
-        if not self.enabled:
-            return
-        if (
-            len(self.events) < self.max_events
-        ):  # Events list limited to self.max_events (drop any events past this)
-            params.update(self.metadata)
-            event = {
-                "event": event_name,
-                "properties": params,
-                "timestamp": datetime.datetime.now(
-                    tz=datetime.timezone.utc
-                ).isoformat(),
-                "distinct_id": CONFIG["uuid"],
-            }
-            if event_name not in self.throttled_event_names:
-                self.events.append(event)
-            elif event_name not in self.throttled_events:
-                self.throttled_events.add(event_name)
-                self.events.append(event)
-
-        # Check rate limit
-        t = time.time()
-        if (t - self.time) < self.rate_limit:
-            return
-        # Time is over rate limiter, send now
-        data = {
-            "api_key": self.api_key,
-            "distinct_id": CONFIG["uuid"],  # posthog needs this to accepts the event
-            "batch": self.events,
-        }
-        # POST equivalent to requests.post(self.url, json=data).
-        # threaded request is used to avoid blocking, retries are disabled, and
-        # verbose is disabled to avoid any possible disruption in the console.
-        threaded_request(
-            method="post",
-            url=self.url,
-            headers=self.headers,
-            json=data,
-            retry=0,
-            verbose=False,
-        )
-
-        # Flush & Reset
-        self.events = []
-        self.throttled_events = set()
-        self.time = t
-
-
-@TryExcept(verbose=False)
-def register_event(name: str, **kwargs):
-    if _Events._instance is None:
-        _Events._instance = _Events()
-
-    _Events._instance(name, **kwargs)
--- a/python/python/lancedb/utils/general.py
+++ b/python/python/lancedb/utils/general.py
@@ -1,454 +0,0 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-import contextlib
-import importlib
-import logging.config
-import os
-import platform
-import subprocess
-import sys
-import threading
-import time
-from pathlib import Path
-from typing import Union
-
-import requests
-import yaml
-
-LOGGING_NAME = "lancedb"
-VERBOSE = (
-    str(os.getenv("LANCEDB_VERBOSE", True)).lower() == "true"
-)  # global verbose mode
-
-
-def set_logging(name=LOGGING_NAME, verbose=True):
-    """Sets up logging for the given name.
-
-    Parameters
-    ----------
-    name : str, optional
-        The name of the logger. Default is 'lancedb'.
-    verbose : bool, optional
-        Whether to enable verbose logging. Default is True.
-    """
-
-    rank = int(os.getenv("RANK", -1))  # rank in world for Multi-GPU trainings
-    level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR
-    logging.config.dictConfig(
-        {
-            "version": 1,
-            "disable_existing_loggers": False,
-            "formatters": {name: {"format": "%(message)s"}},
-            "handlers": {
-                name: {
-                    "class": "logging.StreamHandler",
-                    "formatter": name,
-                    "level": level,
-                }
-            },
-            "loggers": {name: {"level": level, "handlers": [name], "propagate": False}},
-        }
-    )
-
-
-set_logging(LOGGING_NAME, verbose=VERBOSE)
-LOGGER = logging.getLogger(LOGGING_NAME)
-
-
-def is_pip_package(filepath: str = __name__) -> bool:
-    """Determines if the file at the given filepath is part of a pip package.
-
-    Parameters
-    ----------
-    filepath : str, optional
-        The filepath to check. Default is the current file.
-
-    Returns
-    -------
-    bool
-        True if the file is part of a pip package, False otherwise.
-    """
-    # Get the spec for the module
-    spec = importlib.util.find_spec(filepath)
-
-    # Return whether the spec is not None and the origin is not None (indicating
-    # it is a package)
-    return spec is not None and spec.origin is not None
-
-
-def is_pytest_running():
-    """Determines whether pytest is currently running or not.
-
-    Returns
-    -------
-    bool
-        True if pytest is running, False otherwise.
-    """
-    return (
-        ("PYTEST_CURRENT_TEST" in os.environ)
-        or ("pytest" in sys.modules)
-        or ("pytest" in Path(sys.argv[0]).stem)
-    )
-
-
-def is_github_actions_ci() -> bool:
-    """
-    Determine if the current environment is a GitHub Actions CI Python runner.
-
-    Returns
-    -------
-    bool
-        True if the current environment is a GitHub Actions CI Python runner,
-        False otherwise.
-    """
-
-    return (
-        "GITHUB_ACTIONS" in os.environ
-        and "RUNNER_OS" in os.environ
-        and "RUNNER_TOOL_CACHE" in os.environ
-    )
-
-
-def is_git_dir():
-    """
-    Determines whether the current file is part of a git repository.
-    If the current file is not part of a git repository, returns None.
-
-    Returns
-    -------
-    bool
-        True if current file is part of a git repository.
-    """
-    return get_git_dir() is not None
-
-
-def is_online() -> bool:
-    """
-    Check internet connectivity by attempting to connect to a known online host.
-
-    Returns
-    -------
-    bool
-        True if connection is successful, False otherwise.
-    """
-    import socket
-
-    for host in "1.1.1.1", "8.8.8.8", "223.5.5.5":  # Cloudflare, Google, AliDNS:
-        try:
-            test_connection = socket.create_connection(address=(host, 53), timeout=2)
-        except (socket.timeout, socket.gaierror, OSError):  # noqa: PERF203
-            continue
-        else:
-            # If the connection was successful, close it to avoid a ResourceWarning
-            test_connection.close()
-            return True
-    return False
-
-
-def is_dir_writeable(dir_path: Union[str, Path]) -> bool:
-    """Check if a directory is writeable.
-
-    Parameters
-    ----------
-    dir_path : Union[str, Path]
-        The path to the directory.
-
-    Returns
-    -------
-    bool
-        True if the directory is writeable, False otherwise.
-    """
-    return os.access(str(dir_path), os.W_OK)
-
-
-def is_colab():
-    """Check if the current script is running inside a Google Colab notebook.
-
-    Returns
-    -------
-    bool
-        True if running inside a Colab notebook, False otherwise.
-    """
-    return "COLAB_RELEASE_TAG" in os.environ or "COLAB_BACKEND_VERSION" in os.environ
-
-
-def is_kaggle():
-    """Check if the current script is running inside a Kaggle kernel.
-
-    Returns
-    -------
-    bool
-        True if running inside a Kaggle kernel, False otherwise.
-    """
-    return (
-        os.environ.get("PWD") == "/kaggle/working"
-        and os.environ.get("KAGGLE_URL_BASE") == "https://www.kaggle.com"
-    )
-
-
-def is_jupyter():
-    """Check if the current script is running inside a Jupyter Notebook.
-
-    Returns
-    -------
-    bool
-        True if running inside a Jupyter Notebook, False otherwise.
-    """
-    with contextlib.suppress(Exception):
-        from IPython import get_ipython
-
-        return get_ipython() is not None
-    return False
-
-
-def is_docker() -> bool:
-    """Determine if the script is running inside a Docker container.
-
-    Returns
-    -------
-    bool
-        True if the script is running inside a Docker container, False otherwise.
-    """
-    file = Path("/proc/self/cgroup")
-    if file.exists():
-        with open(file) as f:
-            return "docker" in f.read()
-    else:
-        return False
-
-
-def get_git_dir():
-    """Determine whether the current file is part of a git repository and if so,
-    returns the repository root directory.
-    If the current file is not part of a git repository, returns None.
-
-    Returns
-    -------
-    Path | None
-        Git root directory if found or None if not found.
-    """
-    for d in Path(__file__).parents:
-        if (d / ".git").is_dir():
-            return d
-
-
-def get_git_origin_url():
-    """Retrieve the origin URL of a git repository.
-
-    Returns
-    -------
-    str | None
-        The origin URL of the git repository or None if not git directory.
-    """
-    if is_git_dir():
-        with contextlib.suppress(subprocess.CalledProcessError):
-            origin = subprocess.check_output(
-                ["git", "config", "--get", "remote.origin.url"]
-            )
-            return origin.decode().strip()
-
-
-def yaml_save(file="data.yaml", data=None, header=""):
-    """Save YAML data to a file.
-
-    Parameters
-    ----------
-    file : str, optional
-        File name, by default 'data.yaml'.
-    data : dict, optional
-        Data to save in YAML format, by default None.
-    header : str, optional
-        YAML header to add, by default "".
-    """
-    if data is None:
-        data = {}
-    file = Path(file)
-    if not file.parent.exists():
-        # Create parent directories if they don't exist
-        file.parent.mkdir(parents=True, exist_ok=True)
-
-    # Convert Path objects to strings
-    for k, v in data.items():
-        if isinstance(v, Path):
-            data[k] = str(v)
-
-    # Dump data to file in YAML format
-    with open(file, "w", errors="ignore", encoding="utf-8") as f:
-        if header:
-            f.write(header)
-        yaml.safe_dump(data, f, sort_keys=False, allow_unicode=True)
-
-
-def yaml_load(file="data.yaml", append_filename=False):
-    """
-    Load YAML data from a file.
-
-    Parameters
-    ----------
-    file : str, optional
-        File name. Default is 'data.yaml'.
-    append_filename : bool, optional
-        Add the YAML filename to the YAML dictionary. Default is False.
-
-    Returns
-    -------
-    dict
-        YAML data and file name.
-    """
-    assert Path(file).suffix in (
-        ".yaml",
-        ".yml",
-    ), f"Attempting to load non-YAML file {file} with yaml_load()"
-    with open(file, errors="ignore", encoding="utf-8") as f:
-        s = f.read()  # string
-
-        # Add YAML filename to dict and return
-        data = (
-            yaml.safe_load(s) or {}
-        )  # always return a dict (yaml.safe_load() may return None for empty files)
-        if append_filename:
-            data["yaml_file"] = str(file)
-        return data
-
-
-def yaml_print(yaml_file: Union[str, Path, dict]) -> None:
-    """
-    Pretty prints a YAML file or a YAML-formatted dictionary.
-
-    Parameters
-    ----------
-    yaml_file : Union[str, Path, dict]
-        The file path of the YAML file or a YAML-formatted dictionary.
-
-    Returns
-    -------
-    None
-    """
-    yaml_dict = (
-        yaml_load(yaml_file) if isinstance(yaml_file, (str, Path)) else yaml_file
-    )
-    dump = yaml.dump(yaml_dict, sort_keys=False, allow_unicode=True)
-    LOGGER.info("Printing '%s'\n\n%s", yaml_file, dump)
-
-
-PLATFORMS = [platform.system()]
-if is_colab():
-    PLATFORMS.append("Colab")
-if is_kaggle():
-    PLATFORMS.append("Kaggle")
-if is_jupyter():
-    PLATFORMS.append("Jupyter")
-if is_docker():
-    PLATFORMS.append("Docker")
-
-PLATFORMS = "|".join(PLATFORMS)
-
-
-class TryExcept(contextlib.ContextDecorator):
-    """
-    TryExcept context manager.
-    Usage: @TryExcept() decorator or 'with TryExcept():' context manager.
-    """
-
-    def __init__(self, msg="", verbose=True):
-        """
-        Parameters
-        ----------
-        msg : str, optional
-            Custom message to display in case of exception, by default "".
-        verbose : bool, optional
-            Whether to display the message, by default True.
-        """
-        self.msg = msg
-        self.verbose = verbose
-
-    def __enter__(self):
-        pass
-
-    def __exit__(self, exc_type, value, traceback):
-        if self.verbose and value:
-            LOGGER.info("%s%s%s", self.msg, ": " if self.msg else "", value)
-        return True
-
-
-def threaded_request(
-    method, url, retry=3, timeout=30, thread=True, code=-1, verbose=True, **kwargs
-):
-    """
-    Makes an HTTP request using the 'requests' library, with exponential backoff
-    retries up to a specified timeout.
-
-    Parameters
-    ----------
-    method : str
-        The HTTP method to use for the request. Choices are 'post' and 'get'.
-    url : str
-        The URL to make the request to.
-    retry : int, optional
-        Number of retries to attempt before giving up, by default 3.
-    timeout : int, optional
-        Timeout in seconds after which the function will give up retrying,
-        by default 30.
-    thread : bool, optional
-        Whether to execute the request in a separate daemon thread, by default True.
-    code : int, optional
-        An identifier for the request, used for logging purposes, by default -1.
-    verbose : bool, optional
-        A flag to determine whether to print out to console or not, by default True.
-
-    Returns
-    -------
-    requests.Response
-        The HTTP response object. If the request is executed in a separate thread,
-        returns the thread itself.
-    """
-    # retry only these codes TODO: add codes if needed in future (500, 408)
-    retry_codes = ()
-
-    @TryExcept(verbose=verbose)
-    def func(method, url, **kwargs):
-        """Make HTTP requests with retries and timeouts, with optional progress
-        tracking.
-        """
-        response = None
-        t0 = time.time()
-        for i in range(retry + 1):
-            if (time.time() - t0) > timeout:
-                break
-            response = requests.request(method, url, **kwargs)
-            if response.status_code < 300:  # good return codes in the 2xx range
-                break
-            try:
-                m = response.json().get("message", "No JSON message.")
-            except AttributeError:
-                m = "Unable to read JSON."
-            if i == 0:
-                if response.status_code in retry_codes:
-                    m += f" Retrying {retry}x for {timeout}s." if retry else ""
-                elif response.status_code == 429:  # rate limit
-                    m = "Rate limit reached"
-                if verbose:
-                    LOGGER.warning("%s #%s", response.status_code, m)
-                if response.status_code not in retry_codes:
-                    return response
-            time.sleep(2**i)  # exponential standoff
-        return response
-
-    args = method, url
-    if thread:
-        return threading.Thread(
-            target=func, args=args, kwargs=kwargs, daemon=True
-        ).start()
-    else:
-        return func(*args, **kwargs)
--- a/python/python/lancedb/utils/sentry_log.py
+++ b/python/python/lancedb/utils/sentry_log.py
@@ -1,119 +0,0 @@
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-import bdb
-import importlib.metadata
-import logging
-import sys
-from pathlib import Path
-
-from lancedb.utils import CONFIG
-
-from .general import (
-    PLATFORMS,
-    TryExcept,
-    is_git_dir,
-    is_github_actions_ci,
-    is_online,
-    is_pip_package,
-    is_pytest_running,
-)
-
-
-@TryExcept(verbose=False)
-def set_sentry():
-    """
-    Initialize the Sentry SDK for error tracking and reporting. Only used if
-    sentry_sdk package is installed and sync=True in settings. Run 'lancedb settings'
-    to see and update settings YAML file.
-
-    Conditions required to send errors (ALL conditions must be met or no errors will
-    be reported):
-        - sentry_sdk package is installed
-        - sync=True in  settings
-        - pytest is not running
-        - running in a pip package installation
-        - running in a non-git directory
-        - online environment
-
-    The function also configures Sentry SDK to ignore KeyboardInterrupt and
-    FileNotFoundError exceptions for now.
-
-    Additionally, the function sets custom tags and user information for Sentry
-    events.
-    """
-
-    def before_send(event, hint):
-        """
-        Modify the event before sending it to Sentry based on specific exception
-        types and messages.
-
-        Args:
-            event (dict): The event dictionary containing information about the error.
-            hint (dict): A dictionary containing additional information about
-                         the error.
-
-        Returns:
-            dict: The modified event or None if the event should not be sent
-                  to Sentry.
-        """
-        if "exc_info" in hint:
-            exc_type, exc_value, tb = hint["exc_info"]
-            ignored_errors = ["out of memory", "no space left on device", "testing"]
-            if any(error in str(exc_value).lower() for error in ignored_errors):
-                return None
-
-        if is_git_dir():
-            install = "git"
-        elif is_pip_package():
-            install = "pip"
-        else:
-            install = "other"
-
-        event["tags"] = {
-            "sys_argv": sys.argv[0],
-            "sys_argv_name": Path(sys.argv[0]).name,
-            "install": install,
-            "platforms": PLATFORMS,
-            "version": importlib.metadata.version("lancedb"),
-        }
-        return event
-
-    TESTS_RUNNING = is_pytest_running() or is_github_actions_ci()
-    ONLINE = is_online()
-    if CONFIG["diagnostics"] and not TESTS_RUNNING and ONLINE and is_pip_package():
-        # and not is_git_dir(): # not running inside a git dir. Maybe too restrictive?
-
-        # If sentry_sdk package is not installed then return and do not use Sentry
-        try:
-            import sentry_sdk  # noqa
-        except ImportError:
-            return
-
-        sentry_sdk.init(
-            dsn="https://c63ef8c64e05d1aa1a96513361f3ca2f@o4505950840946688.ingest.sentry.io/4505950933614592",
-            debug=False,
-            include_local_variables=False,
-            traces_sample_rate=0.5,
-            environment="production",  # 'dev' or 'production'
-            before_send=before_send,
-            ignore_errors=[KeyboardInterrupt, FileNotFoundError, bdb.BdbQuit],
-        )
-        sentry_sdk.set_user({"id": CONFIG["uuid"]})  # SHA-256 anonymized UUID hash
-
-        # Disable all sentry logging
-        for logger in "sentry_sdk", "sentry_sdk.errors":
-            logging.getLogger(logger).setLevel(logging.CRITICAL)
-
-
-set_sentry()
--- a/python/python/tests/test_cli.py
+++ b/python/python/tests/test_cli.py
@@ -1,31 +0,0 @@
-from click.testing import CliRunner
-from lancedb.cli.cli import cli
-from lancedb.utils import CONFIG
-
-
-def test_entry():
-    runner = CliRunner()
-    result = runner.invoke(cli)
-    assert result.exit_code == 0  # Main check
-    assert "lancedb" in result.output.lower()  # lazy check
-
-
-def test_diagnostics():
-    runner = CliRunner()
-    result = runner.invoke(cli, ["diagnostics", "--disabled"])
-    assert result.exit_code == 0  # Main check
-    assert not CONFIG["diagnostics"]
-
-    result = runner.invoke(cli, ["diagnostics", "--enabled"])
-    assert result.exit_code == 0  # Main check
-    assert CONFIG["diagnostics"]
-
-
-def test_config():
-    runner = CliRunner()
-    result = runner.invoke(cli, ["config"])
-    assert result.exit_code == 0  # Main check
-    cfg = CONFIG.copy()
-    cfg.pop("uuid")
-    for item in cfg:  # check for keys only as formatting is subject to change
-        assert item in result.output
--- a/python/python/tests/test_db.py
+++ b/python/python/tests/test_db.py
@@ -28,13 +28,25 @@ def test_basic(tmp_path):
    assert db.uri == str(tmp_path)
    assert db.table_names() == []

+    class SimpleModel(LanceModel):
+        item: str
+        price: float
+        vector: Vector(2)
+
    table = db.create_table(
        "test",
        data=[
            {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
            {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
        ],
+        schema=SimpleModel,
    )
+
+    with pytest.raises(
+        ValueError, match="Cannot add a single LanceModel to a table. Use a list."
+    ):
+        table.add(SimpleModel(item="baz", price=30.0, vector=[1.0, 2.0]))
+
    rs = table.search([100, 100]).limit(1).to_pandas()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "bar"
@@ -43,6 +55,11 @@ def test_basic(tmp_path):
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "foo"

+    table.create_fts_index(["item"])
+    rs = table.search("bar", query_type="fts").to_pandas()
+    assert len(rs) == 1
+    assert rs["item"].iloc[0] == "bar"
+
    assert db.table_names() == ["test"]
    assert "test" in db
    assert len(db) == 1
--- a/python/python/tests/test_embeddings_slow.py
+++ b/python/python/tests/test_embeddings_slow.py
@@ -45,7 +45,7 @@ except Exception:


@pytest.mark.slow
-@pytest.mark.parametrize("alias", ["sentence-transformers", "openai"])
+@pytest.mark.parametrize("alias", ["sentence-transformers", "openai", "huggingface"])
 def test_basic_text_embeddings(alias, tmp_path):
    db = lancedb.connect(tmp_path)
    registry = get_registry()
@@ -84,7 +84,7 @@ def test_basic_text_embeddings(alias, tmp_path):
        )
    )

-    query = "greetings"
+    query = "greeting"
    actual = (
        table.search(query, vector_column_name="vector").limit(1).to_pydantic(Words)[0]
    )
@@ -184,9 +184,9 @@ def test_imagebind(tmp_path):
    import shutil
    import tempfile

-    import lancedb.embeddings.imagebind
    import pandas as pd
    import requests
+
    from lancedb.embeddings import get_registry
    from lancedb.pydantic import LanceModel, Vector

@@ -321,8 +321,6 @@ def test_gemini_embedding(tmp_path):
 )
@pytest.mark.slow
 def test_gte_embedding(tmp_path):
-    import lancedb.embeddings.gte
-
    model = get_registry().get("gte-text").create()

    class TextModel(LanceModel):
--- a/python/python/tests/test_s3.py
+++ b/python/python/tests/test_s3.py
@@ -0,0 +1,158 @@
+#  Copyright 2024 Lance Developers
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import asyncio
+import copy
+
+import pytest
+import pyarrow as pa
+import lancedb
+
+
+# Connection settings shared by every test in this module. All of these keys
+# are accepted by `storage_options` (the tests pass a copy of this dict to
+# LanceDB); the region, credentials and endpoint are also reused for the boto3
+# clients created by the helpers below.
+# NOTE(review): the endpoint looks like a local S3 emulator (CI starts
+# localstack on port 4566), which would explain `allow_http` -- confirm.
+CONFIG = {
+    "allow_http": "true",
+    "aws_access_key_id": "ACCESSKEY",
+    "aws_secret_access_key": "SECRETKEY",
+    "aws_endpoint": "http://localhost:4566",
+    "aws_region": "us-east-1",
+}
+
+
+def get_boto3_client(*args, **kwargs):
+    """Build a boto3 client that uses the region and credentials from CONFIG.
+
+    Positional and keyword arguments are forwarded to ``boto3.client``
+    unchanged (callers in this module pass the service name and
+    ``endpoint_url``).
+    """
+    # boto3 is imported inside the function, so it is only needed when this
+    # helper is actually called.
+    import boto3
+
+    region = CONFIG["aws_region"]
+    access_key = CONFIG["aws_access_key_id"]
+    secret_key = CONFIG["aws_secret_access_key"]
+    return boto3.client(
+        *args,
+        region_name=region,
+        aws_access_key_id=access_key,
+        aws_secret_access_key=secret_key,
+        **kwargs,
+    )
+
+
+@pytest.fixture(scope="module")
+def s3_bucket():
+    """Yield the name of a freshly created bucket and delete it afterwards.
+
+    A bucket left behind under the same name is removed before the new one is
+    created.
+    """
+    bucket = "lance-integtest"
+    client = get_boto3_client("s3", endpoint_url=CONFIG["aws_endpoint"])
+
+    # Remove leftovers from an earlier run; a missing bucket is fine.
+    try:
+        delete_bucket(client, bucket)
+    except client.exceptions.NoSuchBucket:
+        pass
+
+    client.create_bucket(Bucket=bucket)
+    yield bucket
+
+    # Teardown: empty and remove the bucket once the fixture is finalized.
+    delete_bucket(client, bucket)
+
+
+def delete_bucket(s3, bucket_name):
+    """Delete ``bucket_name`` together with every object stored in it.
+
+    Parameters
+    ----------
+    s3 :
+        boto3 S3 client used for all requests.
+    bucket_name : str
+        Name of the bucket to remove.
+    """
+    # A bucket has to be empty before it can be deleted. A single listing call
+    # returns at most 1000 keys, so walk every page instead of only the first.
+    paginator = s3.get_paginator("list_objects_v2")
+    for page in paginator.paginate(Bucket=bucket_name):
+        # "Contents" is absent from a page that holds no objects.
+        for obj in page.get("Contents", []):
+            s3.delete_object(Bucket=bucket_name, Key=obj["Key"])
+    s3.delete_bucket(Bucket=bucket_name)
+
+
+@pytest.mark.s3_test
+def test_s3_lifecycle(s3_bucket: str):
+    """Create, overwrite, append to, reopen and drop a table stored on S3."""
+    table_data = pa.table({"x": [1, 2, 3]})
+    db_uri = f"s3://{s3_bucket}/test_lifecycle"
+    options = copy.copy(CONFIG)
+
+    async def lifecycle():
+        db = await lancedb.connect_async(db_uri, storage_options=options)
+
+        # Creating from a schema alone gives an empty table.
+        empty = await db.create_table("test", schema=table_data.schema)
+        assert await empty.count_rows() == 0
+
+        # Overwriting with data replaces it with a three-row table.
+        filled = await db.create_table("test", table_data, mode="overwrite")
+        assert await filled.count_rows() == 3
+
+        # Appending the same rows doubles the count.
+        await filled.add(table_data, mode="append")
+        assert await filled.count_rows() == 6
+
+        # A freshly opened handle sees the same six rows.
+        reopened = await db.open_table("test")
+        assert await reopened.count_rows() == 6
+
+        await db.drop_table("test")
+
+        await db.drop_database()
+
+    asyncio.run(lifecycle())
+
+
+@pytest.fixture()
+def kms_key():
+    """Yield the id of a newly created KMS key, then schedule its deletion."""
+    client = get_boto3_client("kms", endpoint_url=CONFIG["aws_endpoint"])
+    created = client.create_key()
+    new_key_id = created["KeyMetadata"]["KeyId"]
+    yield new_key_id
+    # Teardown: request deletion of the key with a 7 day pending window.
+    client.schedule_key_deletion(KeyId=new_key_id, PendingWindowInDays=7)
+
+
+def validate_objects_encrypted(bucket: str, path: str, kms_key: str):
+    """Assert that all objects under ``path`` use SSE-KMS with ``kms_key``.
+
+    Parameters
+    ----------
+    bucket : str
+        Bucket to inspect.
+    path : str
+        Key prefix of the objects to check.
+    kms_key : str
+        Id of the KMS key the objects must be encrypted with.
+
+    Raises
+    ------
+    AssertionError
+        If no object exists under ``path``, or if an object is not encrypted
+        with ``aws:kms`` using the expected key.
+    """
+    s3 = get_boto3_client("s3", endpoint_url=CONFIG["aws_endpoint"])
+    # "Contents" is absent from the response when nothing matches the prefix.
+    # Fail with a clear message rather than a bare KeyError.
+    listing = s3.list_objects_v2(Bucket=bucket, Prefix=path)
+    objects = listing.get("Contents", [])
+    assert objects, f"no objects found under s3://{bucket}/{path}"
+    for obj in objects:
+        key = obj["Key"]
+        info = s3.head_object(Bucket=bucket, Key=key)
+        # .get() so a missing encryption field fails the assertion below with
+        # its message instead of raising KeyError.
+        assert info.get("ServerSideEncryption") == "aws:kms", (
+            "object %s not encrypted" % key
+        )
+        # Only the suffix is compared -- presumably because the reported key id
+        # can be a full ARN; confirm.
+        assert info.get("SSEKMSKeyId", "").endswith(kms_key), (
+            "object %s not encrypted with correct key" % key
+        )
+
+
+@pytest.mark.s3_test
+def test_s3_sse(s3_bucket: str, kms_key: str):
+    """Check that SSE-KMS settings work per table and per connection.
+
+    Parameters
+    ----------
+    s3_bucket : str
+        Name of the bucket provided by the ``s3_bucket`` fixture.
+    kms_key : str
+        Id of the KMS key provided by the ``kms_key`` fixture.
+    """
+    storage_options = copy.copy(CONFIG)
+
+    # Use a prefix of our own. Sharing "test_lifecycle" with test_s3_lifecycle
+    # coupled the two tests: that test drops the whole database under its
+    # prefix.
+    prefix = "test_sse"
+    uri = f"s3://{s3_bucket}/{prefix}"
+    data = pa.table({"x": [1, 2, 3]})
+    sse_options = {
+        "aws_server_side_encryption": "aws:kms",
+        "aws_sse_kms_key_id": kms_key,
+    }
+
+    async def test():
+        # Create a table with SSE
+        db = await lancedb.connect_async(uri, storage_options=storage_options)
+
+        table = await db.create_table(
+            "table1",
+            schema=data.schema,
+            storage_options=sse_options,
+        )
+        # Write through both the append and the update path before checking.
+        await table.add(data)
+        await table.update({"x": "1"})
+
+        validate_objects_encrypted(s3_bucket, f"{prefix}/table1.lance", kms_key)
+
+        # Test we can set encryption at connection level too.
+        db = await lancedb.connect_async(
+            uri,
+            storage_options={**storage_options, **sse_options},
+        )
+
+        table = await db.create_table("table2", schema=data.schema)
+        await table.add(data)
+        await table.update({"x": "1"})
+
+        validate_objects_encrypted(s3_bucket, f"{prefix}/table2.lance", kms_key)
+
+    asyncio.run(test())
--- a/python/python/tests/test_telemetry.py
+++ b/python/python/tests/test_telemetry.py
@@ -1,60 +0,0 @@
-import json
-
-import lancedb
-import pytest
-from lancedb.utils.events import _Events
-
-
-@pytest.fixture(autouse=True)
-def request_log_path(tmp_path):
-    return tmp_path / "request.json"
-
-
-def mock_register_event(name: str, **kwargs):
-    if _Events._instance is None:
-        _Events._instance = _Events()
-
-    _Events._instance.enabled = True
-    _Events._instance.rate_limit = 0
-    _Events._instance(name, **kwargs)
-
-
-def test_event_reporting(monkeypatch, request_log_path, tmp_path) -> None:
-    def mock_request(**kwargs):
-        json_data = kwargs.get("json", {})
-        with open(request_log_path, "w") as f:
-            json.dump(json_data, f)
-
-    monkeypatch.setattr(
-        lancedb.table, "register_event", mock_register_event
-    )  # Force enable registering events and strip exception handling
-    monkeypatch.setattr(lancedb.utils.events, "threaded_request", mock_request)
-
-    db = lancedb.connect(tmp_path)
-    db.create_table(
-        "test",
-        data=[
-            {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
-            {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
-        ],
-        mode="overwrite",
-    )
-
-    assert request_log_path.exists()  # test if event was registered
-
-    with open(request_log_path, "r") as f:
-        json_data = json.load(f)
-
-    # TODO: don't hardcode these here. Instead create a module level json scehma in
-    # lancedb.utils.events for better evolvability
-    batch_keys = ["api_key", "distinct_id", "batch"]
-    event_keys = ["event", "properties", "timestamp", "distinct_id"]
-    property_keys = ["cli", "install", "platforms", "version", "session_id"]
-
-    assert all([key in json_data for key in batch_keys])
-    assert all([key in json_data["batch"][0] for key in event_keys])
-    assert all([key in json_data["batch"][0]["properties"] for key in property_keys])
-
-    # cleanup & reset
-    monkeypatch.undo()
-    _Events._instance = None
--- a/python/src/connection.rs
+++ b/python/src/connection.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::{sync::Arc, time::Duration};
+use std::{collections::HashMap, sync::Arc, time::Duration};

 use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow};
 use lancedb::connection::{Connection as LanceConnection, CreateTableMode};
@@ -90,19 +90,21 @@ impl Connection {
        name: String,
        mode: &str,
        data: &PyAny,
+        storage_options: Option<HashMap<String, String>>,
    ) -> PyResult<&'a PyAny> {
        let inner = self_.get_inner()?.clone();

        let mode = Self::parse_create_mode_str(mode)?;

        let batches = ArrowArrayStreamReader::from_pyarrow(data)?;
+        let mut builder = inner.create_table(name, batches).mode(mode);
+
+        if let Some(storage_options) = storage_options {
+            builder = builder.storage_options(storage_options);
+        }
+
        future_into_py(self_.py(), async move {
-            let table = inner
-                .create_table(name, batches)
-                .mode(mode)
-                .execute()
-                .await
-                .infer_error()?;
+            let table = builder.execute().await.infer_error()?;
            Ok(Table::new(table))
        })
    }
@@ -112,6 +114,7 @@ impl Connection {
        name: String,
        mode: &str,
        schema: &PyAny,
+        storage_options: Option<HashMap<String, String>>,
    ) -> PyResult<&'a PyAny> {
        let inner = self_.get_inner()?.clone();

@@ -119,21 +122,31 @@ impl Connection {

        let schema = Schema::from_pyarrow(schema)?;

+        let mut builder = inner.create_empty_table(name, Arc::new(schema)).mode(mode);
+
+        if let Some(storage_options) = storage_options {
+            builder = builder.storage_options(storage_options);
+        }
+
        future_into_py(self_.py(), async move {
-            let table = inner
-                .create_empty_table(name, Arc::new(schema))
-                .mode(mode)
-                .execute()
-                .await
-                .infer_error()?;
+            let table = builder.execute().await.infer_error()?;
            Ok(Table::new(table))
        })
    }

-    pub fn open_table(self_: PyRef<'_, Self>, name: String) -> PyResult<&PyAny> {
+    #[pyo3(signature = (name, storage_options = None))]
+    pub fn open_table(
+        self_: PyRef<'_, Self>,
+        name: String,
+        storage_options: Option<HashMap<String, String>>,
+    ) -> PyResult<&PyAny> {
        let inner = self_.get_inner()?.clone();
+        let mut builder = inner.open_table(name);
+        if let Some(storage_options) = storage_options {
+            builder = builder.storage_options(storage_options);
+        }
        future_into_py(self_.py(), async move {
-            let table = inner.open_table(&name).execute().await.infer_error()?;
+            let table = builder.execute().await.infer_error()?;
            Ok(Table::new(table))
        })
    }
@@ -162,6 +175,7 @@ pub fn connect(
    region: Option<String>,
    host_override: Option<String>,
    read_consistency_interval: Option<f64>,
+    storage_options: Option<HashMap<String, String>>,
 ) -> PyResult<&PyAny> {
    future_into_py(py, async move {
        let mut builder = lancedb::connect(&uri);
@@ -178,6 +192,9 @@ pub fn connect(
            let read_consistency_interval = Duration::from_secs_f64(read_consistency_interval);
            builder = builder.read_consistency_interval(read_consistency_interval);
        }
+        if let Some(storage_options) = storage_options {
+            builder = builder.storage_options(storage_options);
+        }
        Ok(Connection::new(builder.execute().await.infer_error()?))
    })
 }
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.4.16"
+version = "0.4.17"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
--- a/rust/ffi/node/src/lib.rs
+++ b/rust/ffi/node/src/lib.rs
@@ -12,19 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::sync::Arc;
-
-use async_trait::async_trait;
-use lance::io::ObjectStoreParams;
 use neon::prelude::*;
-use object_store::aws::{AwsCredential, AwsCredentialProvider};
-use object_store::CredentialProvider;
 use once_cell::sync::OnceCell;
 use tokio::runtime::Runtime;

 use lancedb::connect;
 use lancedb::connection::Connection;
-use lancedb::table::ReadParams;

 use crate::error::ResultExt;
 use crate::query::JsQuery;
@@ -44,33 +37,6 @@ struct JsDatabase {

 impl Finalize for JsDatabase {}

-// TODO: object_store didn't export this type so I copied it.
-// Make a request to object_store to export this type
-#[derive(Debug)]
-pub struct StaticCredentialProvider<T> {
-    credential: Arc<T>,
-}
-
-impl<T> StaticCredentialProvider<T> {
-    pub fn new(credential: T) -> Self {
-        Self {
-            credential: Arc::new(credential),
-        }
-    }
-}
-
-#[async_trait]
-impl<T> CredentialProvider for StaticCredentialProvider<T>
-where
-    T: std::fmt::Debug + Send + Sync,
-{
-    type Credential = T;
-
-    async fn get_credential(&self) -> object_store::Result<Arc<T>> {
-        Ok(Arc::clone(&self.credential))
-    }
-}
-
 fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {
    static RUNTIME: OnceCell<Runtime> = OnceCell::new();
    static LOG: OnceCell<()> = OnceCell::new();
@@ -82,29 +48,28 @@ fn runtime<'a, C: Context<'a>>(cx: &mut C) -> NeonResult<&'static Runtime> {

 fn database_new(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let path = cx.argument::<JsString>(0)?.value(&mut cx);
-    let aws_creds = get_aws_creds(&mut cx, 1)?;
-    let region = get_aws_region(&mut cx, 4)?;
    let read_consistency_interval = cx
        .argument_opt(5)
        .and_then(|arg| arg.downcast::<JsNumber, _>(&mut cx).ok())
        .map(|v| v.value(&mut cx))
        .map(std::time::Duration::from_secs_f64);

+    // Storage options arrive from JS as an array of [key, value] string pairs.
+    let storage_options_js = cx.argument::<JsArray>(1)?.to_vec(&mut cx)?;
+    let mut storage_options: Vec<(String, String)> = Vec::with_capacity(storage_options_js.len());
+    for handle in storage_options_js {
+        let pair = handle.downcast_or_throw::<JsArray, _>(&mut cx)?;
+        let key = pair.get::<JsString, _, _>(&mut cx, 0)?.value(&mut cx);
+        let value = pair.get::<JsString, _, _>(&mut cx, 1)?.value(&mut cx);
+        storage_options.push((key, value));
+    }
+
    let rt = runtime(&mut cx)?;
    let channel = cx.channel();
    let (deferred, promise) = cx.promise();

-    let mut conn_builder = connect(&path);
-    if let Some(region) = region {
-        conn_builder = conn_builder.region(&region);
-    }
-    if let Some(aws_creds) = aws_creds {
-        conn_builder = conn_builder.aws_creds(AwsCredential {
-            key_id: aws_creds.key_id,
-            secret_key: aws_creds.secret_key,
-            token: aws_creds.token,
-        });
-    }
+    let mut conn_builder = connect(&path).storage_options(storage_options);
+
    if let Some(interval) = read_consistency_interval {
        conn_builder = conn_builder.read_consistency_interval(interval);
    }
@@ -143,93 +108,19 @@ fn database_table_names(mut cx: FunctionContext) -> JsResult<JsPromise> {
    Ok(promise)
 }

-/// Get AWS creds arguments from the context
-/// Consumes 3 arguments
-fn get_aws_creds(
-    cx: &mut FunctionContext,
-    arg_starting_location: i32,
-) -> NeonResult<Option<AwsCredential>> {
-    let secret_key_id = cx
-        .argument_opt(arg_starting_location)
-        .filter(|arg| arg.is_a::<JsString, _>(cx))
-        .and_then(|arg| arg.downcast_or_throw::<JsString, FunctionContext>(cx).ok())
-        .map(|v| v.value(cx));
-
-    let secret_key = cx
-        .argument_opt(arg_starting_location + 1)
-        .filter(|arg| arg.is_a::<JsString, _>(cx))
-        .and_then(|arg| arg.downcast_or_throw::<JsString, FunctionContext>(cx).ok())
-        .map(|v| v.value(cx));
-
-    let temp_token = cx
-        .argument_opt(arg_starting_location + 2)
-        .filter(|arg| arg.is_a::<JsString, _>(cx))
-        .and_then(|arg| arg.downcast_or_throw::<JsString, FunctionContext>(cx).ok())
-        .map(|v| v.value(cx));
-
-    match (secret_key_id, secret_key, temp_token) {
-        (Some(key_id), Some(key), optional_token) => Ok(Some(AwsCredential {
-            key_id,
-            secret_key: key,
-            token: optional_token,
-        })),
-        (None, None, None) => Ok(None),
-        _ => cx.throw_error("Invalid credentials configuration"),
-    }
-}
-
-fn get_aws_credential_provider(
-    cx: &mut FunctionContext,
-    arg_starting_location: i32,
-) -> NeonResult<Option<AwsCredentialProvider>> {
-    Ok(get_aws_creds(cx, arg_starting_location)?.map(|aws_cred| {
-        Arc::new(StaticCredentialProvider::new(aws_cred))
-            as Arc<dyn CredentialProvider<Credential = AwsCredential>>
-    }))
-}
-
-/// Get AWS region arguments from the context
-fn get_aws_region(cx: &mut FunctionContext, arg_location: i32) -> NeonResult<Option<String>> {
-    let region = cx
-        .argument_opt(arg_location)
-        .filter(|arg| arg.is_a::<JsString, _>(cx))
-        .map(|arg| arg.downcast_or_throw::<JsString, FunctionContext>(cx));
-
-    match region {
-        Some(Ok(region)) => Ok(Some(region.value(cx))),
-        None => Ok(None),
-        Some(Err(e)) => Err(e),
-    }
-}
-
 fn database_open_table(mut cx: FunctionContext) -> JsResult<JsPromise> {
    let db = cx
        .this()
        .downcast_or_throw::<JsBox<JsDatabase>, _>(&mut cx)?;
    let table_name = cx.argument::<JsString>(0)?.value(&mut cx);

-    let aws_creds = get_aws_credential_provider(&mut cx, 1)?;
-
-    let aws_region = get_aws_region(&mut cx, 4)?;
-
-    let params = ReadParams {
-        store_options: Some(ObjectStoreParams::with_aws_credentials(
-            aws_creds, aws_region,
-        )),
-        ..ReadParams::default()
-    };
-
    let rt = runtime(&mut cx)?;
    let channel = cx.channel();
    let database = db.database.clone();

    let (deferred, promise) = cx.promise();
    rt.spawn(async move {
-        let table_rst = database
-            .open_table(&table_name)
-            .lance_read_params(params)
-            .execute()
-            .await;
+        let table_rst = database.open_table(&table_name).execute().await;

        deferred.settle_with(&channel, move |mut cx| {
            let js_table = JsTable::from(table_rst.or_throw(&mut cx)?);
--- a/rust/ffi/node/src/table.rs
+++ b/rust/ffi/node/src/table.rs
@@ -17,7 +17,6 @@ use std::ops::Deref;
 use arrow_array::{RecordBatch, RecordBatchIterator};
 use lance::dataset::optimize::CompactionOptions;
 use lance::dataset::{ColumnAlteration, NewColumnTransform, WriteMode, WriteParams};
-use lance::io::ObjectStoreParams;
 use lancedb::table::{OptimizeAction, WriteOptions};

 use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer};
@@ -26,7 +25,7 @@ use neon::prelude::*;
 use neon::types::buffer::TypedArray;

 use crate::error::ResultExt;
-use crate::{convert, get_aws_credential_provider, get_aws_region, runtime, JsDatabase};
+use crate::{convert, runtime, JsDatabase};

 pub struct JsTable {
    pub table: LanceDbTable,
@@ -59,6 +58,10 @@ impl JsTable {
                return cx.throw_error("Table::create only supports 'overwrite' and 'create' modes")
            }
        };
+        let params = WriteParams {
+            mode,
+            ..WriteParams::default()
+        };

        let rt = runtime(&mut cx)?;
        let channel = cx.channel();
@@ -66,17 +69,6 @@ impl JsTable {
        let (deferred, promise) = cx.promise();
        let database = db.database.clone();

-        let aws_creds = get_aws_credential_provider(&mut cx, 3)?;
-        let aws_region = get_aws_region(&mut cx, 6)?;
-
-        let params = WriteParams {
-            store_params: Some(ObjectStoreParams::with_aws_credentials(
-                aws_creds, aws_region,
-            )),
-            mode,
-            ..WriteParams::default()
-        };
-
        rt.spawn(async move {
            let batch_reader = RecordBatchIterator::new(batches.into_iter().map(Ok), schema);
            let table_rst = database
@@ -112,13 +104,8 @@ impl JsTable {
            "overwrite" => WriteMode::Overwrite,
            s => return cx.throw_error(format!("invalid write mode {}", s)),
        };
-        let aws_creds = get_aws_credential_provider(&mut cx, 2)?;
-        let aws_region = get_aws_region(&mut cx, 5)?;

        let params = WriteParams {
-            store_params: Some(ObjectStoreParams::with_aws_credentials(
-                aws_creds, aws_region,
-            )),
            mode: write_mode,
            ..WriteParams::default()
        };
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.4.16"
+version = "0.4.17"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -46,8 +46,13 @@ tempfile = "3.5.0"
 rand = { version = "0.8.3", features = ["small_rng"] }
 uuid = { version = "1.7.0", features = ["v4"] }
 walkdir = "2"
+# For s3 integration tests (dev deps aren't allowed to be optional atm)
+aws-sdk-s3 = { version = "1.0" }
+aws-sdk-kms = { version = "1.0" }
+aws-config = { version = "1.0" }

 [features]
 default = ["remote"]
 remote = ["dep:reqwest"]
 fp16kernels = ["lance-linalg/fp16kernels"]
+s3-test = []
--- a/rust/lancedb/src/connection.rs
+++ b/rust/lancedb/src/connection.rs
@@ -14,6 +14,7 @@

 //! LanceDB Database

+use std::collections::HashMap;
 use std::fs::create_dir_all;
 use std::path::Path;
 use std::sync::Arc;
@@ -22,9 +23,7 @@ use arrow_array::{RecordBatchIterator, RecordBatchReader};
 use arrow_schema::SchemaRef;
 use lance::dataset::{ReadParams, WriteMode};
 use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
-use object_store::{
-    aws::AwsCredential, local::LocalFileSystem, CredentialProvider, StaticCredentialProvider,
-};
+use object_store::{aws::AwsCredential, local::LocalFileSystem};
 use snafu::prelude::*;

 use crate::arrow::IntoArrow;
@@ -208,6 +207,50 @@ impl<const HAS_DATA: bool, T: IntoArrow> CreateTableBuilder<HAS_DATA, T> {
        self.mode = mode;
        self
    }
+
+    /// Set a single option for the storage layer used when creating this table.
+    ///
+    /// Options already set on the connection will be inherited by the table,
+    /// but a value set here takes precedence over the inherited one.
+    ///
+    /// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
+    pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
+        let storage_options = self
+            .write_options
+            .lance_write_params
+            .get_or_insert_with(Default::default)
+            .store_params
+            .get_or_insert_with(Default::default)
+            .storage_options
+            .get_or_insert_with(Default::default);
+        storage_options.insert(key.into(), value.into());
+        self
+    }
+
+    /// Set multiple options for the storage layer used when creating this table.
+    ///
+    /// Options already set on the connection will be inherited by the table,
+    /// but a value set here takes precedence over the inherited one.
+    ///
+    /// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
+    pub fn storage_options(
+        mut self,
+        pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
+    ) -> Self {
+        let storage_options = self
+            .write_options
+            .lance_write_params
+            .get_or_insert_with(Default::default)
+            .store_params
+            .get_or_insert_with(Default::default)
+            .storage_options
+            .get_or_insert_with(Default::default);
+        // For duplicate keys, the pair yielded last wins.
+        for (key, value) in pairs {
+            storage_options.insert(key.into(), value.into());
+        }
+        self
+    }
 }

 #[derive(Clone, Debug)]
@@ -252,6 +295,48 @@ impl OpenTableBuilder {
        self
    }

+    /// Set a single option for the storage layer used when opening this table.
+    ///
+    /// Options already set on the connection will be inherited by the table,
+    /// but a value set here takes precedence over the inherited one.
+    ///
+    /// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
+    pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
+        let storage_options = self
+            .lance_read_params
+            .get_or_insert_with(Default::default)
+            .store_options
+            .get_or_insert_with(Default::default)
+            .storage_options
+            .get_or_insert_with(Default::default);
+        storage_options.insert(key.into(), value.into());
+        self
+    }
+
+    /// Set multiple options for the storage layer used when opening this table.
+    ///
+    /// Options already set on the connection will be inherited by the table,
+    /// but a value set here takes precedence over the inherited one.
+    ///
+    /// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
+    pub fn storage_options(
+        mut self,
+        pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
+    ) -> Self {
+        let storage_options = self
+            .lance_read_params
+            .get_or_insert_with(Default::default)
+            .store_options
+            .get_or_insert_with(Default::default)
+            .storage_options
+            .get_or_insert_with(Default::default);
+        // For duplicate keys, the pair yielded last wins.
+        for (key, value) in pairs {
+            storage_options.insert(key.into(), value.into());
+        }
+        self
+    }
+
    /// Open the table
    pub async fn execute(self) -> Result<Table> {
        self.parent.clone().do_open_table(self).await
@@ -385,8 +470,7 @@ pub struct ConnectBuilder {
    /// LanceDB Cloud host override, only required if using an on-premises Lance Cloud instance
    host_override: Option<String>,

-    /// User provided AWS credentials
-    aws_creds: Option<AwsCredential>,
+    storage_options: HashMap<String, String>,

    /// The interval at which to check for updates from other processes.
    ///
@@ -409,8 +493,8 @@ impl ConnectBuilder {
            api_key: None,
            region: None,
            host_override: None,
-            aws_creds: None,
            read_consistency_interval: None,
+            storage_options: HashMap::new(),
        }
    }

@@ -430,8 +514,37 @@ impl ConnectBuilder {
    }

    /// [`AwsCredential`] to use when connecting to S3.
+    #[deprecated(note = "Pass through storage_options instead")]
    pub fn aws_creds(mut self, aws_creds: AwsCredential) -> Self {
-        self.aws_creds = Some(aws_creds);
+        self.storage_options
+            .insert("aws_access_key_id".into(), aws_creds.key_id);
+        self.storage_options
+            .insert("aws_secret_access_key".into(), aws_creds.secret_key);
+        if let Some(token) = aws_creds.token {
+            self.storage_options
+                .insert("aws_session_token".into(), token);
+        }
+        self
+    }
+
+    /// Set an option for the storage layer, replacing any value already set for `key`.
+    ///
+    /// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
+    pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
+        self.storage_options.insert(key.into(), value.into());
+        self
+    }
+
+    /// Set multiple options for the storage layer; for duplicate keys the last pair wins.
+    ///
+    /// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
+    pub fn storage_options(
+        mut self,
+        pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
+    ) -> Self {
+        for (key, value) in pairs {
+            self.storage_options.insert(key.into(), value.into());
+        }
        self
    }

@@ -522,6 +635,9 @@ struct Database {
    pub(crate) store_wrapper: Option<Arc<dyn WrappingObjectStore>>,

    read_consistency_interval: Option<std::time::Duration>,
+
+    // Storage options inherited by tables created or opened through this connection
+    storage_options: HashMap<String, String>,
 }

 impl std::fmt::Display for Database {
@@ -604,20 +720,11 @@ impl Database {
                };

                let plain_uri = url.to_string();
-                let os_params: ObjectStoreParams = if let Some(aws_creds) = &options.aws_creds {
-                    let credential_provider: Arc<
-                        dyn CredentialProvider<Credential = AwsCredential>,
-                    > = Arc::new(StaticCredentialProvider::new(AwsCredential {
-                        key_id: aws_creds.key_id.clone(),
-                        secret_key: aws_creds.secret_key.clone(),
-                        token: aws_creds.token.clone(),
-                    }));
-                    ObjectStoreParams::with_aws_credentials(
-                        Some(credential_provider),
-                        options.region.clone(),
-                    )
-                } else {
-                    ObjectStoreParams::default()
+
+                let storage_options = options.storage_options.clone();
+                let os_params = ObjectStoreParams {
+                    storage_options: Some(storage_options.clone()),
+                    ..Default::default()
                };
                let (object_store, base_path) =
                    ObjectStore::from_uri_and_params(&plain_uri, &os_params).await?;
@@ -641,6 +748,7 @@ impl Database {
                    object_store,
                    store_wrapper: write_store_wrapper,
                    read_consistency_interval: options.read_consistency_interval,
+                    storage_options,
                })
            }
            Err(_) => Self::open_path(uri, options.read_consistency_interval).await,
@@ -662,6 +770,7 @@ impl Database {
            object_store,
            store_wrapper: None,
            read_consistency_interval,
+            storage_options: HashMap::new(),
        })
    }

@@ -734,11 +843,26 @@ impl ConnectionInternal for Database {

    async fn do_create_table(
        &self,
-        options: CreateTableBuilder<false, NoData>,
+        mut options: CreateTableBuilder<false, NoData>,
        data: Box<dyn RecordBatchReader + Send>,
    ) -> Result<Table> {
        let table_uri = self.table_uri(&options.name)?;

+        // Inherit storage options from the connection
+        let storage_options = options
+            .write_options
+            .lance_write_params
+            .get_or_insert_with(Default::default)
+            .store_params
+            .get_or_insert_with(Default::default)
+            .storage_options
+            .get_or_insert_with(Default::default);
+        for (key, value) in self.storage_options.iter() {
+            if !storage_options.contains_key(key) {
+                storage_options.insert(key.clone(), value.clone());
+            }
+        }
+
        let mut write_params = options.write_options.lance_write_params.unwrap_or_default();
        if matches!(&options.mode, CreateTableMode::Overwrite) {
            write_params.mode = WriteMode::Overwrite;
@@ -768,8 +892,23 @@ impl ConnectionInternal for Database {
        }
    }

-    async fn do_open_table(&self, options: OpenTableBuilder) -> Result<Table> {
+    async fn do_open_table(&self, mut options: OpenTableBuilder) -> Result<Table> {
        let table_uri = self.table_uri(&options.name)?;
+
+        // Inherit storage options from the connection
+        let storage_options = options
+            .lance_read_params
+            .get_or_insert_with(Default::default)
+            .store_options
+            .get_or_insert_with(Default::default)
+            .storage_options
+            .get_or_insert_with(Default::default);
+        for (key, value) in self.storage_options.iter() {
+            if !storage_options.contains_key(key) {
+                storage_options.insert(key.clone(), value.clone());
+            }
+        }
+
        let native_table = Arc::new(
            NativeTable::open_with_params(
                &table_uri,
@@ -801,7 +940,10 @@ impl ConnectionInternal for Database {
    }

    async fn drop_db(&self) -> Result<()> {
-        todo!()
+        self.object_store
+            .remove_dir_all(self.base_path.clone())
+            .await?;
+        Ok(())
    }
 }

--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -14,6 +14,7 @@

 //! LanceDB Table APIs

+use std::collections::HashMap;
 use std::path::Path;
 use std::sync::Arc;

@@ -757,6 +758,8 @@ pub struct NativeTable {
    // the object store wrapper to use on write path
    store_wrapper: Option<Arc<dyn WrappingObjectStore>>,

+    storage_options: HashMap<String, String>,
+
    // This comes from the connection options. We store here so we can pass down
    // to the dataset when we recreate it (for example, in checkout_latest).
    read_consistency_interval: Option<std::time::Duration>,
@@ -822,6 +825,13 @@ impl NativeTable {
            None => params,
        };

+        let storage_options = params
+            .store_options
+            .clone()
+            .unwrap_or_default()
+            .storage_options
+            .unwrap_or_default();
+
        let dataset = DatasetBuilder::from_uri(uri)
            .with_read_params(params)
            .load()
@@ -840,6 +850,7 @@ impl NativeTable {
            uri: uri.to_string(),
            dataset,
            store_wrapper: write_store_wrapper,
+            storage_options,
            read_consistency_interval,
        })
    }
@@ -908,6 +919,13 @@ impl NativeTable {
            None => params,
        };

+        let storage_options = params
+            .store_params
+            .clone()
+            .unwrap_or_default()
+            .storage_options
+            .unwrap_or_default();
+
        let dataset = Dataset::write(batches, uri, Some(params))
            .await
            .map_err(|e| match e {
@@ -921,6 +939,7 @@ impl NativeTable {
            uri: uri.to_string(),
            dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval),
            store_wrapper: write_store_wrapper,
+            storage_options,
            read_consistency_interval,
        })
    }
@@ -1312,7 +1331,7 @@ impl TableInternal for NativeTable {
        add: AddDataBuilder<NoData>,
        data: Box<dyn RecordBatchReader + Send>,
    ) -> Result<()> {
-        let lance_params = add.write_options.lance_write_params.unwrap_or(WriteParams {
+        let mut lance_params = add.write_options.lance_write_params.unwrap_or(WriteParams {
            mode: match add.mode {
                AddDataMode::Append => WriteMode::Append,
                AddDataMode::Overwrite => WriteMode::Overwrite,
@@ -1320,6 +1339,18 @@ impl TableInternal for NativeTable {
            ..Default::default()
        });

+        // Inherit the table's storage options without overriding any set on this write
+        let storage_options = lance_params
+            .store_params
+            .get_or_insert(Default::default())
+            .storage_options
+            .get_or_insert(Default::default());
+        for (key, value) in self.storage_options.iter() {
+            if !storage_options.contains_key(key) {
+                storage_options.insert(key.clone(), value.clone());
+            }
+        }
+
        // patch the params if we have a write store wrapper
        let lance_params = match self.store_wrapper.clone() {
            Some(wrapper) => lance_params.patch_with_store_wrapper(wrapper)?,
--- a/rust/lancedb/tests/object_store_test.rs
+++ b/rust/lancedb/tests/object_store_test.rs
@@ -0,0 +1,290 @@
+// Copyright 2023 LanceDB Developers.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#![cfg(feature = "s3-test")]
+use std::sync::Arc;
+
+use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
+use arrow_schema::{DataType, Field, Schema};
+
+use aws_config::{BehaviorVersion, ConfigLoader, Region, SdkConfig};
+use aws_sdk_s3::{config::Credentials, types::ServerSideEncryption, Client as S3Client};
+use lancedb::Result;
+
+const CONFIG: &[(&str, &str)] = &[
+    ("access_key_id", "ACCESS_KEY"),
+    ("secret_access_key", "SECRET_KEY"),
+    ("endpoint", "http://127.0.0.1:4566"),
+    ("allow_http", "true"),
+];
+
+async fn aws_config() -> SdkConfig {
+    let credentials = Credentials::new(CONFIG[0].1, CONFIG[1].1, None, None, "static");
+    ConfigLoader::default()
+        .credentials_provider(credentials)
+        .endpoint_url(CONFIG[2].1)
+        .behavior_version(BehaviorVersion::latest())
+        .region(Region::new("us-east-1"))
+        .load()
+        .await
+}
+
+struct S3Bucket(String);
+
+impl S3Bucket {
+    async fn new(bucket: &str) -> Self {
+        let config = aws_config().await;
+        let client = S3Client::new(&config);
+
+        // In case it wasn't deleted earlier
+        Self::delete_bucket(client.clone(), bucket).await;
+
+        client.create_bucket().bucket(bucket).send().await.unwrap();
+
+        Self(bucket.to_string())
+    }
+
+    async fn delete_bucket(client: S3Client, bucket: &str) {
+        // Before we delete the bucket, we need to delete all objects in it
+        let res = client
+            .list_objects_v2()
+            .bucket(bucket)
+            .send()
+            .await
+            .map_err(|err| err.into_service_error());
+        match res {
+            Err(e) if e.is_no_such_bucket() => return,
+            Err(e) => panic!("Failed to list objects in bucket: {}", e),
+            _ => {}
+        }
+        let objects = res.unwrap().contents.unwrap_or_default();
+        for object in objects {
+            client
+                .delete_object()
+                .bucket(bucket)
+                .key(object.key.unwrap())
+                .send()
+                .await
+                .unwrap();
+        }
+        client.delete_bucket().bucket(bucket).send().await.unwrap();
+    }
+}
+
+impl Drop for S3Bucket {
+    fn drop(&mut self) {
+        let bucket_name = self.0.clone();
+        tokio::task::spawn(async move {
+            let config = aws_config().await;
+            let client = S3Client::new(&config);
+            Self::delete_bucket(client, &bucket_name).await;
+        });
+    }
+}
+
+fn test_data() -> RecordBatch {
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("a", DataType::Int32, false),
+        Field::new("b", DataType::Utf8, false),
+    ]));
+    RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(Int32Array::from(vec![1, 2, 3])),
+            Arc::new(StringArray::from(vec!["a", "b", "c"])),
+        ],
+    )
+    .unwrap()
+}
+
+#[tokio::test]
+async fn test_minio_lifecycle() -> Result<()> {
+    // Test create, list, re-open, append and drop against localstack S3
+    let bucket = S3Bucket::new("test-bucket").await;
+    let uri = format!("s3://{}", bucket.0);
+
+    let db = lancedb::connect(&uri)
+        .storage_options(CONFIG.iter().cloned())
+        .execute()
+        .await?;
+
+    let data = test_data();
+    let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema());
+
+    let table = db.create_table("test_table", data).execute().await?;
+
+    let row_count = table.count_rows(None).await?;
+    assert_eq!(row_count, 3);
+
+    let table_names = db.table_names().execute().await?;
+    assert_eq!(table_names, vec!["test_table"]);
+
+    // Re-open the table
+    let table = db.open_table("test_table").execute().await?;
+    let row_count = table.count_rows(None).await?;
+    assert_eq!(row_count, 3);
+
+    let data = test_data();
+    let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema());
+    table.add(data).execute().await?;
+
+    db.drop_table("test_table").await?;
+
+    Ok(())
+}
+
+struct KMSKey(String);
+
+impl KMSKey {
+    async fn new() -> Self {
+        let config = aws_config().await;
+        let client = aws_sdk_kms::Client::new(&config);
+        let key = client
+            .create_key()
+            .description("test key")
+            .send()
+            .await
+            .unwrap()
+            .key_metadata
+            .unwrap()
+            .key_id;
+        Self(key)
+    }
+}
+
+impl Drop for KMSKey {
+    fn drop(&mut self) {
+        let key_id = self.0.clone();
+        tokio::task::spawn(async move {
+            let config = aws_config().await;
+            let client = aws_sdk_kms::Client::new(&config);
+            client
+                .schedule_key_deletion()
+                .key_id(&key_id)
+                .send()
+                .await
+                .unwrap();
+        });
+    }
+}
+
+async fn validate_objects_encrypted(bucket: &str, path: &str, kms_key_id: &str) {
+    // Get S3 client
+    let config = aws_config().await;
+    let client = S3Client::new(&config);
+
+    // List the objects at the path
+    let objects = client
+        .list_objects_v2()
+        .bucket(bucket)
+        .prefix(path)
+        .send()
+        .await
+        .unwrap()
+        .contents
+        .unwrap();
+
+    let mut errors = vec![];
+    let mut correctly_encrypted = vec![];
+
+    // For each object, call head
+    for object in &objects {
+        let head = client
+            .head_object()
+            .bucket(bucket)
+            .key(object.key().unwrap())
+            .send()
+            .await
+            .unwrap();
+
+        // Verify the object is encrypted
+        if head.server_side_encryption() != Some(&ServerSideEncryption::AwsKms) {
+            errors.push(format!("Object {} is not encrypted", object.key().unwrap()));
+            continue;
+        }
+        if !(head
+            .ssekms_key_id()
+            .map(|arn| arn.ends_with(kms_key_id))
+            .unwrap_or(false))
+        {
+            errors.push(format!(
+                "Object {} has wrong key id: {:?}, vs expected: {}",
+                object.key().unwrap(),
+                head.ssekms_key_id(),
+                kms_key_id
+            ));
+            continue;
+        }
+        correctly_encrypted.push(object.key().unwrap().to_string());
+    }
+
+    if !errors.is_empty() {
+        panic!(
+            "{} of {} correctly encrypted: {:?}\n{} of {} not correct: {:?}",
+            correctly_encrypted.len(),
+            objects.len(),
+            correctly_encrypted,
+            errors.len(),
+            objects.len(),
+            errors
+        );
+    }
+}
+
+#[tokio::test]
+async fn test_encryption() -> Result<()> {
+    // Test KMS server-side encryption against localstack S3
+    let bucket = S3Bucket::new("test-encryption").await;
+    let key = KMSKey::new().await;
+
+    let uri = format!("s3://{}", bucket.0);
+    let db = lancedb::connect(&uri)
+        .storage_options(CONFIG.iter().cloned())
+        .execute()
+        .await?;
+
+    // Create a table with encryption
+    let data = test_data();
+    let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema());
+
+    let mut builder = db.create_table("test_table", data);
+    for (key, value) in CONFIG {
+        builder = builder.storage_option(*key, *value);
+    }
+    let table = builder
+        .storage_option("aws_server_side_encryption", "aws:kms")
+        .storage_option("aws_sse_kms_key_id", &key.0)
+        .execute()
+        .await?;
+    validate_objects_encrypted(&bucket.0, "test_table", &key.0).await;
+
+    table.delete("a = 1").await?;
+    validate_objects_encrypted(&bucket.0, "test_table", &key.0).await;
+
+    // Test we can set encryption at the connection level.
+    let db = lancedb::connect(&uri)
+        .storage_options(CONFIG.iter().cloned())
+        .storage_option("aws_server_side_encryption", "aws:kms")
+        .storage_option("aws_sse_kms_key_id", &key.0)
+        .execute()
+        .await?;
+
+    let table = db.open_table("test_table").execute().await?;
+
+    let data = test_data();
+    let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema());
+    table.add(data).execute().await?;
+    validate_objects_encrypted(&bucket.0, "test_table", &key.0).await;
+
+    Ok(())
+}