Bump version: 0.21.0-beta.2 → 0.21.0

Bump version: 0.21.0-beta.1 → 0.21.0-beta.2
feat: upgrade to lance 0.24.1 (#2199 )
2026-01-06 11:52:57 +00:00 · 2025-03-10 23:12:56 +00:00 · 2025-03-10 23:12:56 +00:00 · 2025-03-10 15:18:37 -07:00 · 2025-03-10 15:00:53 -07:00 · 2025-03-10 09:01:23 -07:00
192 changed files with 8790 additions and 3505 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.15.1-beta.2"
+current_version = "0.18.0-beta.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -106,6 +106,18 @@ jobs:
        python ci/mock_openai.py &
        cd nodejs/examples
        npm test
+    - name: Check docs
+      run: |
+        # We run this as part of the job because the binary needs to be built
+        # first to export the types of the native code.
+        set -e
+        npm ci
+        npm run docs
+        if ! git diff --exit-code; then
+          echo "Docs need to be updated"
+          echo "Run 'npm run docs', fix any warnings, and commit the changes."
+          exit 1
+        fi
  macos:
    timeout-minutes: 30
    runs-on: "macos-14"
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -33,13 +33,14 @@ jobs:
          python-version: "3.12"
      - name: Install ruff
        run: |
-          pip install ruff==0.8.4
+          pip install ruff==0.9.9
      - name: Format check
        run: ruff format --check .
      - name: Lint
        run: ruff check .
-  doctest:
-    name: "Doctest"
+
+  type-check:
+    name: "Type Check"
    timeout-minutes: 30
    runs-on: "ubuntu-22.04"
    defaults:
@@ -54,7 +55,36 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.12"
+      - name: Install protobuf compiler
+        run: |
+          sudo apt update
+          sudo apt install -y protobuf-compiler
+          pip install toml
+      - name: Install dependencies
+        run: |
+          python ../ci/parse_requirements.py pyproject.toml --extras dev,tests,embeddings > requirements.txt
+          pip install -r requirements.txt
+      - name: Run pyright
+        run: pyright
+
+  doctest:
+    name: "Doctest"
+    timeout-minutes: 30
+    runs-on: "ubuntu-24.04"
+    defaults:
+      run:
+        shell: bash
+        working-directory: python
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
          cache: "pip"
      - name: Install protobuf
        run: |
@@ -75,8 +105,8 @@ jobs:
    timeout-minutes: 30
    strategy:
      matrix:
-        python-minor-version: ["9", "11"]
-    runs-on: "ubuntu-22.04"
+        python-minor-version: ["9", "12"]
+    runs-on: "ubuntu-24.04"
    defaults:
      run:
        shell: bash
@@ -127,7 +157,7 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.12"
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: python
@@ -157,7 +187,7 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
-          python-version: "3.11"
+          python-version: "3.12"
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: python
@@ -168,7 +198,7 @@ jobs:
        run: rm -rf target/wheels
  pydantic1x:
    timeout-minutes: 30
-    runs-on: "ubuntu-22.04"
+    runs-on: "ubuntu-24.04"
    defaults:
      run:
        shell: bash
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -61,7 +61,12 @@ jobs:
      CXX: clang++
    steps:
      - uses: actions/checkout@v4
-      # Remote cargo.lock to force a fresh build
+      # Building without a lock file often requires the latest Rust version since downstream
+      # dependencies may have updated their minimum Rust version.
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: "stable"
+      # Remove cargo.lock to force a fresh build
      - name: Remove Cargo.lock
        run: rm -f Cargo.lock
      - uses: rui314/setup-mold@v1
@@ -179,15 +184,17 @@ jobs:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
-      - name: Install dependencies
+      - name: Install dependencies (part 1)
        run: |
          set -e
          apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
-
-          curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y
-          source $HOME/.cargo/env
-          rustup target add aarch64-pc-windows-msvc
-
+      - name: Install rust
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          target: aarch64-pc-windows-msvc
+      - name: Install dependencies (part 2)
+        run: |
+          set -e
          mkdir -p sysroot
          cd sysroot
          sh ../ci/sysroot-aarch64-pc-windows-msvc.sh
@@ -259,7 +266,7 @@ jobs:
      - name: Install Rust
        run: |
          Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
-          .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
+          .\rustup-init.exe -y --default-host aarch64-pc-windows-msvc --default-toolchain 1.83.0
        shell: powershell
      - name: Add Rust to PATH
        run: |
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,9 +7,15 @@ repos:
      - id: trailing-whitespace
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
-    rev: v0.2.2
+    rev: v0.9.9
    hooks:
      - id: ruff
+  # - repo: https://github.com/RobertCraigie/pyright-python
+  #   rev: v1.1.395
+  #   hooks:
+  #     - id: pyright
+  #       args: ["--project", "python"]
+  #       additional_dependencies: [pyarrow-stubs]
  - repo: local
    hooks:
      - id: local-biome-check
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,44 +21,52 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.23.0", "features" = [
-    "dynamodb",
-], git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
-lance-io = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
-lance-index = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
-lance-linalg = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
-lance-table = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
-lance-testing = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
-lance-datafusion = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
-lance-encoding = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
+lance = { "version" = "=0.24.1", "features" = ["dynamodb"] }
+lance-io = { version = "=0.24.1" }
+lance-index = { version = "=0.24.1" }
+lance-linalg = { version = "=0.24.1" }
+lance-table = { version = "=0.24.1" }
+lance-testing = { version = "=0.24.1" }
+lance-datafusion = { version = "=0.24.1" }
+lance-encoding = { version = "=0.24.1" }
 # Note that this one does not include pyarrow
-arrow = { version = "53.2", optional = false }
-arrow-array = "53.2"
-arrow-data = "53.2"
-arrow-ipc = "53.2"
-arrow-ord = "53.2"
-arrow-schema = "53.2"
-arrow-arith = "53.2"
-arrow-cast = "53.2"
+arrow = { version = "54.1", optional = false }
+arrow-array = "54.1"
+arrow-data = "54.1"
+arrow-ipc = "54.1"
+arrow-ord = "54.1"
+arrow-schema = "54.1"
+arrow-arith = "54.1"
+arrow-cast = "54.1"
 async-trait = "0"
-chrono = "0.4.35"
-datafusion-common = "44.0"
-datafusion-physical-plan = "44.0"
-env_logger = "0.10"
+datafusion = { version = "45.0", default-features = false }
+datafusion-catalog = "45.0"
+datafusion-common = { version = "45.0", default-features = false }
+datafusion-execution = "45.0"
+datafusion-expr = "45.0"
+datafusion-physical-plan = "45.0"
+env_logger = "0.11"
 half = { "version" = "=2.4.1", default-features = false, features = [
    "num-traits",
 ] }
 futures = "0"
 log = "0.4"
-moka = { version = "0.11", features = ["future"] }
-object_store = "0.10.2"
+moka = { version = "0.12", features = ["future"] }
+object_store = "0.11.0"
 pin-project = "1.0.7"
-snafu = "0.7.4"
+snafu = "0.8"
 url = "2"
 num-traits = "0.2"
 rand = "0.8"
 regex = "1.10"
 lazy_static = "1"
+semver = "1.0.25"
+
+# Temporary pins to work around downstream issues
+# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
+chrono = "=0.4.39"
+# https://github.com/RustCrypto/formats/issues/1684
+base64ct = "=1.6.0"

 # Workaround for: https://github.com/eira-fransham/crunchy/issues/13
 crunchy = "=0.2.2"
--- a/ci/parse_requirements.py
+++ b/ci/parse_requirements.py
@@ -0,0 +1,41 @@
+import argparse
+import toml
+
+
+def parse_dependencies(pyproject_path, extras=None):
+    with open(pyproject_path, "r") as file:
+        pyproject = toml.load(file)
+
+    dependencies = pyproject.get("project", {}).get("dependencies", [])
+    for dependency in dependencies:
+        print(dependency)
+
+    optional_dependencies = pyproject.get("project", {}).get(
+        "optional-dependencies", {}
+    )
+
+    if extras:
+        for extra in extras.split(","):
+            for dep in optional_dependencies.get(extra, []):
+                print(dep)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate requirements.txt from pyproject.toml"
+    )
+    parser.add_argument("path", type=str, help="Path to pyproject.toml")
+    parser.add_argument(
+        "--extras",
+        type=str,
+        help="Comma-separated list of extras to include",
+        default="",
+    )
+
+    args = parser.parse_args()
+
+    parse_dependencies(args.path, args.extras)
+
+
+if __name__ == "__main__":
+    main()
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -4,6 +4,9 @@ repo_url: https://github.com/lancedb/lancedb
 edit_uri: https://github.com/lancedb/lancedb/tree/main/docs/src
 repo_name: lancedb/lancedb
 docs_dir: src
+watch:
+  - src
+  - ../python/python

 theme:
  name: "material"
@@ -63,6 +66,7 @@ plugins:
            - https://arrow.apache.org/docs/objects.inv
            - https://pandas.pydata.org/docs/objects.inv
            - https://lancedb.github.io/lance/objects.inv
+            - https://docs.pydantic.dev/latest/objects.inv
  - mkdocs-jupyter
  - render_swagger:
      allow_arbitrary_locations: true
@@ -178,6 +182,7 @@ nav:
                  - Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
                  - Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
          - User-defined embedding functions: embeddings/custom_embedding_function.md
+          - Variables and secrets: embeddings/variables_and_secrets.md
          - "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
          - "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
      - 🔌 Integrations:
@@ -311,6 +316,7 @@ nav:
              - Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
              - Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
      - User-defined embedding functions: embeddings/custom_embedding_function.md
+      - Variables and secrets: embeddings/variables_and_secrets.md
      - "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
      - "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
  - Integrations:
@@ -371,6 +377,7 @@ extra_css:

 extra_javascript:
  - "extra_js/init_ask_ai_widget.js"
+  - "extra_js/reo.js"

 extra:
  analytics:
--- a/docs/openapi.yml
+++ b/docs/openapi.yml
@@ -38,6 +38,13 @@ components:
      required: true
      schema:
        type: string
+    index_name:
+      name: index_name
+      in: path
+      description: name of the index
+      required: true
+      schema:
+        type: string
  responses:
    invalid_request:
      description: Invalid request
@@ -485,3 +492,22 @@ paths:
          $ref: "#/components/responses/unauthorized"
        "404":
          $ref: "#/components/responses/not_found"
+  /v1/table/{name}/index/{index_name}/drop/:
+    post:
+      description: Drop an index from the table
+      tags:
+        - Tables
+      summary: Drop an index from the table
+      operationId: dropIndex
+      parameters:
+        - $ref: "#/components/parameters/table_name"
+        - $ref: "#/components/parameters/index_name"
+      responses:
+        "200":
+          description: Index successfully dropped
+        "400":
+          $ref: "#/components/responses/invalid_request"
+        "401":
+          $ref: "#/components/responses/unauthorized"
+        "404":
+          $ref: "#/components/responses/not_found"
--- a/docs/src/ann_indexes.ts
+++ b/docs/src/ann_indexes.ts
@@ -3,6 +3,7 @@ import * as vectordb from "vectordb";
 // --8<-- [end:import]

 (async () => {
+  console.log("ann_indexes.ts: start");
  // --8<-- [start:ingest]
  const db = await vectordb.connect("data/sample-lancedb");

@@ -49,5 +50,5 @@ import * as vectordb from "vectordb";
    .execute();
  // --8<-- [end:search3]

-  console.log("Ann indexes: done");
+  console.log("ann_indexes.ts: done");
 })();
--- a/docs/src/basic.md
+++ b/docs/src/basic.md
@@ -133,11 +133,20 @@ recommend switching to stable releases.
 ## Connect to a database

 === "Python"
+    === "Sync API"

        ```python
        --8<-- "python/python/tests/docs/test_basic.py:imports"
-    --8<-- "python/python/tests/docs/test_basic.py:connect"

+        --8<-- "python/python/tests/docs/test_basic.py:set_uri"
+        --8<-- "python/python/tests/docs/test_basic.py:connect"
+        ```
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:imports"
+
+        --8<-- "python/python/tests/docs/test_basic.py:set_uri"
        --8<-- "python/python/tests/docs/test_basic.py:connect_async"
        ```

@@ -183,19 +192,31 @@ table.

 === "Python"

-    ```python
-    --8<-- "python/python/tests/docs/test_basic.py:create_table"
-    --8<-- "python/python/tests/docs/test_basic.py:create_table_async"
-    ```
-
    If the table already exists, LanceDB will raise an error by default.
    If you want to overwrite the table, you can pass in `mode="overwrite"`
    to the `create_table` method.

+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:create_table"
+        ```
+
        You can also pass in a pandas DataFrame directly:

        ```python
        --8<-- "python/python/tests/docs/test_basic.py:create_table_pandas"
+        ```
+
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:create_table_async"
+        ```
+
+        You can also pass in a pandas DataFrame directly:
+
+        ```python
        --8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas"
        ```

@@ -247,8 +268,14 @@ similar to a `CREATE TABLE` statement in SQL.

 === "Python"

+    === "Sync API"
+
        ```python
        --8<-- "python/python/tests/docs/test_basic.py:create_empty_table"
+        ```
+    === "Async API"
+
+        ```python
        --8<-- "python/python/tests/docs/test_basic.py:create_empty_table_async"
        ```

@@ -281,8 +308,14 @@ Once created, you can open a table as follows:

 === "Python"

+    === "Sync API"
+
        ```python
        --8<-- "python/python/tests/docs/test_basic.py:open_table"
+        ```
+    === "Async API"
+
+        ```python
        --8<-- "python/python/tests/docs/test_basic.py:open_table_async"
        ```

@@ -310,8 +343,14 @@ If you forget the name of your table, you can always get a listing of all table

 === "Python"

+    === "Sync API"
+
        ```python
        --8<-- "python/python/tests/docs/test_basic.py:table_names"
+        ```
+    === "Async API"
+
+        ```python
        --8<-- "python/python/tests/docs/test_basic.py:table_names_async"
        ```

@@ -340,8 +379,14 @@ After a table has been created, you can always add more data to it as follows:

 === "Python"

+    === "Sync API"
+
        ```python
        --8<-- "python/python/tests/docs/test_basic.py:add_data"
+        ```
+    === "Async API"
+
+        ```python
        --8<-- "python/python/tests/docs/test_basic.py:add_data_async"
        ```

@@ -370,8 +415,14 @@ Once you've embedded the query, you can find its nearest neighbors as follows:

 === "Python"

+    === "Sync API"
+
        ```python
        --8<-- "python/python/tests/docs/test_basic.py:vector_search"
+        ```
+    === "Async API"
+
+        ```python
        --8<-- "python/python/tests/docs/test_basic.py:vector_search_async"
        ```

@@ -412,8 +463,14 @@ LanceDB allows you to create an ANN index on a table as follows:

 === "Python"

-    ```py
+    === "Sync API"
+
+        ```python
        --8<-- "python/python/tests/docs/test_basic.py:create_index"
+        ```
+    === "Async API"
+
+        ```python
        --8<-- "python/python/tests/docs/test_basic.py:create_index_async"
        ```

@@ -451,8 +508,14 @@ This can delete any number of rows that match the filter.

 === "Python"

+    === "Sync API"
+
        ```python
        --8<-- "python/python/tests/docs/test_basic.py:delete_rows"
+        ```
+    === "Async API"
+
+        ```python
        --8<-- "python/python/tests/docs/test_basic.py:delete_rows_async"
        ```

@@ -483,7 +546,10 @@ simple or complex as needed. To see what expressions are supported, see the

 === "Python"

+    === "Sync API"
        Read more: [lancedb.table.Table.delete][]
+    === "Async API"
+        Read more: [lancedb.table.AsyncTable.delete][]

 === "Typescript[^1]"

@@ -505,8 +571,14 @@ Use the `drop_table()` method on the database to remove a table.

 === "Python"

+    === "Sync API"
+
        ```python
        --8<-- "python/python/tests/docs/test_basic.py:drop_table"
+        ```
+    === "Async API"
+
+        ```python
        --8<-- "python/python/tests/docs/test_basic.py:drop_table_async"
        ```

@@ -543,10 +615,17 @@ You can use the embedding API when working with embedding models. It automatical

 === "Python"

+    === "Sync API"
+
        ```python
        --8<-- "python/python/tests/docs/test_embeddings_optional.py:imports"
+
        --8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings"
        ```
+    === "Async API"
+
+        Coming soon to the async API.
+        https://github.com/lancedb/lancedb/issues/1938

 === "Typescript[^1]"

--- a/docs/src/basic_legacy.ts
+++ b/docs/src/basic_legacy.ts
@@ -107,7 +107,6 @@ const example = async () => {
  // --8<-- [start:search]
  const query = await tbl.search([100, 100]).limit(2).execute();
  // --8<-- [end:search]
-  console.log(query);

  // --8<-- [start:delete]
  await tbl.delete('item = "fizz"');
@@ -119,8 +118,9 @@ const example = async () => {
 };

 async function main() {
+  console.log("basic_legacy.ts: start");
  await example();
-  console.log("Basic example: done");
+  console.log("basic_legacy.ts: done");
 }

 main();
--- a/docs/src/embeddings/custom_embedding_function.md
+++ b/docs/src/embeddings/custom_embedding_function.md
@@ -55,6 +55,14 @@ Let's implement `SentenceTransformerEmbeddings` class. All you need to do is imp

 This is a stripped down version of our implementation of `SentenceTransformerEmbeddings` that removes certain optimizations and default settings.

+!!! danger "Use sensitive keys to prevent leaking secrets"
+    To prevent leaking secrets, such as API keys, you should add any sensitive
+    parameters of an embedding function to the output of the
+    [sensitive_keys()][lancedb.embeddings.base.EmbeddingFunction.sensitive_keys] /
+    [getSensitiveKeys()](../../js/namespaces/embedding/classes/EmbeddingFunction/#getsensitivekeys)
+    method. This prevents users from accidentally instantiating the embedding
+    function with hard-coded secrets.
+
 Now you can use this embedding function to create your table schema and that's it! you can then ingest data and run queries without manually vectorizing the inputs.

 === "Python"
--- a/docs/src/embeddings/variables_and_secrets.md
+++ b/docs/src/embeddings/variables_and_secrets.md
@@ -0,0 +1,53 @@
+# Variables and Secrets
+
+Most embedding configuration options are saved in the table's metadata. However,
+this isn't always appropriate. For example, API keys should never be stored in the
+metadata. Additionally, other configuration options might be best set at runtime,
+such as the `device` configuration that controls whether to use GPU or CPU for
+inference. If you hardcoded this to GPU, you wouldn't be able to run the code on
+a server without one.
+
+To handle these cases, you can set variables on the embedding registry and
+reference them in the embedding configuration. These variables will be available
+during the runtime of your program, but not saved in the table's metadata. When
+the table is loaded from a different process, the variables must be set again.
+
+To set a variable, use the `set_var()` / `setVar()` method on the embedding registry.
+To reference a variable, use the syntax `$var:VARIABLE_NAME`. If there is a default
+value, you can use the syntax `$var:VARIABLE_NAME:DEFAULT_VALUE`.
+
+## Using variables to set secrets
+
+Sensitive configuration, such as API keys, must either be set as environment
+variables or using variables on the embedding registry. If you pass in a hardcoded
+value, LanceDB will raise an error. Instead, if you want to set an API key via
+configuration, use a variable:
+
+=== "Python"
+
+    ```python
+    --8<-- "python/python/tests/docs/test_embeddings_optional.py:register_secret"
+    ```
+
+=== "Typescript"
+
+    ```typescript
+    --8<-- "nodejs/examples/embedding.test.ts:register_secret"
+    ```
+
+## Using variables to set the device parameter
+
+Many embedding functions that run locally have a `device` parameter that controls
+whether to use GPU or CPU for inference. Because not all computers have a GPU,
+it's helpful to be able to set the `device` parameter at runtime, rather than
+have it hard coded in the embedding configuration. To make it work even if the
+variable isn't set, you could provide a default value of `cpu` in the embedding
+configuration.
+
+Some embedding libraries even have a method to detect which devices are available,
+which could be used to dynamically set the device at runtime. For example, in Python
+you can check if a CUDA GPU is available using `torch.cuda.is_available()`.
+
+```python
+--8<-- "python/python/tests/docs/test_embeddings_optional.py:register_device"
+```
--- a/docs/src/extra_js/reo.js
+++ b/docs/src/extra_js/reo.js
@@ -0,0 +1 @@
+!function(){var e,t,n;e="9627b71b382d201",t=function(){Reo.init({clientID:"9627b71b382d201"})},(n=document.createElement("script")).src="https://static.reo.dev/"+e+"/reo.js",n.defer=!0,n.onload=t,document.head.appendChild(n)}();
--- a/docs/src/guides/tables.md
+++ b/docs/src/guides/tables.md
@@ -601,6 +601,38 @@ After a table has been created, you can always add more data to it using the `ad
    )
    ```

+## Upserting into a table
+
+Upserting lets you insert new rows or update existing rows in a table. To upsert
+in LanceDB, use the merge insert API.
+
+=== "Python"
+
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic"
+        ```
+        **API Reference**: [lancedb.table.Table.merge_insert][]
+
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic_async"
+        ```
+        **API Reference**: [lancedb.table.AsyncTable.merge_insert][]
+
+=== "Typescript[^1]"
+
+    === "@lancedb/lancedb"
+
+        ```typescript
+        --8<-- "nodejs/examples/merge_insert.test.ts:upsert_basic"
+        ```
+        **API Reference**: [lancedb.Table.mergeInsert](../js/classes/Table.md/#mergeinsert)
+
+Read more in the guide on [merge insert](tables/merge_insert.md).
+
 ## Deleting from a table

 Use the `delete()` method on tables to delete rows from a table. To choose which rows to delete, provide a filter that matches on the metadata columns. This can delete any number of rows that match the filter.
--- a/docs/src/guides/tables/merge_insert.md
+++ b/docs/src/guides/tables/merge_insert.md
@@ -0,0 +1,135 @@
+The merge insert command is a flexible API that can be used to perform:
+
+1. Upsert
+2. Insert-if-not-exists
+3. Replace range
+
+It works by joining the input data with the target table on a key you provide.
+Often this key is a unique row id key. You can then specify what to do when
+there is a match and when there is not a match. For example, for upsert you want
+to update if the row has a match and insert if the row doesn't have a match.
+Whereas for insert-if-not-exists you only want to insert if the row doesn't have
+a match.
+
+You can also read more in the API reference:
+
+* Python
+    * Sync: [lancedb.table.Table.merge_insert][]
+    * Async: [lancedb.table.AsyncTable.merge_insert][]
+* Typescript: [lancedb.Table.mergeInsert](../../js/classes/Table.md/#mergeinsert)
+
+!!! tip "Use scalar indices to speed up merge insert"
+
+    The merge insert command needs to perform a join between the input data and the
+    target table on the `on` key you provide. This requires scanning that entire
+    column, which can be expensive for large tables. To speed up this operation,
+    you can create a scalar index on the `on` column, which will allow LanceDB to
+    find matches without having to scan the whole table.
+
+    Read more about scalar indices in the [Building a Scalar Index](../scalar_index.md)
+    guide.
+
+!!! info "Embedding Functions"
+
+    Like the create table and add APIs, the merge insert API will automatically
+    compute embeddings if the table has an embedding definition in its schema.
+    If the input data doesn't contain the source column, or the vector column
+    is already filled, then the embeddings won't be computed. See the
+    [Embedding Functions](../../embeddings/embedding_functions.md) guide for more
+    information.
+
+## Upsert
+
+Upsert updates rows if they exist and inserts them if they don't. To do this
+with merge insert, enable both `when_matched_update_all()` and
+`when_not_matched_insert_all()`.
+
+=== "Python"
+
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic"
+        ```
+
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic_async"
+        ```
+
+=== "Typescript"
+
+    === "@lancedb/lancedb"
+
+        ```typescript
+        --8<-- "nodejs/examples/merge_insert.test.ts:upsert_basic"
+        ```
+
+!!! note "Providing subsets of columns"
+
+    If a column is nullable, it can be omitted from input data and it will be
+    considered `null`. Columns can also be provided in any order.
+
+## Insert-if-not-exists
+
+To avoid inserting duplicate rows, you can use the insert-if-not-exists command.
+This will only insert rows that do not have a match in the target table. To do
+this with merge insert, enable just `when_not_matched_insert_all()`.
+
+
+=== "Python"
+
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:insert_if_not_exists"
+        ```
+
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:insert_if_not_exists_async"
+        ```
+
+=== "Typescript"
+
+    === "@lancedb/lancedb"
+
+        ```typescript
+        --8<-- "nodejs/examples/merge_insert.test.ts:insert_if_not_exists"
+        ```
+
+
+## Replace range
+
+You can also replace a range of rows in the target table with the input data.
+For example, if you have a table of document chunks, where each chunk has
+both a `doc_id` and a `chunk_id`, you can replace all chunks for a given
+`doc_id` with updated chunks. This can be tricky otherwise because if you
+try to use upsert when the new data has fewer chunks you will end up with
+extra chunks. To avoid this, add another clause to delete any chunks for
+the document that are not in the new data, with
+`when_not_matched_by_source_delete`.
+
+=== "Python"
+
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:replace_range"
+        ```
+
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:replace_range_async"
+        ```
+
+=== "Typescript"
+
+    === "@lancedb/lancedb"
+
+        ```typescript
+        --8<-- "nodejs/examples/merge_insert.test.ts:replace_range"
+        ```
--- a/docs/src/js/README.md
+++ b/docs/src/js/README.md
@@ -36,7 +36,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
 console.log(results);
 ```

-The [quickstart](../basic.md) contains a more complete example.
+The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.

 ## Development

--- a/docs/src/js/classes/Connection.md
+++ b/docs/src/js/classes/Connection.md
@@ -23,18 +23,6 @@ be closed when they are garbage collected.
 Any created tables are independent and will continue to work even if
 the underlying connection has been closed.

-## Constructors
-
-### new Connection()
-
-```ts
-new Connection(): Connection
-```
-
-#### Returns
-
-[`Connection`](Connection.md)
-
 ## Methods

 ### close()
@@ -71,7 +59,7 @@ Creates a new empty Table
 * **name**: `string`
    The name of the table.

-* **schema**: `SchemaLike`
+* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
    The schema of the table

 * **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
@@ -117,7 +105,7 @@ Creates a new Table and initialize it with new data.
 * **name**: `string`
    The name of the table.

-* **data**: `TableLike` \| `Record`&lt;`string`, `unknown`&gt;[]
+* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`&lt;`string`, `unknown`&gt;[]
    Non-empty Array of Records
    to be inserted into the table

@@ -143,6 +131,20 @@ Return a brief description of the connection

 ***

+### dropAllTables()
+
+```ts
+abstract dropAllTables(): Promise<void>
+```
+
+Drop all tables in the database.
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
+
+***
+
 ### dropTable()

 ```ts
@@ -189,7 +191,7 @@ Open a table in the database.
 * **name**: `string`
    The name of the table

-* **options?**: `Partial`&lt;`OpenTableOptions`&gt;
+* **options?**: `Partial`&lt;[`OpenTableOptions`](../interfaces/OpenTableOptions.md)&gt;

 #### Returns

--- a/docs/src/js/classes/Index.md
+++ b/docs/src/js/classes/Index.md
@@ -72,11 +72,9 @@ The results of a full text search are ordered by relevance measured by BM25.

 You can combine filters with full text search.

-For now, the full text search index only supports English, and doesn't support phrase search.
-
 #### Parameters

-* **options?**: `Partial`&lt;`FtsOptions`&gt;
+* **options?**: `Partial`&lt;[`FtsOptions`](../interfaces/FtsOptions.md)&gt;

 #### Returns

@@ -98,7 +96,7 @@ the vectors.

 #### Parameters

-* **options?**: `Partial`&lt;`HnswPqOptions`&gt;
+* **options?**: `Partial`&lt;[`HnswPqOptions`](../interfaces/HnswPqOptions.md)&gt;

 #### Returns

@@ -120,7 +118,7 @@ the vectors.

 #### Parameters

-* **options?**: `Partial`&lt;`HnswSqOptions`&gt;
+* **options?**: `Partial`&lt;[`HnswSqOptions`](../interfaces/HnswSqOptions.md)&gt;

 #### Returns

--- a/docs/src/js/classes/MergeInsertBuilder.md
+++ b/docs/src/js/classes/MergeInsertBuilder.md
@@ -0,0 +1,126 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / MergeInsertBuilder
+
+# Class: MergeInsertBuilder
+
+A builder used to create and run a merge insert operation
+
+## Constructors
+
+### new MergeInsertBuilder()
+
+```ts
+new MergeInsertBuilder(native, schema): MergeInsertBuilder
+```
+
+Construct a MergeInsertBuilder. __Internal use only.__
+
+#### Parameters
+
+* **native**: `NativeMergeInsertBuilder`
+
+* **schema**: `Schema`&lt;`any`&gt; \| `Promise`&lt;`Schema`&lt;`any`&gt;&gt;
+
+#### Returns
+
+[`MergeInsertBuilder`](MergeInsertBuilder.md)
+
+## Methods
+
+### execute()
+
+```ts
+execute(data): Promise<void>
+```
+
+Executes the merge insert operation
+
+Nothing is returned but the `Table` is updated
+
+#### Parameters
+
+* **data**: [`Data`](../type-aliases/Data.md)
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
+
+***
+
+### whenMatchedUpdateAll()
+
+```ts
+whenMatchedUpdateAll(options?): MergeInsertBuilder
+```
+
+Rows that exist in both the source table (new data) and
+the target table (old data) will be updated, replacing
+the old row with the corresponding matching row.
+
+If there are multiple matches then the behavior is undefined.
+Currently this causes multiple copies of the row to be created
+but that behavior is subject to change.
+
+An optional condition may be specified.  If it is, then only
+matched rows that satisfy the condition will be updated.  Any
+rows that do not satisfy the condition will be left as they
+are.  Failing to satisfy the condition does not cause a
+"matched row" to become a "not matched" row.
+
+The condition should be an SQL string.  Use the prefix
+`target.` to refer to rows in the target table (old data)
+and the prefix `source.` to refer to rows in the source
+table (new data).
+
+For example, "target.last_update < source.last_update"
+
+#### Parameters
+
+* **options?**
+
+* **options.where?**: `string`
+
+#### Returns
+
+[`MergeInsertBuilder`](MergeInsertBuilder.md)
+
+***
+
+### whenNotMatchedBySourceDelete()
+
+```ts
+whenNotMatchedBySourceDelete(options?): MergeInsertBuilder
+```
+
+Rows that exist only in the target table (old data) will be
+deleted.  An optional condition can be provided to limit what
+data is deleted.
+
+#### Parameters
+
+* **options?**
+
+* **options.where?**: `string`
+    An optional condition to limit what data is deleted
+
+#### Returns
+
+[`MergeInsertBuilder`](MergeInsertBuilder.md)
+
+***
+
+### whenNotMatchedInsertAll()
+
+```ts
+whenNotMatchedInsertAll(): MergeInsertBuilder
+```
+
+Rows that exist only in the source table (new data) should
+be inserted into the target table.
+
+#### Returns
+
+[`MergeInsertBuilder`](MergeInsertBuilder.md)
--- a/docs/src/js/classes/Query.md
+++ b/docs/src/js/classes/Query.md
@@ -8,30 +8,14 @@

 A builder for LanceDB queries.

+## See
+
+[Table#query](Table.md#query), [Table#search](Table.md#search)
+
 ## Extends

 - [`QueryBase`](QueryBase.md)&lt;`NativeQuery`&gt;

-## Constructors
-
-### new Query()
-
-```ts
-new Query(tbl): Query
-```
-
-#### Parameters
-
-* **tbl**: `Table`
-
-#### Returns
-
-[`Query`](Query.md)
-
-#### Overrides
-
-[`QueryBase`](QueryBase.md).[`constructor`](QueryBase.md#constructors)
-
 ## Properties

 ### inner
@@ -46,42 +30,6 @@ protected inner: Query | Promise<Query>;

 ## Methods

-### \[asyncIterator\]()
-
-```ts
-asyncIterator: AsyncIterator<RecordBatch<any>, any, undefined>
-```
-
-#### Returns
-
-`AsyncIterator`&lt;`RecordBatch`&lt;`any`&gt;, `any`, `undefined`&gt;
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`[asyncIterator]`](QueryBase.md#%5Basynciterator%5D)
-
-***
-
-### doCall()
-
-```ts
-protected doCall(fn): void
-```
-
-#### Parameters
-
-* **fn**
-
-#### Returns
-
-`void`
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`doCall`](QueryBase.md#docall)
-
-***
-
 ### execute()

 ```ts
@@ -92,7 +40,7 @@ Execute the query and return the results as an

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -161,7 +109,7 @@ fastSearch(): this
 Skip searching un-indexed data. This can make search faster, but will miss
 any data that is not yet indexed.

-Use lancedb.Table#optimize to index all un-indexed data.
+Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

 #### Returns

@@ -189,7 +137,7 @@ A filter statement to be applied to this query.

 `this`

-#### Alias
+#### See

 where

@@ -213,7 +161,7 @@ fullTextSearch(query, options?): this

 * **query**: `string`

-* **options?**: `Partial`&lt;`FullTextSearchOptions`&gt;
+* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;

 #### Returns

@@ -250,26 +198,6 @@ called then every valid row from the table will be returned.

 ***

-### nativeExecute()
-
-```ts
-protected nativeExecute(options?): Promise<RecordBatchIterator>
-```
-
-#### Parameters
-
-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
-
-#### Returns
-
-`Promise`&lt;`RecordBatchIterator`&gt;
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`nativeExecute`](QueryBase.md#nativeexecute)
-
-***
-
 ### nearestTo()

 ```ts
@@ -294,7 +222,7 @@ If there is more than one vector column you must use

 #### Parameters

-* **vector**: `IntoVector`
+* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)

 #### Returns

@@ -427,7 +355,7 @@ Collect the results as an array of objects.

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -449,7 +377,7 @@ Collect the results as an Arrow

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

--- a/docs/src/js/classes/QueryBase.md
+++ b/docs/src/js/classes/QueryBase.md
@@ -8,6 +8,11 @@

 Common methods supported by all query types

+## See
+
+ - [Query](Query.md)
+ - [VectorQuery](VectorQuery.md)
+
 ## Extended by

 - [`Query`](Query.md)
@@ -21,22 +26,6 @@ Common methods supported by all query types

 - `AsyncIterable`&lt;`RecordBatch`&gt;

-## Constructors
-
-### new QueryBase()
-
-```ts
-protected new QueryBase<NativeQueryType>(inner): QueryBase<NativeQueryType>
-```
-
-#### Parameters
-
-* **inner**: `NativeQueryType` \| `Promise`&lt;`NativeQueryType`&gt;
-
-#### Returns
-
-[`QueryBase`](QueryBase.md)&lt;`NativeQueryType`&gt;
-
 ## Properties

 ### inner
@@ -47,38 +36,6 @@ protected inner: NativeQueryType | Promise<NativeQueryType>;

 ## Methods

-### \[asyncIterator\]()
-
-```ts
-asyncIterator: AsyncIterator<RecordBatch<any>, any, undefined>
-```
-
-#### Returns
-
-`AsyncIterator`&lt;`RecordBatch`&lt;`any`&gt;, `any`, `undefined`&gt;
-
-#### Implementation of
-
-`AsyncIterable.[asyncIterator]`
-
-***
-
-### doCall()
-
-```ts
-protected doCall(fn): void
-```
-
-#### Parameters
-
-* **fn**
-
-#### Returns
-
-`void`
-
-***
-
 ### execute()

 ```ts
@@ -89,7 +46,7 @@ Execute the query and return the results as an

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -150,7 +107,7 @@ fastSearch(): this
 Skip searching un-indexed data. This can make search faster, but will miss
 any data that is not yet indexed.

-Use lancedb.Table#optimize to index all un-indexed data.
+Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

 #### Returns

@@ -174,7 +131,7 @@ A filter statement to be applied to this query.

 `this`

-#### Alias
+#### See

 where

@@ -194,7 +151,7 @@ fullTextSearch(query, options?): this

 * **query**: `string`

-* **options?**: `Partial`&lt;`FullTextSearchOptions`&gt;
+* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;

 #### Returns

@@ -223,22 +180,6 @@ called then every valid row from the table will be returned.

 ***

-### nativeExecute()
-
-```ts
-protected nativeExecute(options?): Promise<RecordBatchIterator>
-```
-
-#### Parameters
-
-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
-
-#### Returns
-
-`Promise`&lt;`RecordBatchIterator`&gt;
-
-***
-
 ### offset()

 ```ts
@@ -314,7 +255,7 @@ Collect the results as an array of objects.

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -332,7 +273,7 @@ Collect the results as an Arrow

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -14,21 +14,13 @@ will be freed when the Table is garbage collected.  To eagerly free the cache yo
 can call the `close` method.  Once the Table is closed, it cannot be used for any
 further operations.

+Tables are created using the methods [Connection#createTable](Connection.md#createtable)
+and [Connection#createEmptyTable](Connection.md#createemptytable). Existing tables are opened
+using [Connection#openTable](Connection.md#opentable).
+
 Closing a table is optional.  If not closed, it will be closed when it is garbage
 collected.

-## Constructors
-
-### new Table()
-
-```ts
-new Table(): Table
-```
-
-#### Returns
-
-[`Table`](Table.md)
-
 ## Accessors

 ### name
@@ -216,6 +208,9 @@ Indices on vector columns will speed up vector searches.
 Indices on scalar columns will speed up filtering (in both
 vector and non-vector searches)

+We currently don't support custom named indexes.
+The index name will always be `${column}_idx`.
+
 #### Parameters

 * **column**: `string`
@@ -226,11 +221,6 @@ vector and non-vector searches)

 `Promise`&lt;`void`&gt;

-#### Note
-
-We currently don't support custom named indexes,
-The index name will always be `${column}_idx`
-
 #### Examples

 ```ts
@@ -329,18 +319,14 @@ Drop an index from the table.

 * **name**: `string`
    The name of the index.
+    This does not delete the index from disk, it just removes it from the table.
+    To delete the index, run [Table#optimize](Table.md#optimize) after dropping the index.
+    Use [Table.listIndices](Table.md#listindices) to find the names of the indices.

 #### Returns

 `Promise`&lt;`void`&gt;

-#### Note
-
-This does not delete the index from disk, it just removes it from the table.
-To delete the index, run [Table#optimize](Table.md#optimize) after dropping the index.
-
-Use [Table.listIndices](Table.md#listindices) to find the names of the indices.
-
 ***

 ### indexStats()
@@ -404,7 +390,7 @@ List all the versions of the table

 #### Returns

-`Promise`&lt;`Version`[]&gt;
+`Promise`&lt;[`Version`](../interfaces/Version.md)[]&gt;

 ***

@@ -420,7 +406,7 @@ abstract mergeInsert(on): MergeInsertBuilder

 #### Returns

-`MergeInsertBuilder`
+[`MergeInsertBuilder`](MergeInsertBuilder.md)

 ***

@@ -464,7 +450,7 @@ Modeled after ``VACUUM`` in PostgreSQL.

 #### Returns

-`Promise`&lt;`OptimizeStats`&gt;
+`Promise`&lt;[`OptimizeStats`](../interfaces/OptimizeStats.md)&gt;

 ***

@@ -581,7 +567,7 @@ Get the schema of the table.
 abstract search(
   query,
   queryType?,
-   ftsColumns?): VectorQuery | Query
+   ftsColumns?): Query | VectorQuery
 ```

 Create a search query to find the nearest neighbors
@@ -589,7 +575,7 @@ of the given query

 #### Parameters

-* **query**: `string` \| `IntoVector`
+* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md)
    the query, a vector or string

 * **queryType?**: `string`
@@ -603,7 +589,7 @@ of the given query

 #### Returns

-[`VectorQuery`](VectorQuery.md) \| [`Query`](Query.md)
+[`Query`](Query.md) \| [`VectorQuery`](VectorQuery.md)

 ***

@@ -722,7 +708,7 @@ by `query`.

 #### Parameters

-* **vector**: `IntoVector`
+* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)

 #### Returns

@@ -745,38 +731,3 @@ Retrieve the version of the table
 #### Returns

 `Promise`&lt;`number`&gt;
-
-***
-
-### parseTableData()
-
-```ts
-static parseTableData(
-   data,
-   options?,
-   streaming?): Promise<object>
-```
-
-#### Parameters
-
-* **data**: `TableLike` \| `Record`&lt;`string`, `unknown`&gt;[]
-
-* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
-
-* **streaming?**: `boolean` = `false`
-
-#### Returns
-
-`Promise`&lt;`object`&gt;
-
-##### buf
-
-```ts
-buf: Buffer;
-```
-
-##### mode
-
-```ts
-mode: string;
-```
--- a/docs/src/js/classes/VectorQuery.md
+++ b/docs/src/js/classes/VectorQuery.md
@@ -10,30 +10,14 @@ A builder used to construct a vector search

 This builder can be reused to execute the query many times.

+## See
+
+[Query#nearestTo](Query.md#nearestto)
+
 ## Extends

 - [`QueryBase`](QueryBase.md)&lt;`NativeVectorQuery`&gt;

-## Constructors
-
-### new VectorQuery()
-
-```ts
-new VectorQuery(inner): VectorQuery
-```
-
-#### Parameters
-
-* **inner**: `VectorQuery` \| `Promise`&lt;`VectorQuery`&gt;
-
-#### Returns
-
-[`VectorQuery`](VectorQuery.md)
-
-#### Overrides
-
-[`QueryBase`](QueryBase.md).[`constructor`](QueryBase.md#constructors)
-
 ## Properties

 ### inner
@@ -48,22 +32,6 @@ protected inner: VectorQuery | Promise<VectorQuery>;

 ## Methods

-### \[asyncIterator\]()
-
-```ts
-asyncIterator: AsyncIterator<RecordBatch<any>, any, undefined>
-```
-
-#### Returns
-
-`AsyncIterator`&lt;`RecordBatch`&lt;`any`&gt;, `any`, `undefined`&gt;
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`[asyncIterator]`](QueryBase.md#%5Basynciterator%5D)
-
-***
-
 ### addQueryVector()

 ```ts
@@ -72,7 +40,7 @@ addQueryVector(vector): VectorQuery

 #### Parameters

-* **vector**: `IntoVector`
+* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)

 #### Returns

@@ -179,26 +147,6 @@ By default "l2" is used.

 ***

-### doCall()
-
-```ts
-protected doCall(fn): void
-```
-
-#### Parameters
-
-* **fn**
-
-#### Returns
-
-`void`
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`doCall`](QueryBase.md#docall)
-
-***
-
 ### ef()

 ```ts
@@ -233,7 +181,7 @@ Execute the query and return the results as an

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -302,7 +250,7 @@ fastSearch(): this
 Skip searching un-indexed data. This can make search faster, but will miss
 any data that is not yet indexed.

-Use lancedb.Table#optimize to index all un-indexed data.
+Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

 #### Returns

@@ -330,7 +278,7 @@ A filter statement to be applied to this query.

 `this`

-#### Alias
+#### See

 where

@@ -354,7 +302,7 @@ fullTextSearch(query, options?): this

 * **query**: `string`

-* **options?**: `Partial`&lt;`FullTextSearchOptions`&gt;
+* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;

 #### Returns

@@ -391,26 +339,6 @@ called then every valid row from the table will be returned.

 ***

-### nativeExecute()
-
-```ts
-protected nativeExecute(options?): Promise<RecordBatchIterator>
-```
-
-#### Parameters
-
-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
-
-#### Returns
-
-`Promise`&lt;`RecordBatchIterator`&gt;
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`nativeExecute`](QueryBase.md#nativeexecute)
-
-***
-
 ### nprobes()

 ```ts
@@ -625,7 +553,7 @@ Collect the results as an array of objects.

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -647,7 +575,7 @@ Collect the results as an Arrow

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

--- a/docs/src/js/enumerations/WriteMode.md
+++ b/docs/src/js/enumerations/WriteMode.md
@@ -1,33 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / WriteMode
-
-# Enumeration: WriteMode
-
-Write mode for writing a table.
-
-## Enumeration Members
-
-### Append
-
-```ts
-Append: "Append";
-```
-
-***
-
-### Create
-
-```ts
-Create: "Create";
-```
-
-***
-
-### Overwrite
-
-```ts
-Overwrite: "Overwrite";
-```
--- a/docs/src/js/functions/connect.md
+++ b/docs/src/js/functions/connect.md
@@ -6,10 +6,10 @@

 # Function: connect()

-## connect(uri, opts)
+## connect(uri, options)

 ```ts
-function connect(uri, opts?): Promise<Connection>
+function connect(uri, options?): Promise<Connection>
 ```

 Connect to a LanceDB instance at the given URI.
@@ -26,7 +26,8 @@ Accepted formats:
    The uri of the database. If the database uri starts
    with `db://` then it connects to a remote database.

-* **opts?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
+* **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
+    The options to use when connecting to the database

 ### Returns

@@ -49,10 +50,10 @@ const conn = await connect(
 });
 ```

-## connect(opts)
+## connect(options)

 ```ts
-function connect(opts): Promise<Connection>
+function connect(options): Promise<Connection>
 ```

 Connect to a LanceDB instance at the given URI.
@@ -65,7 +66,8 @@ Accepted formats:

 ### Parameters

-* **opts**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt; & `object`
+* **options**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt; & `object`
+    The options to use when connecting to the database

 ### Returns

--- a/docs/src/js/functions/makeArrowTable.md
+++ b/docs/src/js/functions/makeArrowTable.md
@@ -22,8 +22,6 @@ when creating a table or adding data to it)
 This function converts an array of Record<String, any> (row-major JS objects)
 to an Arrow Table (a columnar structure)

-Note that it currently does not support nulls.
-
 If a schema is provided then it will be used to determine the resulting array
 types.  Fields will also be reordered to fit the order defined by the schema.

@@ -31,6 +29,9 @@ If a schema is not provided then the types will be inferred and the field order
 will be controlled by the order of properties in the first record.  If a type
 is inferred it will always be nullable.

+If not all fields are found in the data, then a subset of the schema will be
+returned.
+
 If the input is empty then a schema must be provided to create an empty table.

 When a schema is not specified then data types will be inferred.  The inference
@@ -38,6 +39,7 @@ rules are as follows:

 - boolean => Bool
 - number => Float64
+ - bigint => Int64
 - String => Utf8
 - Buffer => Binary
 - Record<String, any> => Struct
@@ -57,6 +59,7 @@ rules are as follows:

 ## Example

+```ts
 import { fromTableToBuffer, makeArrowTable } from "../arrow";
 import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";

@@ -78,7 +81,6 @@ The `vectorColumns` option can be used to support other vector column
 names and data types.

 ```ts
-
 const schema = new Schema([
  new Field("a", new Float64()),
  new Field("b", new Float64()),
@@ -97,8 +99,7 @@ const schema = new Schema([

 You can specify the vector column types and names using the options as well

-```typescript
-
+```ts
 const schema = new Schema([
  new Field('a', new Float64()),
  new Field('b', new Float64()),
--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -9,15 +9,12 @@
 - [embedding](namespaces/embedding/README.md)
 - [rerankers](namespaces/rerankers/README.md)

-## Enumerations
-
- [WriteMode](enumerations/WriteMode.md)
-
 ## Classes

 - [Connection](classes/Connection.md)
 - [Index](classes/Index.md)
 - [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
+- [MergeInsertBuilder](classes/MergeInsertBuilder.md)
 - [Query](classes/Query.md)
 - [QueryBase](classes/QueryBase.md)
 - [RecordBatchIterator](classes/RecordBatchIterator.md)
@@ -31,23 +28,39 @@
 - [AddDataOptions](interfaces/AddDataOptions.md)
 - [ClientConfig](interfaces/ClientConfig.md)
 - [ColumnAlteration](interfaces/ColumnAlteration.md)
+- [CompactionStats](interfaces/CompactionStats.md)
 - [ConnectionOptions](interfaces/ConnectionOptions.md)
 - [CreateTableOptions](interfaces/CreateTableOptions.md)
 - [ExecutableQuery](interfaces/ExecutableQuery.md)
+- [FtsOptions](interfaces/FtsOptions.md)
+- [FullTextSearchOptions](interfaces/FullTextSearchOptions.md)
+- [HnswPqOptions](interfaces/HnswPqOptions.md)
+- [HnswSqOptions](interfaces/HnswSqOptions.md)
 - [IndexConfig](interfaces/IndexConfig.md)
 - [IndexOptions](interfaces/IndexOptions.md)
 - [IndexStatistics](interfaces/IndexStatistics.md)
 - [IvfPqOptions](interfaces/IvfPqOptions.md)
+- [OpenTableOptions](interfaces/OpenTableOptions.md)
 - [OptimizeOptions](interfaces/OptimizeOptions.md)
+- [OptimizeStats](interfaces/OptimizeStats.md)
+- [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
+- [RemovalStats](interfaces/RemovalStats.md)
 - [RetryConfig](interfaces/RetryConfig.md)
 - [TableNamesOptions](interfaces/TableNamesOptions.md)
 - [TimeoutConfig](interfaces/TimeoutConfig.md)
 - [UpdateOptions](interfaces/UpdateOptions.md)
- [WriteOptions](interfaces/WriteOptions.md)
+- [Version](interfaces/Version.md)

 ## Type Aliases

 - [Data](type-aliases/Data.md)
+- [DataLike](type-aliases/DataLike.md)
+- [FieldLike](type-aliases/FieldLike.md)
+- [IntoSql](type-aliases/IntoSql.md)
+- [IntoVector](type-aliases/IntoVector.md)
+- [RecordBatchLike](type-aliases/RecordBatchLike.md)
+- [SchemaLike](type-aliases/SchemaLike.md)
+- [TableLike](type-aliases/TableLike.md)

 ## Functions

--- a/docs/src/js/interfaces/ClientConfig.md
+++ b/docs/src/js/interfaces/ClientConfig.md
@@ -8,6 +8,14 @@

 ## Properties

+### extraHeaders?
+
+```ts
+optional extraHeaders: Record<string, string>;
+```
+
+***
+
 ### retryConfig?

 ```ts
--- a/docs/src/js/interfaces/CompactionStats.md
+++ b/docs/src/js/interfaces/CompactionStats.md
@@ -0,0 +1,49 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / CompactionStats
+
+# Interface: CompactionStats
+
+Statistics about a compaction operation.
+
+## Properties
+
+### filesAdded
+
+```ts
+filesAdded: number;
+```
+
+The number of new, compacted data files added
+
+***
+
+### filesRemoved
+
+```ts
+filesRemoved: number;
+```
+
+The number of data files removed
+
+***
+
+### fragmentsAdded
+
+```ts
+fragmentsAdded: number;
+```
+
+The number of new, compacted fragments added
+
+***
+
+### fragmentsRemoved
+
+```ts
+fragmentsRemoved: number;
+```
+
+The number of fragments removed
--- a/docs/src/js/interfaces/CreateTableOptions.md
+++ b/docs/src/js/interfaces/CreateTableOptions.md
@@ -8,7 +8,7 @@

 ## Properties

-### dataStorageVersion?
+### ~~dataStorageVersion?~~

 ```ts
 optional dataStorageVersion: string;
@@ -19,6 +19,10 @@ The version of the data storage format to use.
 The default is `stable`.
 Set to "legacy" to use the old format.

+#### Deprecated
+
+Pass `new_table_data_storage_version` to storageOptions instead.
+
 ***

 ### embeddingFunction?
@@ -29,7 +33,7 @@ optional embeddingFunction: EmbeddingFunctionConfig;

 ***

-### enableV2ManifestPaths?
+### ~~enableV2ManifestPaths?~~

 ```ts
 optional enableV2ManifestPaths: boolean;
@@ -41,6 +45,10 @@ turning this on will make the dataset unreadable for older versions
 of LanceDB (prior to 0.10.0). To migrate an existing dataset, instead
 use the LocalTable#migrateManifestPathsV2 method.

+#### Deprecated
+
+Pass `new_table_enable_v2_manifest_paths` to storageOptions instead.
+
 ***

 ### existOk
@@ -90,17 +98,3 @@ Options already set on the connection will be inherited by the table,
 but can be overridden here.

 The available options are described at https://lancedb.github.io/lancedb/guides/storage/
-
-***
-
-### useLegacyFormat?
-
-```ts
-optional useLegacyFormat: boolean;
-```
-
-If true then data files will be written with the legacy format
-
-The default is false.
-
-Deprecated. Use data storage version instead.
--- a/docs/src/js/interfaces/FtsOptions.md
+++ b/docs/src/js/interfaces/FtsOptions.md
@@ -0,0 +1,103 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / FtsOptions
+
+# Interface: FtsOptions
+
+Options to create a full text search index
+
+## Properties
+
+### asciiFolding?
+
+```ts
+optional asciiFolding: boolean;
+```
+
+whether to fold non-ASCII characters to their ASCII equivalents (e.g. "café" becomes "cafe")
+
+***
+
+### baseTokenizer?
+
+```ts
+optional baseTokenizer: "raw" | "simple" | "whitespace";
+```
+
+The tokenizer to use when building the index.
+The default is "simple".
+
+The following tokenizers are available:
+
+"simple" - Simple tokenizer. This tokenizer splits the text into tokens using whitespace and punctuation as a delimiter.
+
+"whitespace" - Whitespace tokenizer. This tokenizer splits the text into tokens using whitespace as a delimiter.
+
+"raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token.
+
+***
+
+### language?
+
+```ts
+optional language: string;
+```
+
+The language to use for stemming and stop words.
+This is only used when `stem` or `removeStopWords` is true.
+
+***
+
+### lowercase?
+
+```ts
+optional lowercase: boolean;
+```
+
+whether to lowercase tokens
+
+***
+
+### maxTokenLength?
+
+```ts
+optional maxTokenLength: number;
+```
+
+maximum token length
+tokens longer than this length will be ignored
+
+***
+
+### removeStopWords?
+
+```ts
+optional removeStopWords: boolean;
+```
+
+whether to remove stop words
+
+***
+
+### stem?
+
+```ts
+optional stem: boolean;
+```
+
+whether to stem tokens
+
+***
+
+### withPosition?
+
+```ts
+optional withPosition: boolean;
+```
+
+Whether to build the index with positions.
+True by default.
+If set to false, the index will not store the positions of the tokens in the text,
+which will make the index smaller and faster to build, but will not support phrase queries.
--- a/docs/src/js/interfaces/FullTextSearchOptions.md
+++ b/docs/src/js/interfaces/FullTextSearchOptions.md
@@ -0,0 +1,22 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / FullTextSearchOptions
+
+# Interface: FullTextSearchOptions
+
+Options that control the behavior of a full text search
+
+## Properties
+
+### columns?
+
+```ts
+optional columns: string | string[];
+```
+
+The columns to search
+
+If not specified, all indexed columns will be searched.
+For now, only one column can be searched.
--- a/docs/src/js/interfaces/HnswPqOptions.md
+++ b/docs/src/js/interfaces/HnswPqOptions.md
@@ -0,0 +1,149 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / HnswPqOptions
+
+# Interface: HnswPqOptions
+
+Options to create an `HNSW_PQ` index
+
+## Properties
+
+### distanceType?
+
+```ts
+optional distanceType: "l2" | "cosine" | "dot";
+```
+
+The distance metric used to train the index.
+
+Default value is "l2".
+
+The following distance types are available:
+
+"l2" - Euclidean distance. This is a very common distance metric that
+accounts for both magnitude and direction when determining the distance
+between vectors. L2 distance has a range of [0, ∞).
+
+"cosine" - Cosine distance.  Cosine distance is a distance metric
+calculated from the cosine similarity between two vectors. Cosine
+similarity is a measure of similarity between two non-zero vectors of an
+inner product space. It is defined to equal the cosine of the angle
+between them.  Unlike L2, the cosine distance is not affected by the
+magnitude of the vectors.  Cosine distance has a range of [0, 2].
+
+"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
+distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
+L2 norm is 1), then dot distance is equivalent to the cosine distance.
+
+***
+
+### efConstruction?
+
+```ts
+optional efConstruction: number;
+```
+
+The number of candidates to evaluate during the construction of the HNSW graph.
+
+The default value is 300.
+
+This value controls the tradeoff between build speed and accuracy.
+The higher the value the more accurate the build but the slower it will be.
+150 to 300 is the typical range. 100 is a minimum for good quality search
+results. In most cases, there is no benefit to setting this higher than 500.
+This value should be set to a value that is not less than `ef` in the search phase.
+
+***
+
+### m?
+
+```ts
+optional m: number;
+```
+
+The number of neighbors to select for each vector in the HNSW graph.
+
+The default value is 20.
+
+This value controls the tradeoff between search speed and accuracy.
+The higher the value the more accurate the search but the slower it will be.
+
+***
+
+### maxIterations?
+
+```ts
+optional maxIterations: number;
+```
+
+Max iterations to train kmeans.
+
+The default value is 50.
+
+When training an IVF index we use kmeans to calculate the partitions.  This parameter
+controls how many iterations of kmeans to run.
+
+Increasing this might improve the quality of the index but in most cases the parameter
+is unused because kmeans will converge with fewer iterations.  The parameter is only
+used in cases where kmeans does not appear to converge.  In those cases it is unlikely
+that setting this larger will lead to the index converging anyways.
+
+***
+
+### numPartitions?
+
+```ts
+optional numPartitions: number;
+```
+
+The number of IVF partitions to create.
+
+For HNSW, we recommend a small number of partitions. Setting this to 1 works
+well for most tables. For very large tables, training just one HNSW graph
+will require too much memory. Each partition becomes its own HNSW graph, so
+setting this value higher reduces the peak memory use of training.
+
+***
+
+### numSubVectors?
+
+```ts
+optional numSubVectors: number;
+```
+
+Number of sub-vectors of PQ.
+
+This value controls how much the vector is compressed during the quantization step.
+The more sub vectors there are the less the vector is compressed.  The default is
+the dimension of the vector divided by 16.  If the dimension is not evenly divisible
+by 16 we use the dimension divided by 8.
+
+The above two cases are highly preferred.  Having 8 or 16 values per subvector allows
+us to use efficient SIMD instructions.
+
+If the dimension is not divisible by 8 then we use 1 subvector.  This is not ideal and
+will likely result in poor performance.
+
+***
+
+### sampleRate?
+
+```ts
+optional sampleRate: number;
+```
+
+The rate used to calculate the number of training vectors for kmeans.
+
+Default value is 256.
+
+When an IVF index is trained, we need to calculate partitions.  These are groups
+of vectors that are similar to each other.  To do this we use an algorithm called kmeans.
+
+Running kmeans on a large dataset can be slow.  To speed this up we run kmeans on a
+random sample of the data.  This parameter controls the size of the sample.  The total
+number of vectors used to train the index is `sampleRate * numPartitions`.
+
+Increasing this value might improve the quality of the index but in most cases the
+default should be sufficient.
--- a/docs/src/js/interfaces/HnswSqOptions.md
+++ b/docs/src/js/interfaces/HnswSqOptions.md
@@ -0,0 +1,128 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / HnswSqOptions
+
+# Interface: HnswSqOptions
+
+Options to create an `HNSW_SQ` index
+
+## Properties
+
+### distanceType?
+
+```ts
+optional distanceType: "l2" | "cosine" | "dot";
+```
+
+The distance metric used to train the index.
+
+Default value is "l2".
+
+The following distance types are available:
+
+"l2" - Euclidean distance. This is a very common distance metric that
+accounts for both magnitude and direction when determining the distance
+between vectors. L2 distance has a range of [0, ∞).
+
+"cosine" - Cosine distance.  Cosine distance is a distance metric
+calculated from the cosine similarity between two vectors. Cosine
+similarity is a measure of similarity between two non-zero vectors of an
+inner product space. It is defined to equal the cosine of the angle
+between them.  Unlike L2, the cosine distance is not affected by the
+magnitude of the vectors.  Cosine distance has a range of [0, 2].
+
+"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
+distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
+L2 norm is 1), then dot distance is equivalent to the cosine distance.
+
+***
+
+### efConstruction?
+
+```ts
+optional efConstruction: number;
+```
+
+The number of candidates to evaluate during the construction of the HNSW graph.
+
+The default value is 300.
+
+This value controls the tradeoff between build speed and accuracy.
+The higher the value the more accurate the build but the slower it will be.
+150 to 300 is the typical range. 100 is a minimum for good quality search
+results. In most cases, there is no benefit to setting this higher than 500.
+This value should be set to a value that is not less than `ef` in the search phase.
+
+***
+
+### m?
+
+```ts
+optional m: number;
+```
+
+The number of neighbors to select for each vector in the HNSW graph.
+
+The default value is 20.
+
+This value controls the tradeoff between search speed and accuracy.
+The higher the value the more accurate the search but the slower it will be.
+
+***
+
+### maxIterations?
+
+```ts
+optional maxIterations: number;
+```
+
+Max iterations to train kmeans.
+
+The default value is 50.
+
+When training an IVF index we use kmeans to calculate the partitions.  This parameter
+controls how many iterations of kmeans to run.
+
+Increasing this might improve the quality of the index but in most cases the parameter
+is unused because kmeans will converge with fewer iterations.  The parameter is only
+used in cases where kmeans does not appear to converge.  In those cases it is unlikely
+that setting this larger will lead to the index converging anyways.
+
+***
+
+### numPartitions?
+
+```ts
+optional numPartitions: number;
+```
+
+The number of IVF partitions to create.
+
+For HNSW, we recommend a small number of partitions. Setting this to 1 works
+well for most tables. For very large tables, training just one HNSW graph
+will require too much memory. Each partition becomes its own HNSW graph, so
+setting this value higher reduces the peak memory use of training.
+
+***
+
+### sampleRate?
+
+```ts
+optional sampleRate: number;
+```
+
+The rate used to calculate the number of training vectors for kmeans.
+
+Default value is 256.
+
+When an IVF index is trained, we need to calculate partitions.  These are groups
+of vectors that are similar to each other.  To do this we use an algorithm called kmeans.
+
+Running kmeans on a large dataset can be slow.  To speed this up we run kmeans on a
+random sample of the data.  This parameter controls the size of the sample.  The total
+number of vectors used to train the index is `sampleRate * numPartitions`.
+
+Increasing this value might improve the quality of the index but in most cases the
+default should be sufficient.
--- a/docs/src/js/interfaces/OpenTableOptions.md
+++ b/docs/src/js/interfaces/OpenTableOptions.md
@@ -0,0 +1,40 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / OpenTableOptions
+
+# Interface: OpenTableOptions
+
+## Properties
+
+### indexCacheSize?
+
+```ts
+optional indexCacheSize: number;
+```
+
+Set the size of the index cache, specified as a number of entries
+
+The exact meaning of an "entry" will depend on the type of index:
+- IVF: there is one entry for each IVF partition
+- BTREE: there is one entry for the entire index
+
+This cache applies to the entire opened table, across all indices.
+Setting this value higher will increase performance on larger datasets
+at the expense of more RAM
+
+***
+
+### storageOptions?
+
+```ts
+optional storageOptions: Record<string, string>;
+```
+
+Configuration for object storage.
+
+Options already set on the connection will be inherited by the table,
+but can be overridden here.
+
+The available options are described at https://lancedb.github.io/lancedb/guides/storage/
--- a/docs/src/js/interfaces/OptimizeStats.md
+++ b/docs/src/js/interfaces/OptimizeStats.md
@@ -0,0 +1,29 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / OptimizeStats
+
+# Interface: OptimizeStats
+
+Statistics about an optimize operation
+
+## Properties
+
+### compaction
+
+```ts
+compaction: CompactionStats;
+```
+
+Statistics about the compaction operation
+
+***
+
+### prune
+
+```ts
+prune: RemovalStats;
+```
+
+Statistics about the removal operation
--- a/docs/src/js/interfaces/QueryExecutionOptions.md
+++ b/docs/src/js/interfaces/QueryExecutionOptions.md
@@ -0,0 +1,22 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / QueryExecutionOptions
+
+# Interface: QueryExecutionOptions
+
+Options that control the behavior of a particular query execution
+
+## Properties
+
+### maxBatchLength?
+
+```ts
+optional maxBatchLength: number;
+```
+
+The maximum number of rows to return in a single batch
+
+Batches may have fewer rows if the underlying data is stored
+in smaller chunks.
--- a/docs/src/js/interfaces/RemovalStats.md
+++ b/docs/src/js/interfaces/RemovalStats.md
@@ -0,0 +1,29 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / RemovalStats
+
+# Interface: RemovalStats
+
+Statistics about a cleanup operation
+
+## Properties
+
+### bytesRemoved
+
+```ts
+bytesRemoved: number;
+```
+
+The number of bytes removed
+
+***
+
+### oldVersionsRemoved
+
+```ts
+oldVersionsRemoved: number;
+```
+
+The number of old versions removed
--- a/docs/src/js/interfaces/Version.md
+++ b/docs/src/js/interfaces/Version.md
@@ -0,0 +1,31 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / Version
+
+# Interface: Version
+
+## Properties
+
+### metadata
+
+```ts
+metadata: Record<string, string>;
+```
+
+***
+
+### timestamp
+
+```ts
+timestamp: Date;
+```
+
+***
+
+### version
+
+```ts
+version: number;
+```
--- a/docs/src/js/interfaces/WriteOptions.md
+++ b/docs/src/js/interfaces/WriteOptions.md
@@ -1,19 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / WriteOptions
-
-# Interface: WriteOptions
-
-Write options when creating a Table.
-
-## Properties
-
-### mode?
-
-```ts
-optional mode: WriteMode;
-```
-
-Write mode for writing to a table.
--- a/docs/src/js/namespaces/embedding/README.md
+++ b/docs/src/js/namespaces/embedding/README.md
@@ -17,6 +17,14 @@
 ### Interfaces

 - [EmbeddingFunctionConfig](interfaces/EmbeddingFunctionConfig.md)
+- [EmbeddingFunctionConstructor](interfaces/EmbeddingFunctionConstructor.md)
+- [EmbeddingFunctionCreate](interfaces/EmbeddingFunctionCreate.md)
+- [FieldOptions](interfaces/FieldOptions.md)
+- [FunctionOptions](interfaces/FunctionOptions.md)
+
+### Type Aliases
+
+- [CreateReturnType](type-aliases/CreateReturnType.md)

 ### Functions

--- a/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md
+++ b/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md
@@ -8,6 +8,23 @@

 An embedding function that automatically creates vector representation for a given column.

+It's important subclasses pass the **original** options to the super constructor
+and then pass those options to `resolveVariables` to resolve any variables before
+using them.
+
+## Example
+
+```ts
+class MyEmbeddingFunction extends EmbeddingFunction {
+  constructor(optionsRaw: {model: string, timeout: number}) {
+    super(optionsRaw);
+    const options = this.resolveVariables(optionsRaw);
+    this.model = options.model;
+    this.timeout = options.timeout;
+  }
+}
+```
+
 ## Extended by

 - [`TextEmbeddingFunction`](TextEmbeddingFunction.md)
@@ -16,7 +33,7 @@ An embedding function that automatically creates vector representation for a giv

 • **T** = `any`

-• **M** *extends* `FunctionOptions` = `FunctionOptions`
+• **M** *extends* [`FunctionOptions`](../interfaces/FunctionOptions.md) = [`FunctionOptions`](../interfaces/FunctionOptions.md)

 ## Constructors

@@ -82,12 +99,33 @@ The datatype of the embeddings

 ***

+### getSensitiveKeys()
+
+```ts
+protected getSensitiveKeys(): string[]
+```
+
+Provide a list of keys in the function options that should be treated as
+sensitive. If users pass raw values for these keys, they will be rejected.
+
+#### Returns
+
+`string`[]
+
+***
+
 ### init()?

 ```ts
 optional init(): Promise<void>
 ```

+Optionally load any resources needed for the embedding function.
+
+This method is called after the embedding function has been initialized
+but before any embeddings are computed. It is useful for loading local models
+or other resources that are needed for the embedding function to work.
+
 #### Returns

 `Promise`&lt;`void`&gt;
@@ -108,6 +146,24 @@ The number of dimensions of the embeddings

 ***

+### resolveVariables()
+
+```ts
+protected resolveVariables(config): Partial<M>
+```
+
+Apply variables to the config.
+
+#### Parameters
+
+* **config**: `Partial`&lt;`M`&gt;
+
+#### Returns
+
+`Partial`&lt;`M`&gt;
+
+***
+
 ### sourceField()

 ```ts
@@ -118,53 +174,31 @@ sourceField is used in combination with `LanceSchema` to provide a declarative d

 #### Parameters

-* **optionsOrDatatype**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;`FieldOptions`&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
+* **optionsOrDatatype**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;[`FieldOptions`](../interfaces/FieldOptions.md)&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
    The options for the field or the datatype

 #### Returns

-[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;]
+[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]

 #### See

-lancedb.LanceSchema
+[LanceSchema](../functions/LanceSchema.md)

 ***

 ### toJSON()

 ```ts
-abstract toJSON(): Partial<M>
+toJSON(): Record<string, any>
 ```

-Convert the embedding function to a JSON object
-It is used to serialize the embedding function to the schema
-It's important that any object returned by this method contains all the necessary
-information to recreate the embedding function
-
-It should return the same object that was passed to the constructor
-If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
+Get the original arguments to the constructor, to serialize them so they
+can be used to recreate the embedding function later.

 #### Returns

-`Partial`&lt;`M`&gt;
-
-#### Example
-
-```ts
-class MyEmbeddingFunction extends EmbeddingFunction {
-  constructor(options: {model: string, timeout: number}) {
-    super();
-    this.model = options.model;
-    this.timeout = options.timeout;
-  }
-  toJSON() {
-    return {
-      model: this.model,
-      timeout: this.timeout,
-    };
-}
-```
+`Record`&lt;`string`, `any`&gt;

 ***

@@ -178,12 +212,13 @@ vectorField is used in combination with `LanceSchema` to provide a declarative d

 #### Parameters

-* **optionsOrDatatype?**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;`FieldOptions`&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
+* **optionsOrDatatype?**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;[`FieldOptions`](../interfaces/FieldOptions.md)&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
+    The options for the field

 #### Returns

-[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;]
+[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]

 #### See

-lancedb.LanceSchema
+[LanceSchema](../functions/LanceSchema.md)
--- a/docs/src/js/namespaces/embedding/classes/EmbeddingFunctionRegistry.md
+++ b/docs/src/js/namespaces/embedding/classes/EmbeddingFunctionRegistry.md
@@ -51,7 +51,7 @@ Fetch an embedding function by name

 #### Type Parameters

-• **T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`unknown`, `FunctionOptions`&gt;
+• **T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`unknown`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;

 #### Parameters

@@ -60,7 +60,7 @@ Fetch an embedding function by name

 #### Returns

-`undefined` \| `EmbeddingFunctionCreate`&lt;`T`&gt;
+`undefined` \| [`EmbeddingFunctionCreate`](../interfaces/EmbeddingFunctionCreate.md)&lt;`T`&gt;

 ***

@@ -80,6 +80,28 @@ getTableMetadata(functions): Map<string, string>

 ***

+### getVar()
+
+```ts
+getVar(name): undefined | string
+```
+
+Get a variable.
+
+#### Parameters
+
+* **name**: `string`
+
+#### Returns
+
+`undefined` \| `string`
+
+#### See
+
+[setVar](EmbeddingFunctionRegistry.md#setvar)
+
+***
+
 ### length()

 ```ts
@@ -104,7 +126,7 @@ Register an embedding function

 #### Type Parameters

-• **T** *extends* `EmbeddingFunctionConstructor`&lt;[`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt; = `EmbeddingFunctionConstructor`&lt;[`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;
+• **T** *extends* [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)&lt;[`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt; = [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)&lt;[`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;

 #### Parameters

@@ -145,3 +167,31 @@ reset the registry to the initial state
 #### Returns

 `void`
+
+***
+
+### setVar()
+
+```ts
+setVar(name, value): void
+```
+
+Set a variable. These can be accessed in the embedding function
+configuration using the syntax `$var:variable_name`. If they are not
+set, an error will be thrown letting you know which key is unset. If you
+want to supply a default value, you can add an additional part in the
+configuration like so: `$var:variable_name:default_value`. Default values
+can be used for runtime configurations that are not sensitive, such as
+whether to use a GPU for inference.
+
+The name must not contain colons. The default value can contain colons.
+
+#### Parameters
+
+* **name**: `string`
+
+* **value**: `string`
+
+#### Returns
+
+`void`
--- a/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md
+++ b/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md
@@ -14,7 +14,7 @@ an abstract class for implementing embedding functions that take text as input

 ## Type Parameters

-• **M** *extends* `FunctionOptions` = `FunctionOptions`
+• **M** *extends* [`FunctionOptions`](../interfaces/FunctionOptions.md) = [`FunctionOptions`](../interfaces/FunctionOptions.md)

 ## Constructors

@@ -114,12 +114,37 @@ abstract generateEmbeddings(texts, ...args): Promise<number[][] | Float32Array[]

 ***

+### getSensitiveKeys()
+
+```ts
+protected getSensitiveKeys(): string[]
+```
+
+Provide a list of keys in the function options that should be treated as
+sensitive. If users pass raw values for these keys, they will be rejected.
+
+#### Returns
+
+`string`[]
+
+#### Inherited from
+
+[`EmbeddingFunction`](EmbeddingFunction.md).[`getSensitiveKeys`](EmbeddingFunction.md#getsensitivekeys)
+
+***
+
 ### init()?

 ```ts
 optional init(): Promise<void>
 ```

+Optionally load any resources needed for the embedding function.
+
+This method is called after the embedding function has been initialized
+but before any embeddings are computed. It is useful for loading local models
+or other resources that are needed for the embedding function to work.
+
 #### Returns

 `Promise`&lt;`void`&gt;
@@ -148,6 +173,28 @@ The number of dimensions of the embeddings

 ***

+### resolveVariables()
+
+```ts
+protected resolveVariables(config): Partial<M>
+```
+
+Apply variables to the config.
+
+#### Parameters
+
+* **config**: `Partial`&lt;`M`&gt;
+
+#### Returns
+
+`Partial`&lt;`M`&gt;
+
+#### Inherited from
+
+[`EmbeddingFunction`](EmbeddingFunction.md).[`resolveVariables`](EmbeddingFunction.md#resolvevariables)
+
+***
+
 ### sourceField()

 ```ts
@@ -158,11 +205,11 @@ sourceField is used in combination with `LanceSchema` to provide a declarative d

 #### Returns

-[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;]
+[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]

 #### See

-lancedb.LanceSchema
+[LanceSchema](../functions/LanceSchema.md)

 #### Overrides

@@ -173,37 +220,15 @@ lancedb.LanceSchema
 ### toJSON()

 ```ts
-abstract toJSON(): Partial<M>
+toJSON(): Record<string, any>
 ```

-Convert the embedding function to a JSON object
-It is used to serialize the embedding function to the schema
-It's important that any object returned by this method contains all the necessary
-information to recreate the embedding function
-
-It should return the same object that was passed to the constructor
-If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
+Get the original arguments to the constructor, to serialize them so they
+can be used to recreate the embedding function later.

 #### Returns

-`Partial`&lt;`M`&gt;
-
-#### Example
-
-```ts
-class MyEmbeddingFunction extends EmbeddingFunction {
-  constructor(options: {model: string, timeout: number}) {
-    super();
-    this.model = options.model;
-    this.timeout = options.timeout;
-  }
-  toJSON() {
-    return {
-      model: this.model,
-      timeout: this.timeout,
-    };
-}
-```
+`Record`&lt;`string`, `any`&gt;

 #### Inherited from

@@ -221,15 +246,16 @@ vectorField is used in combination with `LanceSchema` to provide a declarative d

 #### Parameters

-* **optionsOrDatatype?**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;`FieldOptions`&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
+* **optionsOrDatatype?**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;[`FieldOptions`](../interfaces/FieldOptions.md)&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
+    The options for the field

 #### Returns

-[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;]
+[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]

 #### See

-lancedb.LanceSchema
+[LanceSchema](../functions/LanceSchema.md)

 #### Inherited from

--- a/docs/src/js/namespaces/embedding/functions/LanceSchema.md
+++ b/docs/src/js/namespaces/embedding/functions/LanceSchema.md
@@ -14,7 +14,7 @@ Create a schema with embedding functions.

 ## Parameters

-* **fields**: `Record`&lt;`string`, `object` \| [`object`, `Map`&lt;`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;]&gt;
+* **fields**: `Record`&lt;`string`, `object` \| [`object`, `Map`&lt;`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]&gt;

 ## Returns

--- a/docs/src/js/namespaces/embedding/functions/register.md
+++ b/docs/src/js/namespaces/embedding/functions/register.md
@@ -20,7 +20,7 @@ function register(name?): (ctor) => any

 ### Parameters

-* **ctor**: `EmbeddingFunctionConstructor`&lt;[`EmbeddingFunction`](../classes/EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;
+* **ctor**: [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)&lt;[`EmbeddingFunction`](../classes/EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;

 ### Returns

--- a/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionConstructor.md
+++ b/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionConstructor.md
@@ -0,0 +1,27 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / EmbeddingFunctionConstructor
+
+# Interface: EmbeddingFunctionConstructor&lt;T&gt;
+
+## Type Parameters
+
+• **T** *extends* [`EmbeddingFunction`](../classes/EmbeddingFunction.md) = [`EmbeddingFunction`](../classes/EmbeddingFunction.md)
+
+## Constructors
+
+### new EmbeddingFunctionConstructor()
+
+```ts
+new EmbeddingFunctionConstructor(modelOptions?): T
+```
+
+#### Parameters
+
+* **modelOptions?**: `T`\[`"TOptions"`\]
+
+#### Returns
+
+`T`
--- a/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionCreate.md
+++ b/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionCreate.md
@@ -0,0 +1,27 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / EmbeddingFunctionCreate
+
+# Interface: EmbeddingFunctionCreate&lt;T&gt;
+
+## Type Parameters
+
+• **T** *extends* [`EmbeddingFunction`](../classes/EmbeddingFunction.md)
+
+## Methods
+
+### create()
+
+```ts
+create(options?): CreateReturnType<T>
+```
+
+#### Parameters
+
+* **options?**: `T`\[`"TOptions"`\]
+
+#### Returns
+
+[`CreateReturnType`](../type-aliases/CreateReturnType.md)&lt;`T`&gt;
--- a/docs/src/js/namespaces/embedding/interfaces/FieldOptions.md
+++ b/docs/src/js/namespaces/embedding/interfaces/FieldOptions.md
@@ -0,0 +1,27 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / FieldOptions
+
+# Interface: FieldOptions&lt;T&gt;
+
+## Type Parameters
+
+• **T** *extends* `DataType` = `DataType`
+
+## Properties
+
+### datatype
+
+```ts
+datatype: T;
+```
+
+***
+
+### dims?
+
+```ts
+optional dims: number;
+```
--- a/docs/src/js/namespaces/embedding/interfaces/FunctionOptions.md
+++ b/docs/src/js/namespaces/embedding/interfaces/FunctionOptions.md
@@ -0,0 +1,13 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / FunctionOptions
+
+# Interface: FunctionOptions
+
+Options for a given embedding function
+
+## Indexable
+
+ \[`key`: `string`\]: `any`
--- a/docs/src/js/namespaces/embedding/type-aliases/CreateReturnType.md
+++ b/docs/src/js/namespaces/embedding/type-aliases/CreateReturnType.md
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / CreateReturnType
+
+# Type Alias: CreateReturnType&lt;T&gt;
+
+```ts
+type CreateReturnType<T>: T extends object ? Promise<T> : T;
+```
+
+## Type Parameters
+
+• **T**
--- a/docs/src/js/namespaces/rerankers/classes/RRFReranker.md
+++ b/docs/src/js/namespaces/rerankers/classes/RRFReranker.md
@@ -8,24 +8,6 @@

 Reranks the results using the Reciprocal Rank Fusion (RRF) algorithm.

-Internally this uses the Rust implementation
-
-## Constructors
-
-### new RRFReranker()
-
-```ts
-new RRFReranker(inner): RRFReranker
-```
-
-#### Parameters
-
-* **inner**: `RrfReranker`
-
-#### Returns
-
-[`RRFReranker`](RRFReranker.md)
-
 ## Methods

 ### rerankHybrid()
--- a/docs/src/js/type-aliases/DataLike.md
+++ b/docs/src/js/type-aliases/DataLike.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / DataLike
+
+# Type Alias: DataLike
+
+```ts
+type DataLike: Data | object;
+```
--- a/docs/src/js/type-aliases/FieldLike.md
+++ b/docs/src/js/type-aliases/FieldLike.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / FieldLike
+
+# Type Alias: FieldLike
+
+```ts
+type FieldLike: Field | object;
+```
--- a/docs/src/js/type-aliases/IntoSql.md
+++ b/docs/src/js/type-aliases/IntoSql.md
@@ -0,0 +1,19 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / IntoSql
+
+# Type Alias: IntoSql
+
+```ts
+type IntoSql:
+  | string
+  | number
+  | boolean
+  | null
+  | Date
+  | ArrayBufferLike
+  | Buffer
+  | IntoSql[];
+```
--- a/docs/src/js/type-aliases/IntoVector.md
+++ b/docs/src/js/type-aliases/IntoVector.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / IntoVector
+
+# Type Alias: IntoVector
+
+```ts
+type IntoVector: Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
+```
--- a/docs/src/js/type-aliases/RecordBatchLike.md
+++ b/docs/src/js/type-aliases/RecordBatchLike.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / RecordBatchLike
+
+# Type Alias: RecordBatchLike
+
+```ts
+type RecordBatchLike: RecordBatch | object;
+```
--- a/docs/src/js/type-aliases/SchemaLike.md
+++ b/docs/src/js/type-aliases/SchemaLike.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / SchemaLike
+
+# Type Alias: SchemaLike
+
+```ts
+type SchemaLike: Schema | object;
+```
--- a/docs/src/js/type-aliases/TableLike.md
+++ b/docs/src/js/type-aliases/TableLike.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / TableLike
+
+# Type Alias: TableLike
+
+```ts
+type TableLike: ArrowTable | object;
+```
--- a/docs/src/python/polars_arrow.md
+++ b/docs/src/python/polars_arrow.md
@@ -9,24 +9,51 @@ LanceDB supports [Polars](https://github.com/pola-rs/polars), a blazingly fast D

 First, we connect to a LanceDB database.

+=== "Sync API"

    ```py
    --8<-- "python/python/tests/docs/test_python.py:import-lancedb"
    --8<-- "python/python/tests/docs/test_python.py:connect_to_lancedb"
    ```

+=== "Async API"
+
+    ```py
+    --8<-- "python/python/tests/docs/test_python.py:import-lancedb"
+    --8<-- "python/python/tests/docs/test_python.py:connect_to_lancedb_async"
+    ```
+
+
 We can load a Polars `DataFrame` to LanceDB directly.

+=== "Sync API"
+
    ```py
    --8<-- "python/python/tests/docs/test_python.py:import-polars"
    --8<-- "python/python/tests/docs/test_python.py:create_table_polars"
    ```
+
+=== "Async API"
+
+    ```py
+    --8<-- "python/python/tests/docs/test_python.py:import-polars"
+    --8<-- "python/python/tests/docs/test_python.py:create_table_polars_async"
+    ```
+
 We can now perform similarity search via the LanceDB Python API.

+=== "Sync API"
+
    ```py
    --8<-- "python/python/tests/docs/test_python.py:vector_search_polars"
    ```

+=== "Async API"
+
+    ```py
+    --8<-- "python/python/tests/docs/test_python.py:vector_search_polars_async"
+    ```
+
 In addition to the selected columns, LanceDB also returns a vector
 and also the `_distance` column which is the distance between the query
 vector and the returned vector.
@@ -112,4 +139,3 @@ The reason it's beneficial to not convert the LanceDB Table
 to a DataFrame is because the table can potentially be way larger
 than memory, and Polars LazyFrames allow us to work with such
 larger-than-memory datasets by not loading it into memory all at once.
-
--- a/docs/src/python/pydantic.md
+++ b/docs/src/python/pydantic.md
@@ -2,14 +2,19 @@

 [Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
 LanceDB integrates with Pydantic for schema inference, data ingestion, and query result casting.
+Using [LanceModel][lancedb.pydantic.LanceModel], users can seamlessly
+integrate Pydantic with the rest of the LanceDB APIs.

-## Schema
+```python

-LanceDB supports to create Apache Arrow Schema from a
-[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
-via [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
+--8<-- "python/python/tests/docs/test_pydantic_integration.py:imports"
+
+--8<-- "python/python/tests/docs/test_pydantic_integration.py:base_model"
+
+--8<-- "python/python/tests/docs/test_pydantic_integration.py:set_url"
+--8<-- "python/python/tests/docs/test_pydantic_integration.py:base_example"
+```

-::: lancedb.pydantic.pydantic_to_schema

 ## Vector Field

@@ -34,3 +39,9 @@ Current supported type conversions:
 | `list`              | `pyarrow.List`    |
 | `BaseModel`         | `pyarrow.Struct`    |
 | `Vector(n)`         | `pyarrow.FixedSizeList(float32, n)` |
+
+LanceDB supports creating an Apache Arrow Schema from a
+[Pydantic BaseModel][pydantic.BaseModel]
+via [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
+
+::: lancedb.pydantic.pydantic_to_schema
--- a/docs/src/search.md
+++ b/docs/src/search.md
@@ -122,7 +122,7 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi

 === "Python"

-    === "sync API"
+    === "Sync API"

        ```python
        --8<-- "python/python/tests/docs/test_binary_vector.py:imports"
@@ -130,7 +130,7 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi
        --8<-- "python/python/tests/docs/test_binary_vector.py:sync_binary_vector"
        ```

-    === "async API"
+    === "Async API"

        ```python
        --8<-- "python/python/tests/docs/test_binary_vector.py:imports"
@@ -153,7 +153,7 @@ The vector value type can be `float16`, `float32` or `float64`.

 === "Python"

-    === "sync API"
+    === "Sync API"

        ```python
        --8<-- "python/python/tests/docs/test_multivector.py:imports"
@@ -161,7 +161,7 @@ The vector value type can be `float16`, `float32` or `float64`.
        --8<-- "python/python/tests/docs/test_multivector.py:sync_multivector"
        ```

-    === "async API"
+    === "Async API"

        ```python
        --8<-- "python/python/tests/docs/test_multivector.py:imports"
@@ -175,7 +175,7 @@ You can also search for vectors within a specific distance range from the query

 === "Python"

-    === "sync API"
+    === "Sync API"

        ```python
        --8<-- "python/python/tests/docs/test_distance_range.py:imports"
@@ -183,7 +183,7 @@ You can also search for vectors within a specific distance range from the query
        --8<-- "python/python/tests/docs/test_distance_range.py:sync_distance_range"
        ```

-    === "async API"
+    === "Async API"

        ```python
        --8<-- "python/python/tests/docs/test_distance_range.py:imports"
--- a/docs/src/search_legacy.ts
+++ b/docs/src/search_legacy.ts
@@ -20,6 +20,7 @@ async function setup() {
 }

 async () => {
+  console.log("search_legacy.ts: start");
  await setup();

  // --8<-- [start:search1]
@@ -37,5 +38,5 @@ async () => {
    .execute();
  // --8<-- [end:search2]

-  console.log("search: done");
+  console.log("search_legacy.ts: done");
 };
--- a/docs/src/sql_legacy.ts
+++ b/docs/src/sql_legacy.ts
@@ -1,6 +1,7 @@
 import * as vectordb from "vectordb";

 (async () => {
+  console.log("sql_legacy.ts: start");
  const db = await vectordb.connect("data/sample-lancedb");

  let data = [];
@@ -34,5 +35,5 @@ import * as vectordb from "vectordb";
  await tbl.filter("id = 10").limit(10).execute();
  // --8<-- [end:sql_search]

-  console.log("SQL search: done");
+  console.log("sql_legacy.ts: done");
 })();
--- a/docs/test/md_testing.py
+++ b/docs/test/md_testing.py
@@ -11,9 +11,11 @@ excluded_globs = [
    "../src/examples/*.md",
    "../src/integrations/*.md",
    "../src/guides/tables.md",
+    "../src/guides/tables/merge_insert.md",
    "../src/python/duckdb.md",
    "../src/python/pandas_and_pyarrow.md",
    "../src/python/polars_arrow.md",
+    "../src/python/pydantic.md",
    "../src/embeddings/*.md",
    "../src/concepts/*.md",
    "../src/ann_indexes.md",
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.15.1-beta.2</version>
+        <version>0.18.0-beta.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.15.1-beta.2</version>
+    <version>0.18.0-beta.0</version>
    <packaging>pom</packaging>

    <name>LanceDB Parent</name>
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.15.1-beta.1",
+  "version": "0.18.0-beta.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.15.1-beta.1",
+      "version": "0.18.0-beta.0",
      "cpu": [
        "x64",
        "arm64"
@@ -52,14 +52,14 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.15.1-beta.1",
-        "@lancedb/vectordb-darwin-x64": "0.15.1-beta.1",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.1",
-        "@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.1",
-        "@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.1",
-        "@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.1",
-        "@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.1",
-        "@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.1"
+        "@lancedb/vectordb-darwin-arm64": "0.18.0-beta.0",
+        "@lancedb/vectordb-darwin-x64": "0.18.0-beta.0",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.18.0-beta.0",
+        "@lancedb/vectordb-linux-arm64-musl": "0.18.0-beta.0",
+        "@lancedb/vectordb-linux-x64-gnu": "0.18.0-beta.0",
+        "@lancedb/vectordb-linux-x64-musl": "0.18.0-beta.0",
+        "@lancedb/vectordb-win32-arm64-msvc": "0.18.0-beta.0",
+        "@lancedb/vectordb-win32-x64-msvc": "0.18.0-beta.0"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
@@ -329,6 +329,110 @@
        "@jridgewell/sourcemap-codec": "^1.4.10"
      }
    },
+    "node_modules/@lancedb/vectordb-darwin-arm64": {
+      "version": "0.18.0-beta.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.18.0-beta.0.tgz",
+      "integrity": "sha512-dLLgMPllYJOiRfPqkqkmoQu48RIa7K4dOF/qFP8Aex3zqeHE/0sFm3DYjtSFc6SR/6yT8u6Y9iFo2cQp5rCFJA==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "darwin"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-darwin-x64": {
+      "version": "0.18.0-beta.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.18.0-beta.0.tgz",
+      "integrity": "sha512-la0eauU0rzHO5eeVjBt8o/5UW4VzRYAuRA7nqUFLX5T6SWP5+UWjqusVVbWGz3ski+8uEX6VhlaFZP5uIJKGIg==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "darwin"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
+      "version": "0.18.0-beta.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.18.0-beta.0.tgz",
+      "integrity": "sha512-AkXI/lB3yu1Di2G1lhilf89V6qPTppb13aAt+/6gU5/PSfA94y9VXD67D4WyvRbuQghJjDvAavMlWMrJc2NuMw==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-linux-arm64-musl": {
+      "version": "0.18.0-beta.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.18.0-beta.0.tgz",
+      "integrity": "sha512-kTVcJ4LA8w/7egY4m0EXOt8c1DeFUquVtyvexO+VzIFeeHfBkkrMI0DkE0CpHmk+gctkG7EY39jzjgLnPvppnw==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
+      "version": "0.18.0-beta.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.18.0-beta.0.tgz",
+      "integrity": "sha512-KbtIy5DkaWTsKENm5Q27hjovrR7FRuoHhl0wDJtO/2CUZYlrskjEIfcfkfA2CrEQesBug4s5jgsvNM4Wcp6zoA==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-linux-x64-musl": {
+      "version": "0.18.0-beta.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.18.0-beta.0.tgz",
+      "integrity": "sha512-SF07gmoGVExcF5v+IE6kBbCbXJSDyTgC7QCt+MDS1NsgoQ9OH7IyH7r6HJu16tKflUOUKlUHnP0hQOPpv1fWpg==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-win32-arm64-msvc": {
+      "version": "0.18.0-beta.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-arm64-msvc/-/vectordb-win32-arm64-msvc-0.18.0-beta.0.tgz",
+      "integrity": "sha512-YYBuSBGDlxJgSI5gHjDmQo9sl05lAXfzil6QiKfgmUMsBtb2sT+GoUCgG6qzsfe99sWiTf+pMeWDsQgfrj9vNw==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "win32"
+      ]
+    },
+    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
+      "version": "0.18.0-beta.0",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.18.0-beta.0.tgz",
+      "integrity": "sha512-t9TXeUnMU7YbP+/nUJpStm75aWwUydZj2AK+G2XwDtQrQo4Xg7/NETEbBeogmIOHuidNQYia8jEeQCUon5/+Dw==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "win32"
+      ]
+    },
    "node_modules/@neon-rs/cli": {
      "version": "0.0.160",
      "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.15.1-beta.2",
+  "version": "0.18.0-beta.0",
  "description": " Serverless, low-latency vector database for AI applications",
  "private": false,
  "main": "dist/index.js",
@@ -92,13 +92,13 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-x64": "0.15.1-beta.2",
-    "@lancedb/vectordb-darwin-arm64": "0.15.1-beta.2",
-    "@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.2",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.2",
-    "@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.2",
-    "@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.2",
-    "@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.2",
-    "@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.2"
+    "@lancedb/vectordb-darwin-x64": "0.18.0-beta.0",
+    "@lancedb/vectordb-darwin-arm64": "0.18.0-beta.0",
+    "@lancedb/vectordb-linux-x64-gnu": "0.18.0-beta.0",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.18.0-beta.0",
+    "@lancedb/vectordb-linux-x64-musl": "0.18.0-beta.0",
+    "@lancedb/vectordb-linux-arm64-musl": "0.18.0-beta.0",
+    "@lancedb/vectordb-win32-x64-msvc": "0.18.0-beta.0",
+    "@lancedb/vectordb-win32-arm64-msvc": "0.18.0-beta.0"
  }
 }
--- a/node/src/index.ts
+++ b/node/src/index.ts
@@ -47,7 +47,8 @@ const {
  tableSchema,
  tableAddColumns,
  tableAlterColumns,
-  tableDropColumns
+  tableDropColumns,
+  tableDropIndex
  // eslint-disable-next-line @typescript-eslint/no-var-requires
 } = require("../native.js");

@@ -604,6 +605,13 @@ export interface Table<T = number[]> {
   */
  dropColumns(columnNames: string[]): Promise<void>

+  /**
+   * Drop an index from the table
+   *
+   * @param indexName The name of the index to drop
+   */
+  dropIndex(indexName: string): Promise<void>
+
  /**
   * Instrument the behavior of this Table with middleware.
   *
@@ -1206,6 +1214,10 @@ export class LocalTable<T = number[]> implements Table<T> {
    return tableDropColumns.call(this._tbl, columnNames);
  }

+  async dropIndex(indexName: string): Promise<void> {
+    return tableDropIndex.call(this._tbl, indexName);
+  }
+
  withMiddleware(middleware: HttpMiddleware): Table<T> {
    return this;
  }
--- a/node/src/remote/index.ts
+++ b/node/src/remote/index.ts
@@ -471,6 +471,18 @@ export class RemoteTable<T = number[]> implements Table<T> {
      )
    }
  }
+  /** Drop the index named `indexName` from this remote table; throws an Error on any non-200 response. */
+  async dropIndex (indexName: string): Promise<void> {
+    const path = `/v1/table/${encodeURIComponent(this._name)}/index/${encodeURIComponent(indexName)}/drop/`
+    const res = await this._client.post(path)
+    if (res.status !== 200) {
+      throw new Error(
+          `Server Error, status: ${res.status}, ` +
+          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+          `message: ${res.statusText}: ${await res.body()}`
+      )
+    }
+  }

  async countRows (filter?: string): Promise<number> {
    const result = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/count_rows/`, {
--- a/node/src/test/test.ts
+++ b/node/src/test/test.ts
@@ -894,6 +894,27 @@ describe("LanceDB client", function () {
      expect(stats.distanceType).to.equal("l2");
      expect(stats.numIndices).to.equal(1);
    }).timeout(50_000);
+
+    // not yet implemented
+    // it("can drop index", async function () {
+    //   const uri = await createTestDB(32, 300);
+    //   const con = await lancedb.connect(uri);
+    //   const table = await con.openTable("vectors");
+    //   await table.createIndex({
+    //     type: "ivf_pq",
+    //     column: "vector",
+    //     num_partitions: 2,
+    //     max_iters: 2,
+    //     num_sub_vectors: 2
+    //   });
+    //
+    //   const indices = await table.listIndices();
+    //   expect(indices).to.have.lengthOf(1);
+    //   expect(indices[0].name).to.equal("vector_idx");
+    //
+    //   await table.dropIndex("vector_idx");
+    //   expect(await table.listIndices()).to.have.lengthOf(0);
+    // }).timeout(50_000);
  });

  describe("when using a custom embedding function", function () {
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.15.1-beta.2"
+version = "0.18.0-beta.0"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/README.md
+++ b/nodejs/README.md
@@ -32,7 +32,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
 console.log(results);
 ```

-The [quickstart](../basic.md) contains a more complete example.
+The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.

 ## Development

--- a/nodejs/test/arrow.test.ts
+++ b/nodejs/test/arrow.test.ts
@@ -55,6 +55,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      Float64,
      Struct,
      List,
+      Int16,
      Int32,
      Int64,
      Float,
@@ -108,13 +109,16 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          false,
        ),
      ]);
-
      const table = (await tableCreationMethod(
        records,
        recordsReversed,
        schema,
        // biome-ignore lint/suspicious/noExplicitAny: <explanation>
      )) as any;
+
+      // We expect deterministic ordering of the fields
+      expect(table.schema.names).toEqual(schema.names);
+
      schema.fields.forEach(
        (
          // biome-ignore lint/suspicious/noExplicitAny: <explanation>
@@ -141,13 +145,13 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
    describe("The function makeArrowTable", function () {
      it("will use data types from a provided schema instead of inference", async function () {
        const schema = new Schema([
-          new Field("a", new Int32()),
-          new Field("b", new Float32()),
+          new Field("a", new Int32(), false),
+          new Field("b", new Float32(), true),
          new Field(
            "c",
            new FixedSizeList(3, new Field("item", new Float16())),
          ),
-          new Field("d", new Int64()),
+          new Field("d", new Int64(), true),
        ]);
        const table = makeArrowTable(
          [
@@ -165,12 +169,15 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(actual.numRows).toBe(3);
        const actualSchema = actual.schema;
        expect(actualSchema).toEqual(schema);
+        expect(table.getChild("a")?.toJSON()).toEqual([1, 4, 7]);
+        expect(table.getChild("b")?.toJSON()).toEqual([2, 5, 8]);
+        expect(table.getChild("d")?.toJSON()).toEqual([9n, 10n, null]);
      });

      it("will assume the column `vector` is FixedSizeList<Float32> by default", async function () {
        const schema = new Schema([
          new Field("a", new Float(Precision.DOUBLE), true),
-          new Field("b", new Float(Precision.DOUBLE), true),
+          new Field("b", new Int64(), true),
          new Field(
            "vector",
            new FixedSizeList(
@@ -181,9 +188,9 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          ),
        ]);
        const table = makeArrowTable([
-          { a: 1, b: 2, vector: [1, 2, 3] },
-          { a: 4, b: 5, vector: [4, 5, 6] },
-          { a: 7, b: 8, vector: [7, 8, 9] },
+          { a: 1, b: 2n, vector: [1, 2, 3] },
+          { a: 4, b: 5n, vector: [4, 5, 6] },
+          { a: 7, b: 8n, vector: [7, 8, 9] },
        ]);

        const buf = await fromTableToBuffer(table);
@@ -193,6 +200,19 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(actual.numRows).toBe(3);
        const actualSchema = actual.schema;
        expect(actualSchema).toEqual(schema);
+
+        expect(table.getChild("a")?.toJSON()).toEqual([1, 4, 7]);
+        expect(table.getChild("b")?.toJSON()).toEqual([2n, 5n, 8n]);
+        expect(
+          table
+            .getChild("vector")
+            ?.toJSON()
+            .map((v) => v.toJSON()),
+        ).toEqual([
+          [1, 2, 3],
+          [4, 5, 6],
+          [7, 8, 9],
+        ]);
      });

      it("can support multiple vector columns", async function () {
@@ -206,7 +226,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          ),
          new Field(
            "vec2",
-            new FixedSizeList(3, new Field("item", new Float16(), true)),
+            new FixedSizeList(3, new Field("item", new Float64(), true)),
            true,
          ),
        ]);
@@ -219,7 +239,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          {
            vectorColumns: {
              vec1: { type: new Float16() },
-              vec2: { type: new Float16() },
+              vec2: { type: new Float64() },
            },
          },
        );
@@ -307,6 +327,53 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          false,
        );
      });
+
+      it("will allow subsets of columns if nullable", async function () {
+        const schema = new Schema([
+          new Field("a", new Int64(), true),
+          new Field(
+            "s",
+            new Struct([
+              new Field("x", new Int32(), true),
+              new Field("y", new Int32(), true),
+            ]),
+            true,
+          ),
+          new Field("d", new Int16(), true),
+        ]);
+
+        const table = makeArrowTable([{ a: 1n }], { schema });
+        expect(table.numCols).toBe(1);
+        expect(table.numRows).toBe(1);
+
+        const table2 = makeArrowTable([{ a: 1n, d: 2 }], { schema });
+        expect(table2.numCols).toBe(2);
+
+        const table3 = makeArrowTable([{ s: { y: 3 } }], { schema });
+        expect(table3.numCols).toBe(1);
+        const expectedSchema = new Schema([
+          new Field("s", new Struct([new Field("y", new Int32(), true)]), true),
+        ]);
+        expect(table3.schema).toEqual(expectedSchema);
+      });
+
+      it("will work even if columns are sparsely provided", async function () {
+        const sparseRecords = [{ a: 1n }, { b: 2n }, { c: 3n }, { d: 4n }];
+        const table = makeArrowTable(sparseRecords);
+        expect(table.numCols).toBe(4);
+        expect(table.numRows).toBe(4);
+
+        const schema = new Schema([
+          new Field("a", new Int64(), true),
+          new Field("b", new Int32(), true),
+          new Field("c", new Int64(), true),
+          new Field("d", new Int16(), true),
+        ]);
+        const table2 = makeArrowTable(sparseRecords, { schema });
+        expect(table2.numCols).toBe(4);
+        expect(table2.numRows).toBe(4);
+        expect(table2.schema).toEqual(schema);
+      });
    });

    class DummyEmbedding extends EmbeddingFunction<string> {
--- a/nodejs/test/connection.test.ts
+++ b/nodejs/test/connection.test.ts
@@ -17,14 +17,14 @@ describe("when connecting", () => {
  it("should connect", async () => {
    const db = await connect(tmpDir.name);
    expect(db.display()).toBe(
-      `NativeDatabase(uri=${tmpDir.name}, read_consistency_interval=None)`,
+      `ListingDatabase(uri=${tmpDir.name}, read_consistency_interval=None)`,
    );
  });

  it("should allow read consistency interval to be specified", async () => {
    const db = await connect(tmpDir.name, { readConsistencyInterval: 5 });
    expect(db.display()).toBe(
-      `NativeDatabase(uri=${tmpDir.name}, read_consistency_interval=5s)`,
+      `ListingDatabase(uri=${tmpDir.name}, read_consistency_interval=5s)`,
    );
  });
 });
@@ -61,6 +61,26 @@ describe("given a connection", () => {
    await expect(tbl.countRows()).resolves.toBe(1);
  });

+  it("should be able to drop tables", async () => {
+    await db.createTable("test", [{ id: 1 }, { id: 2 }]);
+    await db.createTable("test2", [{ id: 1 }, { id: 2 }]);
+    await db.createTable("test3", [{ id: 1 }, { id: 2 }]);
+
+    await expect(db.tableNames()).resolves.toEqual(["test", "test2", "test3"]);
+
+    await db.dropTable("test2");
+
+    await expect(db.tableNames()).resolves.toEqual(["test", "test3"]);
+
+    await db.dropAllTables();
+
+    await expect(db.tableNames()).resolves.toEqual([]);
+
+    // Make sure we can still create more tables after dropping all
+    await db.createTable("test4", [{ id: 1 }, { id: 2 }]);
+    await expect(db.tableNames()).resolves.toEqual(["test4"]);
+  });
+
  it("should fail if creating table twice, unless overwrite is true", async () => {
    let tbl = await db.createTable("test", [{ id: 1 }, { id: 2 }]);
    await expect(tbl.countRows()).resolves.toBe(2);
@@ -96,14 +116,15 @@ describe("given a connection", () => {
    const data = [...Array(10000).keys()].map((i) => ({ id: i }));

    // Create in v1 mode
-    let table = await db.createTable("test", data, { useLegacyFormat: true });
+    let table = await db.createTable("test", data, {
+      storageOptions: { newTableDataStorageVersion: "legacy" },
+    });

    const isV2 = async (table: Table) => {
      const data = await table
        .query()
        .limit(10000)
        .toArrow({ maxBatchLength: 100000 });
-      console.log(data.batches.length);
      return data.batches.length < 5;
    };

@@ -122,7 +143,7 @@ describe("given a connection", () => {
    const schema = new Schema([new Field("id", new Float64(), true)]);

    table = await db.createEmptyTable("test_v2_empty", schema, {
-      useLegacyFormat: false,
+      storageOptions: { newTableDataStorageVersion: "stable" },
    });

    await table.add(data);
--- a/nodejs/test/embedding.test.ts
+++ b/nodejs/test/embedding.test.ts
@@ -17,6 +17,8 @@ import {
 import { EmbeddingFunction, LanceSchema } from "../lancedb/embedding";
 import { getRegistry, register } from "../lancedb/embedding/registry";

+const testOpenAIInteg = process.env.OPENAI_API_KEY == null ? test.skip : test;
+
 describe("embedding functions", () => {
  let tmpDir: tmp.DirResult;
  beforeEach(() => {
@@ -29,9 +31,6 @@ describe("embedding functions", () => {

  it("should be able to create a table with an embedding function", async () => {
    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {};
-      }
      ndims() {
        return 3;
      }
@@ -75,9 +74,6 @@ describe("embedding functions", () => {
  it("should be able to append and upsert using embedding function", async () => {
    @register()
    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {};
-      }
      ndims() {
        return 3;
      }
@@ -143,9 +139,6 @@ describe("embedding functions", () => {
  it("should be able to create an empty table with an embedding function", async () => {
    @register()
    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {};
-      }
      ndims() {
        return 3;
      }
@@ -194,9 +187,6 @@ describe("embedding functions", () => {
  it("should error when appending to a table with an unregistered embedding function", async () => {
    @register("mock")
    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {};
-      }
      ndims() {
        return 3;
      }
@@ -241,13 +231,35 @@ describe("embedding functions", () => {
      `Function "mock" not found in registry`,
    );
  });
+
+  testOpenAIInteg("propagates variables through all methods", async () => {
+    delete process.env.OPENAI_API_KEY;
+    const registry = getRegistry();
+    registry.setVar("openai_api_key", "sk-...");
+    const func = registry.get("openai")?.create({
+      model: "text-embedding-ada-002",
+      apiKey: "$var:openai_api_key",
+    }) as EmbeddingFunction;
+
+    const db = await connect("memory://");
+    const wordsSchema = LanceSchema({
+      text: func.sourceField(new Utf8()),
+      vector: func.vectorField(),
+    });
+    const tbl = await db.createEmptyTable("words", wordsSchema, {
+      mode: "overwrite",
+    });
+    await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
+
+    const query = "greetings";
+    const actual = (await tbl.search(query).limit(1).toArray())[0];
+    expect(actual).toHaveProperty("text");
+  });
+
  test.each([new Float16(), new Float32(), new Float64()])(
    "should be able to provide manual embeddings with multiple float datatype",
    async (floatType) => {
      class MockEmbeddingFunction extends EmbeddingFunction<string> {
-        toJSON(): object {
-          return {};
-        }
        ndims() {
          return 3;
        }
@@ -292,10 +304,6 @@ describe("embedding functions", () => {
    async (floatType) => {
      @register("test1")
      class MockEmbeddingFunctionWithoutNDims extends EmbeddingFunction<string> {
-        toJSON(): object {
-          return {};
-        }
-
        embeddingDataType(): Float {
          return floatType;
        }
@@ -310,9 +318,6 @@ describe("embedding functions", () => {
      }
      @register("test")
      class MockEmbeddingFunction extends EmbeddingFunction<string> {
-        toJSON(): object {
-          return {};
-        }
        ndims() {
          return 3;
        }
--- a/nodejs/test/registry.test.ts
+++ b/nodejs/test/registry.test.ts
@@ -11,7 +11,11 @@ import * as arrow18 from "apache-arrow-18";
 import * as tmp from "tmp";

 import { connect } from "../lancedb";
-import { EmbeddingFunction, LanceSchema } from "../lancedb/embedding";
+import {
+  EmbeddingFunction,
+  FunctionOptions,
+  LanceSchema,
+} from "../lancedb/embedding";
 import { getRegistry, register } from "../lancedb/embedding/registry";

 describe.each([arrow15, arrow16, arrow17, arrow18])("LanceSchema", (arrow) => {
@@ -39,11 +43,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
  it("should register a new item to the registry", async () => {
    @register("mock-embedding")
    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {
-          someText: "hello",
-        };
-      }
      constructor() {
        super();
      }
@@ -89,11 +88,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
  });
  test("should error if registering with the same name", async () => {
    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {
-          someText: "hello",
-        };
-      }
      constructor() {
        super();
      }
@@ -114,13 +108,9 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
  });
  test("schema should contain correct metadata", async () => {
    class MockEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {
-          someText: "hello",
-        };
-      }
-      constructor() {
+      constructor(args: FunctionOptions = {}) {
        super();
+        this.resolveVariables(args);
      }
      ndims() {
        return 3;
@@ -132,7 +122,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
        return data.map(() => [1, 2, 3]);
      }
    }
-    const func = new MockEmbeddingFunction();
+    const func = new MockEmbeddingFunction({ someText: "hello" });

    const schema = LanceSchema({
      id: new arrow.Int32(),
@@ -155,3 +145,79 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
    expect(schema.metadata).toEqual(expectedMetadata);
  });
 });
+
+describe("Registry.setVar", () => {
+  const registry = getRegistry();
+
+  beforeEach(() => {
+    @register("mock-embedding")
+    // biome-ignore lint/correctness/noUnusedVariables :
+    class MockEmbeddingFunction extends EmbeddingFunction<string> {
+      constructor(optionsRaw: FunctionOptions = {}) {
+        super();
+        const options = this.resolveVariables(optionsRaw);
+
+        expect(optionsRaw["someKey"].startsWith("$var:someName")).toBe(true);
+        expect(options["someKey"]).toBe("someValue");
+
+        if (options["secretKey"]) {
+          expect(optionsRaw["secretKey"]).toBe("$var:secretKey");
+          expect(options["secretKey"]).toBe("mySecret");
+        }
+      }
+      async computeSourceEmbeddings(data: string[]) {
+        return data.map(() => [1, 2, 3]);
+      }
+      embeddingDataType() {
+        return new arrow18.Float32() as apiArrow.Float;
+      }
+      protected getSensitiveKeys() {
+        return ["secretKey"];
+      }
+    }
+  });
+  afterEach(() => {
+    registry.reset();
+  });
+
+  it("should error if the variable is not set", () => {
+    // No setVar call here and no `:default` suffix, so resolving "$var:someName" must throw.
+    expect(() =>
+      registry.get("mock-embedding")!.create({ someKey: "$var:someName" }),
+    ).toThrow('Variable "someName" not found');
+  });
+
+  it("should use default values if not set", () => {
+    registry
+      .get("mock-embedding")!
+      .create({ someKey: "$var:someName:someValue" });
+  });
+
+  it("should set a variable that the embedding function understands", () => {
+    registry.setVar("someName", "someValue");
+    registry.get("mock-embedding")!.create({ someKey: "$var:someName" });
+  });
+
+  it("should reject secrets that aren't passed as variables", () => {
+    registry.setVar("someName", "someValue");
+    expect(() =>
+      registry
+        .get("mock-embedding")!
+        .create({ secretKey: "someValue", someKey: "$var:someName" }),
+    ).toThrow(
+      'The key "secretKey" is sensitive and cannot be set directly. Please use the $var: syntax to set it.',
+    );
+  });
+
+  it("should not serialize secrets", () => {
+    registry.setVar("someName", "someValue");
+    registry.setVar("secretKey", "mySecret");
+    const func = registry
+      .get("mock-embedding")!
+      .create({ secretKey: "$var:secretKey", someKey: "$var:someName" });
+    expect(func.toJSON()).toEqual({
+      secretKey: "$var:secretKey",
+      someKey: "$var:someName",
+    });
+  });
+});
--- a/nodejs/test/remote.test.ts
+++ b/nodejs/test/remote.test.ts
@@ -104,4 +104,26 @@ describe("remote connection", () => {
      },
    );
  });
+
+  it("should pass on requested extra headers", async () => {
+    await withMockDatabase(
+      (req, res) => {
+        expect(req.headers["x-my-header"]).toEqual("my-value");
+
+        const body = JSON.stringify({ tables: [] });
+        res.writeHead(200, { "Content-Type": "application/json" }).end(body);
+      },
+      async (db) => {
+        const tableNames = await db.tableNames();
+        expect(tableNames).toEqual([]);
+      },
+      {
+        clientConfig: {
+          extraHeaders: {
+            "x-my-header": "my-value",
+          },
+        },
+      },
+    );
+  });
 });
--- a/nodejs/test/s3_integration.test.ts
+++ b/nodejs/test/s3_integration.test.ts
@@ -175,6 +175,8 @@ maybeDescribe("storage_options", () => {

    tableNames = await db.tableNames();
    expect(tableNames).toEqual([]);
+
+    await db.dropAllTables();
  });

  it("can configure encryption at connection and table level", async () => {
@@ -210,6 +212,8 @@ maybeDescribe("storage_options", () => {
    await table.add([{ a: 2, b: 3 }]);

    await bucket.assertAllEncrypted("test/table2.lance", kmsKey.keyId);
+
+    await db.dropAllTables();
  });
 });

@@ -298,5 +302,32 @@ maybeDescribe("DynamoDB Lock", () => {

    const rowCount = await table.countRows();
    expect(rowCount).toBe(6);
+
+    await db.dropAllTables();
+  });
+
+  it("clears dynamodb state after dropping all tables", async () => {
+    const uri = `s3+ddb://${bucket.name}/test?ddbTableName=${commitTable.name}`;
+    const db = await connect(uri, {
+      storageOptions: CONFIG,
+      readConsistencyInterval: 0,
+    });
+
+    await db.createTable("foo", [{ a: 1, b: 2 }]);
+    await db.createTable("bar", [{ a: 1, b: 2 }]);
+
+    let tableNames = await db.tableNames();
+    expect(tableNames).toEqual(["bar", "foo"]);
+
+    await db.dropAllTables();
+    tableNames = await db.tableNames();
+    expect(tableNames).toEqual([]);
+
+    // We can create a new table with the same name as the one we dropped.
+    await db.createTable("foo", [{ a: 1, b: 2 }]);
+    tableNames = await db.tableNames();
+    expect(tableNames).toEqual(["foo"]);
+
+    await db.dropAllTables();
  });
 });
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -253,6 +253,31 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      const arrowTbl = await table.toArrow();
      expect(arrowTbl).toBeInstanceOf(ArrowTable);
    });
+
+    it("should be able to handle missing fields", async () => {
+      const schema = new arrow.Schema([
+        new arrow.Field("id", new arrow.Int32(), true),
+        new arrow.Field("y", new arrow.Int32(), true),
+        new arrow.Field("z", new arrow.Int64(), true),
+      ]);
+      const db = await connect(tmpDir.name);
+      const table = await db.createEmptyTable("testNull", schema);
+      await table.add([{ id: 1, y: 2 }]);
+      await table.add([{ id: 2 }]);
+
+      await table
+        .mergeInsert("id")
+        .whenNotMatchedInsertAll()
+        .execute([
+          { id: 3, z: 3 },
+          { id: 4, z: 5 },
+        ]);
+
+      const res = await table.query().toArrow();
+      expect(res.getChild("id")?.toJSON()).toEqual([1, 2, 3, 4]);
+      expect(res.getChild("y")?.toJSON()).toEqual([2, null, null, null]);
+      expect(res.getChild("z")?.toJSON()).toEqual([null, null, 3n, 5n]);
+    });
  },
 );

@@ -641,11 +666,11 @@ describe("When creating an index", () => {
    expect(fs.readdirSync(indexDir)).toHaveLength(1);

    for await (const r of tbl.query().where("id > 1").select(["id"])) {
-      expect(r.numRows).toBe(10);
+      expect(r.numRows).toBe(298);
    }
    // should also work with 'filter' alias
    for await (const r of tbl.query().filter("id > 1").select(["id"])) {
-      expect(r.numRows).toBe(10);
+      expect(r.numRows).toBe(298);
    }
  });

@@ -1013,9 +1038,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
    test("can search using a string", async () => {
      @register()
      class MockEmbeddingFunction extends EmbeddingFunction<string> {
-        toJSON(): object {
-          return {};
-        }
        ndims() {
          return 1;
        }
--- a/nodejs/examples/ann_indexes.test.ts
+++ b/nodejs/examples/ann_indexes.test.ts
@@ -3,7 +3,7 @@
 import { expect, test } from "@jest/globals";
 // --8<-- [start:import]
 import * as lancedb from "@lancedb/lancedb";
-import { VectorQuery } from "@lancedb/lancedb";
+import type { VectorQuery } from "@lancedb/lancedb";
 // --8<-- [end:import]
 import { withTempDirectory } from "./util.ts";

--- a/nodejs/examples/basic.test.ts
+++ b/nodejs/examples/basic.test.ts
@@ -117,7 +117,6 @@ test("basic table examples", async () => {
      // --8<-- [end:add_data]
    }

-    {
    // --8<-- [start:add_columns]
    await tbl.addColumns([
      { name: "double_price", valueSql: "cast((price * 2) as Float)" },
@@ -136,7 +135,6 @@ test("basic table examples", async () => {
    // --8<-- [start:drop_columns]
    await tbl.dropColumns(["dbl_price"]);
    // --8<-- [end:drop_columns]
-    }

    {
      // --8<-- [start:vector_search]
--- a/nodejs/examples/biome.json
+++ b/nodejs/examples/biome.json
@@ -0,0 +1,52 @@
+{
+  "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
+  "vcs": {
+    "enabled": false,
+    "clientKind": "git",
+    "useIgnoreFile": false
+  },
+  "files": {
+    "ignoreUnknown": false,
+    "ignore": []
+  },
+  "formatter": {
+    "enabled": true,
+    "indentStyle": "space"
+  },
+  "organizeImports": {
+    "enabled": true
+  },
+  "linter": {
+    "enabled": true,
+    "rules": {
+      "recommended": true
+    }
+  },
+  "javascript": {
+    "formatter": {
+      "quoteStyle": "double"
+    }
+  },
+  "overrides": [
+    {
+      "include": ["*"],
+      "linter": {
+        "rules": {
+          "style": {
+            "noNonNullAssertion": "off"
+          }
+        }
+      }
+    },
+    {
+      "include": ["merge_insert.test.ts"],
+      "linter": {
+        "rules": {
+          "style": {
+            "useNamingConvention": "off"
+          }
+        }
+      }
+    }
+  ]
+}
--- a/nodejs/examples/custom_embedding_function.test.ts
+++ b/nodejs/examples/custom_embedding_function.test.ts
@@ -1,4 +1,7 @@
-import { FeatureExtractionPipeline, pipeline } from "@huggingface/transformers";
+import {
+  type FeatureExtractionPipeline,
+  pipeline,
+} from "@huggingface/transformers";
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
 import { expect, test } from "@jest/globals";
--- a/nodejs/examples/embedding.test.ts
+++ b/nodejs/examples/embedding.test.ts
@@ -43,12 +43,17 @@ test("custom embedding function", async () => {

    @register("my_embedding")
    class MyEmbeddingFunction extends EmbeddingFunction<string> {
-      toJSON(): object {
-        return {};
+      constructor(optionsRaw = {}) {
+        super();
+        const options = this.resolveVariables(optionsRaw);
+        // Initialize using options
      }
      ndims() {
        return 3;
      }
+      protected getSensitiveKeys(): string[] {
+        return [];
+      }
      embeddingDataType(): Float {
        return new Float32();
      }
@@ -94,3 +99,14 @@ test("custom embedding function", async () => {
    expect(await table2.countRows()).toBe(2);
  });
 });
+
+test("embedding function api_key", async () => {
+  // --8<-- [start:register_secret]
+  const registry = getRegistry();
+  registry.setVar("api_key", "sk-...");
+  // Pass a "$var:" reference to the variable instead of the key itself.
+  const func = registry.get("openai")!.create({
+    apiKey: "$var:api_key",
+  });
+  // --8<-- [end:register_secret]
+});
--- a/nodejs/examples/full_text_search.test.ts
+++ b/nodejs/examples/full_text_search.test.ts
@@ -42,4 +42,4 @@ test("full text search", async () => {
    expect(result.length).toBe(10);
    // --8<-- [end:full_text_search]
  });
-});
+}, 10_000);
--- a/nodejs/examples/merge_insert.test.ts
+++ b/nodejs/examples/merge_insert.test.ts
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+import { expect, test } from "@jest/globals";
+import * as lancedb from "@lancedb/lancedb";
+
+test("basic upsert", async () => {
+  const db = await lancedb.connect("memory://");
+  // Upsert: rows matching on "id" are updated, all other rows are inserted.
+  // --8<-- [start:upsert_basic]
+  const table = await db.createTable("users", [
+    { id: 0, name: "Alice" },
+    { id: 1, name: "Bob" },
+  ]);
+
+  const newUsers = [
+    { id: 1, name: "Bobby" },
+    { id: 2, name: "Charlie" },
+  ];
+  await table
+    .mergeInsert("id")
+    .whenMatchedUpdateAll()
+    .whenNotMatchedInsertAll()
+    .execute(newUsers);
+
+  await table.countRows(); // 3
+  // --8<-- [end:upsert_basic]
+  expect(await table.countRows()).toBe(3);
+  // Insert-if-not-exists: only rows with a "domain" not yet present are added.
+  // --8<-- [start:insert_if_not_exists]
+  const table2 = await db.createTable("domains", [
+    { domain: "google.com", name: "Google" },
+    { domain: "github.com", name: "GitHub" },
+  ]);
+
+  const newDomains = [
+    { domain: "google.com", name: "Google" },
+    { domain: "facebook.com", name: "Facebook" },
+  ];
+  await table2
+    .mergeInsert("domain")
+    .whenNotMatchedInsertAll()
+    .execute(newDomains);
+  await table2.countRows(); // 3
+  // --8<-- [end:insert_if_not_exists]
+  expect(await table2.countRows()).toBe(3);
+  // Replace range: upsert, and delete "doc_id = 1" rows missing from newChunks.
+  // --8<-- [start:replace_range]
+  const table3 = await db.createTable("chunks", [
+    { doc_id: 0, chunk_id: 0, text: "Hello" },
+    { doc_id: 0, chunk_id: 1, text: "World" },
+    { doc_id: 1, chunk_id: 0, text: "Foo" },
+    { doc_id: 1, chunk_id: 1, text: "Bar" },
+  ]);
+
+  const newChunks = [{ doc_id: 1, chunk_id: 0, text: "Baz" }];
+
+  await table3
+    .mergeInsert(["doc_id", "chunk_id"])
+    .whenMatchedUpdateAll()
+    .whenNotMatchedInsertAll()
+    .whenNotMatchedBySourceDelete({ where: "doc_id = 1" })
+    .execute(newChunks);
+
+  await table3.countRows("doc_id = 1"); // 1
+  // --8<-- [end:replace_range]
+  expect(await table3.countRows("doc_id = 1")).toBe(1);
+});
--- a/nodejs/examples/sentence-transformers.test.ts
+++ b/nodejs/examples/sentence-transformers.test.ts
@@ -6,7 +6,7 @@ import { withTempDirectory } from "./util.ts";
 import * as lancedb from "@lancedb/lancedb";
 import "@lancedb/lancedb/embedding/transformers";
 import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
-import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
+import type { EmbeddingFunction } from "@lancedb/lancedb/embedding";
 import { Utf8 } from "apache-arrow";

 test("full text search", async () => {
@@ -58,6 +58,6 @@ test("full text search", async () => {
    const query = "How many bones are in the human body?";
    const actual = await tbl.search(query).limit(1).toArray();

-    expect(actual[0]["text"]).toBe("The human body has 206 bones.");
+    expect(actual[0].text).toBe("The human body has 206 bones.");
  });
 }, 100_000);
--- a/nodejs/examples/util.ts
+++ b/nodejs/examples/util.ts
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import * as fs from "fs";
-import { tmpdir } from "os";
-import * as path from "path";
+import * as fs from "node:fs";
+import { tmpdir } from "node:os";
+import * as path from "node:path";

 export async function withTempDirectory(
  fn: (tempDir: string) => Promise<void>,
--- a/nodejs/lancedb/arrow.ts
+++ b/nodejs/lancedb/arrow.ts
@@ -2,31 +2,37 @@
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import {
+  Data as ArrowData,
  Table as ArrowTable,
  Binary,
+  Bool,
  BufferType,
  DataType,
+  Dictionary,
  Field,
  FixedSizeBinary,
  FixedSizeList,
  Float,
  Float32,
+  Float64,
  Int,
+  Int32,
+  Int64,
  LargeBinary,
  List,
  Null,
  RecordBatch,
  RecordBatchFileReader,
  RecordBatchFileWriter,
-  RecordBatchReader,
  RecordBatchStreamWriter,
  Schema,
  Struct,
  Utf8,
  Vector,
+  makeVector as arrowMakeVector,
  makeBuilder,
  makeData,
-  type makeTable,
+  makeTable,
  vectorFromArray,
 } from "apache-arrow";
 import { Buffers } from "apache-arrow/data";
@@ -236,8 +242,6 @@ export class MakeArrowTableOptions {
 * This function converts an array of Record<String, any> (row-major JS objects)
 * to an Arrow Table (a columnar structure)
 *
- * Note that it currently does not support nulls.
- *
 * If a schema is provided then it will be used to determine the resulting array
 * types.  Fields will also be reordered to fit the order defined by the schema.
 *
@@ -245,6 +249,9 @@ export class MakeArrowTableOptions {
 * will be controlled by the order of properties in the first record.  If a type
 * is inferred it will always be nullable.
 *
+ * If not all fields are found in the data, then a subset of the schema will be
+ * returned.
+ *
 * If the input is empty then a schema must be provided to create an empty table.
 *
 * When a schema is not specified then data types will be inferred.  The inference
@@ -252,11 +259,13 @@ export class MakeArrowTableOptions {
 *
 *  - boolean => Bool
 *  - number => Float64
+ *  - bigint => Int64
 *  - String => Utf8
 *  - Buffer => Binary
 *  - Record<String, any> => Struct
 *  - Array<any> => List
 * @example
+ * ```ts
 * import { fromTableToBuffer, makeArrowTable } from "../arrow";
 * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
 *
@@ -278,43 +287,41 @@ export class MakeArrowTableOptions {
 * names and data types.
 *
 * ```ts
- *
 * const schema = new Schema([
-    new Field("a", new Float64()),
-    new Field("b", new Float64()),
-    new Field(
-      "vector",
-      new FixedSizeList(3, new Field("item", new Float32()))
-    ),
-  ]);
-  const table = makeArrowTable([
-    { a: 1, b: 2, vector: [1, 2, 3] },
-    { a: 4, b: 5, vector: [4, 5, 6] },
-    { a: 7, b: 8, vector: [7, 8, 9] },
-  ]);
-  assert.deepEqual(table.schema, schema);
+ *   new Field("a", new Float64()),
+ *   new Field("b", new Float64()),
+ *   new Field(
+ *     "vector",
+ *     new FixedSizeList(3, new Field("item", new Float32()))
+ *   ),
+ * ]);
+ * const table = makeArrowTable([
+ *   { a: 1, b: 2, vector: [1, 2, 3] },
+ *   { a: 4, b: 5, vector: [4, 5, 6] },
+ *   { a: 7, b: 8, vector: [7, 8, 9] },
+ * ]);
+ * assert.deepEqual(table.schema, schema);
 * ```
 *
 * You can specify the vector column types and names using the options as well
 *
- * ```typescript
- *
+ * ```ts
 * const schema = new Schema([
-    new Field('a', new Float64()),
-    new Field('b', new Float64()),
-    new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
-    new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
-  ]);
+ *   new Field('a', new Float64()),
+ *   new Field('b', new Float64()),
+ *   new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
+ *   new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
+ * ]);
 * const table = makeArrowTable([
-    { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
-    { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
-    { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
-  ], {
-    vectorColumns: {
-      vec1: { type: new Float16() },
-      vec2: { type: new Float16() }
-    }
-  }
+ *   { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
+ *   { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
+ *   { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
+ * ], {
+ *   vectorColumns: {
+ *     vec1: { type: new Float16() },
+ *     vec2: { type: new Float16() }
+ *   }
+ * });
 * assert.deepEqual(table.schema, schema)
 * ```
 */
@@ -323,126 +330,316 @@ export function makeArrowTable(
  options?: Partial<MakeArrowTableOptions>,
  metadata?: Map<string, string>,
 ): ArrowTable {
+  const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
+  let schema: Schema | undefined = undefined;
+  if (opt.schema !== undefined && opt.schema !== null) {
+    schema = sanitizeSchema(opt.schema);
+    schema = validateSchemaEmbeddings(
+      schema as Schema,
+      data,
+      options?.embeddingFunction,
+    );
+  }
+
+  let schemaMetadata = schema?.metadata || new Map<string, string>();
+  if (metadata !== undefined) {
+    schemaMetadata = new Map([...schemaMetadata, ...metadata]);
+  }
+
  if (
    data.length === 0 &&
    (options?.schema === undefined || options?.schema === null)
  ) {
    throw new Error("At least one record or a schema needs to be provided");
+  } else if (data.length === 0) {
+    if (schema === undefined) {
+      throw new Error("A schema must be provided if data is empty");
+    } else {
+      schema = new Schema(schema.fields, schemaMetadata);
+      return new ArrowTable(schema);
+    }
  }

-  const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
-  if (opt.schema !== undefined && opt.schema !== null) {
-    opt.schema = sanitizeSchema(opt.schema);
-    opt.schema = validateSchemaEmbeddings(
-      opt.schema as Schema,
-      data,
-      options?.embeddingFunction,
-    );
-  }
-  const columns: Record<string, Vector> = {};
-  // TODO: sample dataset to find missing columns
-  // Prefer the field ordering of the schema, if present
-  const columnNames =
-    opt.schema != null ? (opt.schema.names as string[]) : Object.keys(data[0]);
-  for (const colName of columnNames) {
-    if (
-      data.length !== 0 &&
-      !Object.prototype.hasOwnProperty.call(data[0], colName)
-    ) {
-      // The field is present in the schema, but not in the data, skip it
-      continue;
-    }
-    // Extract a single column from the records (transpose from row-major to col-major)
-    let values = data.map((datum) => datum[colName]);
+  let inferredSchema = inferSchema(data, schema, opt);
+  inferredSchema = new Schema(inferredSchema.fields, schemaMetadata);

-    // By default (type === undefined) arrow will infer the type from the JS type
-    let type;
-    if (opt.schema !== undefined) {
-      // If there is a schema provided, then use that for the type instead
-      type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type;
-      if (DataType.isInt(type) && type.bitWidth === 64) {
-        // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
-        values = values.map((v) => {
-          if (v === null) {
-            return v;
+  const finalColumns: Record<string, Vector> = {};
+  for (const field of inferredSchema.fields) {
+    finalColumns[field.name] = transposeData(data, field);
  }
-          if (typeof v === "bigint") {
-            return v;
-          }
-          if (typeof v === "number") {
-            return BigInt(v);
+
+  return new ArrowTable(inferredSchema, finalColumns);
 }
+
+/**
+ * Work out the schema for `data`. With `schema`, keep its fields that occur
+ * in the data and reject undeclared ones. Without, infer each type from the
+ * first value seen; later typed values (not null or empty arrays) must agree.
+ */
+function inferSchema(
+  data: Array<Record<string, unknown>>,
+  schema: Schema | undefined,
+  opts: MakeArrowTableOptions,
+): Schema {
+  // We will collect all fields we see in the data.
+  const pathTree = new PathTree<DataType>();
+  for (const [rowI, row] of data.entries()) {
+    for (const [path, value] of rowPathsAndValues(row)) {
+      if (!pathTree.has(path)) {
+        if (schema !== undefined) {
+          const field = getFieldForPath(schema, path);
+          if (field === undefined) {
            throw new Error(
-            `Expected BigInt or number for column ${colName}, got ${typeof v}`,
+              `Found field not in schema: ${path.join(".")} at row ${rowI}`,
            );
+          }
+          pathTree.set(path, field.type);
+        } else {
+          const inferredType = inferType(value, path, opts);
+          if (inferredType === undefined) {
+            throw new Error(`Failed to infer data type for field ${path.join(".")} at row ${rowI}. \
+                             Consider providing an explicit schema.`);
+          }
+          pathTree.set(path, inferredType);
+        }
+      } else if (schema === undefined) {
+        // Each inferType call builds a new object, so compare printed names.
+        const currentType = pathTree.get(path);
+        const newType = inferType(value, path, opts);
+        if (newType !== undefined && String(currentType) !== String(newType)) {
+          throw new Error(`Failed to infer schema for data. Previously inferred type \
+                     ${currentType} but found ${newType} at row ${rowI}. Consider \
+                     providing an explicit schema.`);
+        }
+      }
+    }
+  }
+  if (schema === undefined) {
+    function fieldsFromPathTree(pathTree: PathTree<DataType>): Field[] {
+      const fields = [];
+      for (const [name, value] of pathTree.map.entries()) {
+        if (value instanceof PathTree) {
+          const children = fieldsFromPathTree(value);
+          fields.push(new Field(name, new Struct(children), true));
+        } else {
+          fields.push(new Field(name, value, true));
+        }
+      }
+      return fields;
+    }
+    return new Schema(fieldsFromPathTree(pathTree));
+  } else {
+    function takeMatchingFields(
+      fields: Field[],
+      pathTree: PathTree<DataType>,
+    ): Field[] {
+      const outFields = [];
+      for (const field of fields) {
+        if (pathTree.map.has(field.name)) {
+          const value = pathTree.get([field.name]);
+          if (value instanceof PathTree) {
+            const struct = field.type as Struct;
+            const children = takeMatchingFields(struct.children, value);
+            outFields.push(
+              new Field(field.name, new Struct(children), field.nullable),
+            );
+          } else {
+            outFields.push(
+              new Field(field.name, value as DataType, field.nullable),
+            );
+          }
+        }
+      }
+      return outFields;
+    }
+    return new Schema(takeMatchingFields(schema.fields, pathTree));
+  }
+}
+
+/** Yields a `[path, value]` pair for each leaf of `row`. */
+function* rowPathsAndValues(
+  row: Record<string, unknown>,
+  basePath: string[] = [],
+): Generator<[string[], unknown]> {
+  for (const [name, leaf] of Object.entries(row)) {
+    const fullPath = [...basePath, name];
+    // Plain objects are treated as nested records and never yielded themselves.
+    if (!isObject(leaf)) yield [fullPath, leaf];
+    else yield* rowPathsAndValues(leaf, fullPath);
+  }
+}
+
+/**
+ * Type guard for values that should be walked as nested records: any
+ * non-null object except arrays, RegExp, Date, Set, Map and Buffer.
+ */
+function isObject(value: unknown): value is Record<string, unknown> {
+  if (typeof value !== "object" || value === null) {
+    return false;
+  }
+  // Containers and built-ins below are leaves, not records.
+  const leafClasses = [RegExp, Date, Set, Map, Buffer];
+  return !Array.isArray(value) && !leafClasses.some((c) => value instanceof c);
+}
+
+/**
+ * Look up the field that `path` (one name per nesting level) selects in
+ * `schema`, descending through struct fields.
+ *
+ * Returns undefined for an empty path, an unknown name, or a path that tries
+ * to descend into a field that is not a struct.
+ */
+function getFieldForPath(schema: Schema, path: string[]): Field | undefined {
+  // `candidates` holds the fields the next path segment is looked up in.
+  let candidates: Field[] | undefined = schema.fields;
+  let found: Field | undefined = undefined;
+  for (const segment of path) {
+    // The previous segment did not resolve to a struct field: dead end.
+    if (candidates === undefined) {
+      return undefined;
+    }
+    found = candidates.find((f) => f.name === segment);
+    if (found === undefined) {
+      return undefined;
+    }
+    // Only struct fields have children for the next segment to select from.
+    candidates = undefined;
+    if (found instanceof Field && DataType.isStruct(found.type)) {
+      candidates = found.type.children;
+    }
+  }
+  return found instanceof Field ? found : undefined;
+}
+
+/**
+ * Pick the Arrow type used for a column, judging from one sample value.
+ *
+ * bigint maps to Int64, number to Float64, boolean to Bool, Buffer to Binary
+ * and string to Utf8 (dictionary-encoded if `opts.dictionaryEncodeStrings`).
+ * Arrays become a FixedSizeList when `path` names a configured vector column
+ * or ends in "vector" with float items, and a List otherwise.
+ *
+ * Returns undefined for empty arrays and every other kind of value.
+ */
+function inferType(
+  value: unknown,
+  path: string[],
+  opts: MakeArrowTableOptions,
+): DataType | undefined {
+  switch (typeof value) {
+    case "bigint":
+      return new Int64();
+    case "number":
+      // Float64 is the safe guess even for whole numbers; callers wanting
+      // integers can pass an explicit schema or use BigInt.
+      return new Float64();
+    case "string":
+      return opts.dictionaryEncodeStrings
+        ? new Dictionary(new Utf8(), new Int32())
+        : new Utf8();
+    case "boolean":
+      return new Bool();
+    default:
+      break; // Buffers and arrays are handled below.
+  }
+  if (value instanceof Buffer) {
+    return new Binary();
+  }
+  // TODO: timestamp
+  if (!Array.isArray(value) || value.length === 0) {
+    return undefined; // Without any values we can't infer the type
+  }
+  const [topLevelName] = path;
+  if (path.length === 1 && Object.hasOwn(opts.vectorColumns, topLevelName)) {
+    const floatType = sanitizeType(opts.vectorColumns[topLevelName].type);
+    return new FixedSizeList(value.length, new Field("item", floatType, true));
+  }
+  const itemType = inferType(value[0], path, opts);
+  if (itemType === undefined) {
+    return undefined;
+  }
+  // Float arrays in a field called "vector" are assumed to be embeddings,
+  // which default to Float32 items.
+  const looksLikeVector =
+    itemType instanceof Float && path[path.length - 1] === "vector";
+  return looksLikeVector
+    ? new FixedSizeList(value.length, new Field("item", new Float32(), true))
+    : new List(new Field("item", itemType, true));
+}
+
+/** Nested map keyed by path segments: leaves hold values, inner nodes subtrees. */
+class PathTree<V> {
+  map: Map<string, V | PathTree<V>>;
+  constructor(entries?: [string[], V][]) {
+    this.map = new Map();
+    for (const [path, value] of entries ?? []) {
+      this.set(path, value);
+    }
+  }
+  has(path: string[]): boolean {
+    return this.find(path).found;
+  }
+  get(path: string[]): V | undefined {
+    return this.find(path).node as V | undefined;
+  }
+  /** Stores `value` at `path`; throws if the path runs through a leaf value. */
+  set(path: string[], value: V): void {
+    let ref: PathTree<V> = this;
+    for (const part of path.slice(0, -1)) {
+      if (!ref.map.has(part)) {
+        ref.map.set(part, new PathTree<V>());
+      }
+      const next = ref.map.get(part);
+      if (!(next instanceof PathTree)) {
+        throw new Error(`${path.join(".")}: "${part}" is not a nested entry`);
+      }
+      ref = next;
+    }
+    ref.map.set(path[path.length - 1], value);
+  }
+  /** Walks `path`; `found` is false once a segment is missing or hits a leaf. */
+  private find(path: string[]): { found: boolean; node?: V | PathTree<V> } {
+    let node: V | PathTree<V> | undefined = this;
+    for (const part of path) {
+      if (!(node instanceof PathTree) || !node.map.has(part)) {
+        return { found: false };
+      }
+      node = node.map.get(part);
+    }
+    return { found: true, node };
+  }
+}
+
+function transposeData(
+  data: Record<string, unknown>[],
+  field: Field,
+  path: string[] = [],
+): Vector {
+  if (field.type instanceof Struct) {
+    const childFields = field.type.children;
+    const childVectors = childFields.map((child) => {
+      return transposeData(data, child, [...path, child.name]);
    });
-      }
+    const structData = makeData({
+      type: field.type,
+      children: childVectors as unknown as ArrowData<DataType>[],
+    });
+    return arrowMakeVector(structData);
  } else {
-      // Otherwise, check to see if this column is one of the vector columns
-      // defined by opt.vectorColumns and, if so, use the fixed size list type
-      const vectorColumnOptions = opt.vectorColumns[colName];
-      if (vectorColumnOptions !== undefined) {
-        const firstNonNullValue = values.find((v) => v !== null);
-        if (Array.isArray(firstNonNullValue)) {
-          type = newVectorType(
-            firstNonNullValue.length,
-            vectorColumnOptions.type,
-          );
+    const valuesPath = [...path, field.name];
+    const values = data.map((datum) => {
+      let current: unknown = datum;
+      for (const key of valuesPath) {
+        if (isObject(current) && Object.hasOwn(current, key)) {
+          current = current[key];
        } else {
-          throw new Error(
-            `Column ${colName} is expected to be a vector column but first non-null value is not an array.  Could not determine size of vector column`,
-          );
+          return null;
        }
      }
+      return current;
+    });
+    return makeVector(values, field.type);
  }
-
-    try {
-      // Convert an Array of JS values to an arrow vector
-      columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
-    } catch (error: unknown) {
-      // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-      throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
-    }
-  }
-
-  if (opt.schema != null) {
-    // `new ArrowTable(columns)` infers a schema which may sometimes have
-    // incorrect nullability (it assumes nullable=true always)
-    //
-    // `new ArrowTable(schema, columns)` will also fail because it will create a
-    // batch with an inferred schema and then complain that the batch schema
-    // does not match the provided schema.
-    //
-    // To work around this we first create a table with the wrong schema and
-    // then patch the schema of the batches so we can use
-    // `new ArrowTable(schema, batches)` which does not do any schema inference
-    const firstTable = new ArrowTable(columns);
-    const batchesFixed = firstTable.batches.map(
-      (batch) => new RecordBatch(opt.schema as Schema, batch.data),
-    );
-    let schema: Schema;
-    if (metadata !== undefined) {
-      let schemaMetadata = opt.schema.metadata;
-      if (schemaMetadata.size === 0) {
-        schemaMetadata = metadata;
-      } else {
-        for (const [key, entry] of schemaMetadata.entries()) {
-          schemaMetadata.set(key, entry);
-        }
-      }
-
-      schema = new Schema(opt.schema.fields as Field[], schemaMetadata);
-    } else {
-      schema = opt.schema as Schema;
-    }
-    return new ArrowTable(schema, batchesFixed);
-  }
-  const tbl = new ArrowTable(columns);
-  if (metadata !== undefined) {
-    // biome-ignore lint/suspicious/noExplicitAny: <explanation>
-    (<any>tbl.schema).metadata = metadata;
-  }
-  return tbl;
 }

 /**
@@ -492,6 +689,31 @@ function makeVector(
 ): Vector<any> {
  if (type !== undefined) {
    // No need for inference, let Arrow create it
+    if (type instanceof Int) {
+      if (DataType.isInt(type) && type.bitWidth === 64) {
+        // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
+        values = values.map((v) => {
+          if (v === null) {
+            return v;
+          } else if (typeof v === "bigint") {
+            return v;
+          } else if (typeof v === "number") {
+            return BigInt(v);
+          } else {
+            return v;
+          }
+        });
+      } else {
+        // Similarly, bigint isn't supported for 16 or 32-bit ints.
+        values = values.map((v) => {
+          if (typeof v == "bigint") {
+            return Number(v);
+          } else {
+            return v;
+          }
+        });
+      }
+    }
    return vectorFromArray(values, type);
  }
  if (values.length === 0) {
@@ -903,7 +1125,7 @@ function validateSchemaEmbeddings(
  schema: Schema,
  data: Array<Record<string, unknown>>,
  embeddings: EmbeddingFunctionConfig | undefined,
-) {
+): Schema {
  const fields = [];
  const missingEmbeddingFields = [];

--- a/nodejs/lancedb/connection.ts
+++ b/nodejs/lancedb/connection.ts
@@ -1,10 +1,23 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-import { Data, Schema, SchemaLike, TableLike } from "./arrow";
-import { fromTableToBuffer, makeEmptyTable } from "./arrow";
+import {
+  Data,
+  Schema,
+  SchemaLike,
+  TableLike,
+  fromTableToStreamBuffer,
+  isArrowTable,
+  makeArrowTable,
+} from "./arrow";
+import {
+  Table as ArrowTable,
+  fromTableToBuffer,
+  makeEmptyTable,
+} from "./arrow";
 import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
 import { Connection as LanceDbConnection } from "./native";
+import { sanitizeTable } from "./sanitize";
 import { LocalTable, Table } from "./table";

 export interface CreateTableOptions {
@@ -39,6 +52,8 @@ export interface CreateTableOptions {
   *
   * The default is `stable`.
   * Set to "legacy" to use the old format.
+   *
+   * @deprecated Pass `new_table_data_storage_version` to storageOptions instead.
   */
  dataStorageVersion?: string;

@@ -48,17 +63,11 @@ export interface CreateTableOptions {
   * turning this on will make the dataset unreadable for older versions
   * of LanceDB (prior to 0.10.0). To migrate an existing dataset, instead
   * use the {@link LocalTable#migrateManifestPathsV2} method.
+   *
+   * @deprecated Pass `new_table_enable_v2_manifest_paths` to storageOptions instead.
   */
  enableV2ManifestPaths?: boolean;

-  /**
-   * If true then data files will be written with the legacy format
-   *
-   * The default is false.
-   *
-   * Deprecated. Use data storage version instead.
-   */
-  useLegacyFormat?: boolean;
  schema?: SchemaLike;
  embeddingFunction?: EmbeddingFunctionConfig;
 }
@@ -116,6 +125,7 @@ export interface TableNamesOptions {
 *
 * Any created tables are independent and will continue to work even if
 * the underlying connection has been closed.
+ * @hideconstructor
 */
 export abstract class Connection {
  [Symbol.for("nodejs.util.inspect.custom")](): string {
@@ -201,11 +211,18 @@ export abstract class Connection {
   * @param {string} name The name of the table to drop.
   */
  abstract dropTable(name: string): Promise<void>;
+
+  /**
+   * Drop all tables in the database.
+   */
+  abstract dropAllTables(): Promise<void>;
 }

+/** @hideconstructor */
 export class LocalConnection extends Connection {
  readonly inner: LanceDbConnection;

+  /** @hidden */
  constructor(inner: LanceDbConnection) {
    super();
    this.inner = inner;
@@ -240,6 +257,28 @@ export class LocalConnection extends Connection {
    return new LocalTable(innerTable);
  }

+  /** Combine `storageOptions` with the deprecated per-table options. */
+  private getStorageOptions(
+    options?: Partial<CreateTableOptions>,
+  ): Record<string, string> | undefined {
+    // Spread into fresh objects so the caller's options are never mutated.
+    let storageOptions = options?.storageOptions;
+    if (options?.dataStorageVersion !== undefined) {
+      storageOptions = {
+        ...storageOptions,
+        newTableDataStorageVersion: options.dataStorageVersion,
+      };
+    }
+    if (options?.enableV2ManifestPaths !== undefined) {
+      storageOptions = {
+        ...storageOptions,
+        newTableEnableV2ManifestPaths:
+          options.enableV2ManifestPaths ? "true" : "false",
+      };
+    }
+    return cleanseStorageOptions(storageOptions);
+  }
+
  async createTable(
    nameOrOptions:
      | string
@@ -255,21 +294,15 @@ export class LocalConnection extends Connection {
    if (data === undefined) {
      throw new Error("data is required");
    }
-    const { buf, mode } = await Table.parseTableData(data, options);
-    let dataStorageVersion = "stable";
-    if (options?.dataStorageVersion !== undefined) {
-      dataStorageVersion = options.dataStorageVersion;
-    } else if (options?.useLegacyFormat !== undefined) {
-      dataStorageVersion = options.useLegacyFormat ? "legacy" : "stable";
-    }
+    const { buf, mode } = await parseTableData(data, options);
+
+    const storageOptions = this.getStorageOptions(options);

    const innerTable = await this.inner.createTable(
      nameOrOptions,
      buf,
      mode,
-      cleanseStorageOptions(options?.storageOptions),
-      dataStorageVersion,
-      options?.enableV2ManifestPaths,
+      storageOptions,
    );

    return new LocalTable(innerTable);
@@ -293,22 +326,14 @@ export class LocalConnection extends Connection {
      metadata = registry.getTableMetadata([embeddingFunction]);
    }

-    let dataStorageVersion = "stable";
-    if (options?.dataStorageVersion !== undefined) {
-      dataStorageVersion = options.dataStorageVersion;
-    } else if (options?.useLegacyFormat !== undefined) {
-      dataStorageVersion = options.useLegacyFormat ? "legacy" : "stable";
-    }
-
+    const storageOptions = this.getStorageOptions(options);
    const table = makeEmptyTable(schema, metadata);
    const buf = await fromTableToBuffer(table);
    const innerTable = await this.inner.createEmptyTable(
      name,
      buf,
      mode,
-      cleanseStorageOptions(options?.storageOptions),
-      dataStorageVersion,
-      options?.enableV2ManifestPaths,
+      storageOptions,
    );
    return new LocalTable(innerTable);
  }
@@ -316,6 +341,10 @@ export class LocalConnection extends Connection {
  async dropTable(name: string): Promise<void> {
    return this.inner.dropTable(name);
  }
+
+  async dropAllTables(): Promise<void> {
+    return this.inner.dropAllTables(); // delegates to the wrapped `inner` connection
+  }
 }

 /**
@@ -357,3 +386,38 @@ function camelToSnakeCase(camel: string): string {
  }
  return result;
 }
+
+/**
+ * Turn user-supplied table data into a buffer plus the write mode to use.
+ *
+ * @param data - Either an array of plain records or an Arrow-like table.
+ * @param options - Table creation options; `mode`, `existOk`,
+ *   `embeddingFunction` and `schema` are read here.
+ * @param streaming - When true the buffer is produced by
+ *   `fromTableToStreamBuffer`, otherwise by `fromTableToBuffer`.
+ * @returns The buffer and the resolved mode string.
+ */
+async function parseTableData(
+  data: Record<string, unknown>[] | TableLike,
+  options?: Partial<CreateTableOptions>,
+  streaming = false,
+) {
+  // `existOk` only matters for a plain "create"; it is expressed as its own mode.
+  const requestedMode: string = options?.mode ?? "create";
+  const mode =
+    requestedMode === "create" && (options?.existOk ?? false)
+      ? "exist_ok"
+      : requestedMode;
+
+  const table: ArrowTable = isArrowTable(data)
+    ? sanitizeTable(data)
+    : makeArrowTable(data as Record<string, unknown>[], options);
+
+  const buf = streaming
+    ? await fromTableToStreamBuffer(
+        table,
+        options?.embeddingFunction,
+        options?.schema,
+      )
+    : await fromTableToBuffer(
+        table,
+        options?.embeddingFunction,
+        options?.schema,
+      );
+
+  return { buf, mode };
+}
--- a/nodejs/lancedb/embedding/embedding_function.ts
+++ b/nodejs/lancedb/embedding/embedding_function.ts
@@ -15,6 +15,7 @@ import {
  newVectorType,
 } from "../arrow";
 import { sanitizeType } from "../sanitize";
+import { getRegistry } from "./registry";

 /**
 * Options for a given embedding function
@@ -32,6 +33,22 @@ export interface EmbeddingFunctionConstructor<

 /**
 * An embedding function that automatically creates vector representation for a given column.
+ *
+ * It's important that subclasses pass the **original** options they receive to
+ * `resolveVariables` and only use the resolved options it returns. The original
+ * options are recorded by `resolveVariables` so that `toJSON` can serialize them.
+ *
+ * @example
+ * ```ts
+ * class MyEmbeddingFunction extends EmbeddingFunction {
+ *   constructor(optionsRaw: {model: string, timeout: number}) {
+ *     super();
+ *     const options = this.resolveVariables(optionsRaw);
+ *     this.model = options.model;
+ *     this.timeout = options.timeout;
+ *   }
+ * }
+ * ```
 */
 export abstract class EmbeddingFunction<
  // biome-ignore lint/suspicious/noExplicitAny: we don't know what the implementor will do
@@ -44,33 +61,74 @@ export abstract class EmbeddingFunction<
   */
  // biome-ignore lint/style/useNamingConvention: we want to keep the name as it is
  readonly TOptions!: M;
-  /**
-   * Convert the embedding function to a JSON object
-   * It is used to serialize the embedding function to the schema
-   * It's important that any object returned by this method contains all the necessary
-   * information to recreate the embedding function
-   *
-   * It should return the same object that was passed to the constructor
-   * If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
-   *
-   * @example
-   * ```ts
-   * class MyEmbeddingFunction extends EmbeddingFunction {
-   *   constructor(options: {model: string, timeout: number}) {
-   *     super();
-   *     this.model = options.model;
-   *     this.timeout = options.timeout;
-   *   }
-   *   toJSON() {
-   *     return {
-   *       model: this.model,
-   *       timeout: this.timeout,
-   *     };
-   * }
-   * ```
-   */
-  abstract toJSON(): Partial<M>;

+  #config: Partial<M>;
+
+  /**
+   * Get the original arguments to the constructor, to serialize them so they
+   * can be used to recreate the embedding function later.
+   */
+  // biome-ignore lint/suspicious/noExplicitAny :
+  toJSON(): Record<string, any> {
+    // Round-trip through JSON so callers get a detached copy of the stored config.
+    const serialized = JSON.stringify(this.#config);
+    return JSON.parse(serialized);
+  }
+
+  constructor() {
+    this.#config = {}; // placeholder until resolveVariables() stores the original options
+  }
+
+  /**
+   * Provide a list of keys in the function options that should be treated as
+   * sensitive. If users pass raw values for these keys, they will be rejected.
+   */
+  protected getSensitiveKeys(): string[] {
+    return []; // no keys are sensitive by default; subclasses override to list theirs
+  }
+
+  /**
+   * Apply variables to the config.
+   */
+  /**
+   * Record the original config and return a copy with `$var:` references
+   * replaced by values from the registry.
+   *
+   * A string value of the form `$var:name` is replaced by the registry variable
+   * `name`. `$var:name:default` falls back to `default` when the variable is
+   * not set; everything after the first colon following the name is the
+   * default, so defaults may themselves contain colons.
+   *
+   * @param config - The original (unresolved) options passed to the subclass.
+   * @returns A shallow copy of `config` with variables resolved.
+   * @throws Error if a key listed by `getSensitiveKeys` holds a value that is
+   *   not a `$var:` reference, or if a referenced variable is unset and no
+   *   default was given.
+   */
+  protected resolveVariables(config: Partial<M>): Partial<M> {
+    // Keep the unresolved config so toJSON() never serializes resolved secrets.
+    this.#config = config;
+    const registry = getRegistry();
+    const sensitiveKeys = this.getSensitiveKeys();
+    const prefix = "$var:";
+    const newConfig = { ...config };
+    for (const [key_, value] of Object.entries(newConfig)) {
+      if (typeof value !== "string" || !value.startsWith(prefix)) {
+        // Not a variable reference. An absent (null/undefined) value carries no
+        // secret, so only reject sensitive keys that hold an actual raw value.
+        if (sensitiveKeys.includes(key_) && value != null) {
+          throw new Error(
+            `The key "${key_}" is sensitive and cannot be set directly. Please use the $var: syntax to set it.`,
+          );
+        }
+        continue;
+      }
+
+      // Split on the FIRST colon only: `split(":", 2)` would drop anything
+      // after a second colon and truncate defaults such as URLs.
+      const reference = value.slice(prefix.length);
+      const separator = reference.indexOf(":");
+      const name = separator === -1 ? reference : reference.slice(0, separator);
+      const defaultValue =
+        separator === -1 ? undefined : reference.slice(separator + 1);
+
+      // Makes TS happy (https://stackoverflow.com/a/78391854)
+      const key = key_ as keyof M;
+      const variableValue = registry.getVar(name);
+      if (variableValue) {
+        // biome-ignore lint/suspicious/noExplicitAny:
+        newConfig[key] = variableValue as any;
+      } else if (defaultValue) {
+        // biome-ignore lint/suspicious/noExplicitAny:
+        newConfig[key] = defaultValue as any;
+      } else {
+        throw new Error(`Variable "${name}" not found`);
+      }
+    }
+    return newConfig;
+  }
+
+  /**
+   * Optionally load any resources needed for the embedding function.
+   *
+   * This method is called after the embedding function has been initialized
+   * but before any embeddings are computed. It is useful for loading local models
+   * or other resources that are needed for the embedding function to work.
+   */
  async init?(): Promise<void>;

  /**
@@ -78,7 +136,7 @@ export abstract class EmbeddingFunction<
   *
   * @param optionsOrDatatype - The options for the field or the datatype
   *
-   * @see {@link lancedb.LanceSchema}
+   * @see {@link LanceSchema}
   */
  sourceField(
    optionsOrDatatype: Partial<FieldOptions> | DataType,
@@ -100,9 +158,9 @@ export abstract class EmbeddingFunction<
  /**
   * vectorField is used in combination with `LanceSchema` to provide a declarative data model
   *
-   * @param options - The options for the field
+   * @param optionsOrDatatype - The options for the field
   *
-   * @see {@link lancedb.LanceSchema}
+   * @see {@link LanceSchema}
   */
  vectorField(
    optionsOrDatatype?: Partial<FieldOptions> | DataType,
--- a/nodejs/lancedb/embedding/index.ts
+++ b/nodejs/lancedb/embedding/index.ts
@@ -6,7 +6,13 @@ import { sanitizeType } from "../sanitize";
 import { EmbeddingFunction } from "./embedding_function";
 import { EmbeddingFunctionConfig, getRegistry } from "./registry";

-export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";
+export {
+  FieldOptions,
+  EmbeddingFunction,
+  TextEmbeddingFunction,
+  FunctionOptions,
+  EmbeddingFunctionConstructor,
+} from "./embedding_function";

 export * from "./registry";

--- a/nodejs/lancedb/embedding/openai.ts
+++ b/nodejs/lancedb/embedding/openai.ts
@@ -21,11 +21,13 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
  #modelName: OpenAIOptions["model"];

  constructor(
-    options: Partial<OpenAIOptions> = {
+    optionsRaw: Partial<OpenAIOptions> = {
      model: "text-embedding-ada-002",
    },
  ) {
    super();
+    const options = this.resolveVariables(optionsRaw);
+
    const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
    if (!openAIKey) {
      throw new Error("OpenAI API key is required");
@@ -52,10 +54,8 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
    this.#modelName = modelName;
  }

-  toJSON() {
-    return {
-      model: this.#modelName,
-    };
+  protected getSensitiveKeys(): string[] {
+    return ["apiKey"]; // the API key must be given as a `$var:` reference, not a raw value
   }

  ndims(): number {
--- a/nodejs/lancedb/embedding/registry.ts
+++ b/nodejs/lancedb/embedding/registry.ts
@@ -7,11 +7,11 @@ import {
 } from "./embedding_function";
 import "reflect-metadata";

-type CreateReturnType<T> = T extends { init: () => Promise<void> }
+export type CreateReturnType<T> = T extends { init: () => Promise<void> }
  ? Promise<T>
  : T;

-interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
+export interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
  create(options?: T["TOptions"]): CreateReturnType<T>;
 }

@@ -23,6 +23,7 @@ interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
 */
 export class EmbeddingFunctionRegistry {
  #functions = new Map<string, EmbeddingFunctionConstructor>();
+  #variables = new Map<string, string>();

  /**
   * Get the number of registered functions
@@ -33,8 +34,6 @@ export class EmbeddingFunctionRegistry {

  /**
   * Register an embedding function
-   * @param name The name of the function
-   * @param func The function to register
   * @throws Error if the function is already registered
   */
  register<
@@ -84,10 +83,7 @@ export class EmbeddingFunctionRegistry {
      };
    } else {
      // biome-ignore lint/suspicious/noExplicitAny: <explanation>
-      create = function (options?: any) {
-        const instance = new factory(options);
-        return instance;
-      };
+      create = (options?: any) => new factory(options);
    }

    return {
@@ -166,6 +162,37 @@ export class EmbeddingFunctionRegistry {

    return metadata;
  }
+
+  /**
+   * Set a variable. These can be accessed in the embedding function
+   * configuration using the syntax `$var:variable_name`. If they are not
+   * set, an error will be thrown letting you know which key is unset. If you
+   * want to supply a default value, you can add an additional part in the
+   * configuration like so: `$var:variable_name:default_value`. Default values
+   * can be used for runtime configurations that are not sensitive, such as
+   * whether to use a GPU for inference.
+   *
+   * The name must not contain colons. The default value can contain colons.
+   *
+   * @param name
+   * @param value
+   */
+  setVar(name: string, value: string): void {
+    // Colons separate the name from the default in `$var:name:default`,
+    // so they are not allowed inside the name itself.
+    const hasColon = name.indexOf(":") !== -1;
+    if (hasColon) {
+      throw new Error("Variable names cannot contain colons");
+    }
+    this.#variables.set(name, value);
+  }
+
+  /**
+   * Get a variable.
+   * @param name
+   * @returns
+   * @see {@link setVar}
+   */
+  getVar(name: string): string | undefined {
+    return this.#variables.get(name); // undefined when no variable with this name was set
+  }
 }

 const _REGISTRY = new EmbeddingFunctionRegistry();
--- a/nodejs/lancedb/embedding/transformers.ts
+++ b/nodejs/lancedb/embedding/transformers.ts
@@ -44,11 +44,12 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
  #ndims?: number;

  constructor(
-    options: Partial<XenovaTransformerOptions> = {
+    optionsRaw: Partial<XenovaTransformerOptions> = {
      model: "Xenova/all-MiniLM-L6-v2",
    },
  ) {
    super();
+    const options = this.resolveVariables(optionsRaw);

    const modelName = options?.model ?? "Xenova/all-MiniLM-L6-v2";
    this.#tokenizerOptions = {
@@ -59,22 +60,6 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
    this.#ndims = options.ndims;
    this.#modelName = modelName;
  }
-  toJSON() {
-    // biome-ignore lint/suspicious/noExplicitAny: <explanation>
-    const obj: Record<string, any> = {
-      model: this.#modelName,
-    };
-    if (this.#ndims) {
-      obj["ndims"] = this.#ndims;
-    }
-    if (this.#tokenizerOptions) {
-      obj["tokenizerOptions"] = this.#tokenizerOptions;
-    }
-    if (this.#tokenizer) {
-      obj["tokenizer"] = this.#tokenizer.name;
-    }
-    return obj;
-  }

  async init() {
    let transformers;
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`!function(){var e,t,n;e="9627b71b382d201",t=function(){Reo.init({clientID:"9627b71b382d201"})},(n=document.createElement("script")).src="https://static.reo.dev/"+e+"/reo.js",n.defer=!0,n.onload=t,document.head.appendChild(n)}();`