Bump version: 0.19.1-beta.1 → 0.19.1-beta.2

fix: panic when field id doesn't equal to field index (#2116 )
Signed-off-by: BubbleCal <bubble-cal@outlook.com>
2025-12-23 05:19:58 +00:00 · 2025-02-13 04:39:19 +00:00 · 2025-02-13 12:38:35 +08:00 · 2025-02-12 14:08:52 -08:00 · 2025-02-11 22:05:54 +00:00 · 2025-02-11 20:56:22 +00:00
278 changed files with 13830 additions and 3633 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.15.0"
+current_version = "0.16.1-beta.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/license-header-check.yml
+++ b/.github/workflows/license-header-check.yml
@@ -0,0 +1,31 @@
+name: Check license headers
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    paths:
+      - rust/**
+      - python/**
+      - nodejs/**
+      - java/**
+      - .github/workflows/license-header-check.yml
+jobs:
+  check-licenses:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+      - name: Install license-header-checker
+        working-directory: /tmp
+        run: |
+            curl -s https://raw.githubusercontent.com/lluissm/license-header-checker/master/install.sh | bash
+            mv /tmp/bin/license-header-checker /usr/local/bin/
+      - name: Check license headers (rust)
+        run: license-header-checker -a -v ./rust/license_header.txt ./ rs && [[ -z `git status -s` ]]
+      - name: Check license headers (python)
+        run: license-header-checker -a -v ./python/license_header.txt python py && [[ -z `git status -s` ]]
+      - name: Check license headers (typescript)
+        run: license-header-checker -a -v ./nodejs/license_header.txt nodejs ts && [[ -z `git status -s` ]]
+      - name: Check license headers (java)
+        run: license-header-checker -a -v ./nodejs/license_header.txt java java && [[ -z `git status -s` ]]
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -106,6 +106,18 @@ jobs:
        python ci/mock_openai.py &
        cd nodejs/examples
        npm test
+    - name: Check docs
+      run: |
+        # We run this as part of the job because the binary needs to be built
+        # first to export the types of the native code.
+        set -e
+        npm ci
+        npm run docs
+        if ! git diff --exit-code; then
+          echo "Docs need to be updated"
+          echo "Run 'npm run docs', fix any warnings, and commit the changes."
+          exit 1
+        fi
  macos:
    timeout-minutes: 30
    runs-on: "macos-14"
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -22,6 +22,7 @@ env:
  # "1" means line tables only, which is useful for panic tracebacks.
  RUSTFLAGS: "-C debuginfo=1"
  RUST_BACKTRACE: "1"
+  CARGO_INCREMENTAL: 0

 jobs:
  lint:
@@ -51,6 +52,28 @@ jobs:
      - name: Run clippy
        run: cargo clippy --workspace --tests --all-features -- -D warnings

+  build-no-lock:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    env:
+      # Need up-to-date compilers for kernels
+      CC: clang
+      CXX: clang++
+    steps:
+      - uses: actions/checkout@v4
+      # Remove Cargo.lock to force a fresh build
+      - name: Remove Cargo.lock
+        run: rm -f Cargo.lock
+      - uses: rui314/setup-mold@v1
+      - uses: Swatinem/rust-cache@v2
+      - name: Install dependencies
+        run: |
+          sudo apt update
+          sudo apt install -y protobuf-compiler libssl-dev
+      - name: Build all
+        run: |
+          cargo build --benches --all-features --tests
+
  linux:
    timeout-minutes: 30
    # To build all features, we need more disk space than is available
@@ -75,8 +98,11 @@ jobs:
          workspaces: rust
      - name: Install dependencies
        run: |
-          sudo apt update
+          # This shaves 2 minutes off this step in CI. This doesn't seem to be
+          # necessary in standard runners, but it is in the 4x runners.
+          sudo rm /var/lib/man-db/auto-update
          sudo apt install -y protobuf-compiler libssl-dev
+      - uses: rui314/setup-mold@v1
      - name: Make Swap
        run: |
          sudo fallocate -l 16G /swapfile
@@ -87,11 +113,11 @@ jobs:
        working-directory: .
        run: docker compose up --detach --wait
      - name: Build
-        run: cargo build --all-features
+        run: cargo build --all-features --tests --locked --examples
      - name: Run tests
-        run: cargo test --all-features
+        run: cargo test --all-features --locked
      - name: Run examples
-        run: cargo run --example simple
+        run: cargo run --example simple --locked

  macos:
    timeout-minutes: 30
@@ -115,11 +141,14 @@ jobs:
          workspaces: rust
      - name: Install dependencies
        run: brew install protobuf
-      - name: Build
-        run: cargo build --all-features
      - name: Run tests
-        # Run with everything except the integration tests.
-        run: cargo test --features remote,fp16kernels
+        run: |
+          # Don't run the s3 integration tests since docker isn't available
+          # on this image.
+          ALL_FEATURES=`cargo metadata --format-version=1 --no-deps \
+            | jq -r '.packages[] | .features | keys | .[]' \
+            | grep -v s3-test | sort | uniq | paste -s -d "," -`
+          cargo test --features $ALL_FEATURES --locked

  windows:
    runs-on: windows-2022
@@ -140,8 +169,38 @@ jobs:
      - name: Run tests
        run: |
          $env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
-          cargo build
-          cargo test
+          cargo test --features remote --locked
+
+  windows-arm64-cross:
+    # We cross compile in Node releases, so we want to make sure
+    # this can run successfully.
+    runs-on: ubuntu-latest
+    container: alpine:edge
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install dependencies
+        run: |
+          set -e
+          apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
+
+          curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y
+          source $HOME/.cargo/env
+          rustup target add aarch64-pc-windows-msvc
+
+          mkdir -p sysroot
+          cd sysroot
+          sh ../ci/sysroot-aarch64-pc-windows-msvc.sh
+      - name: Check
+        env:
+          CC: clang
+          AR: llvm-ar
+          C_INCLUDE_PATH: /usr/aarch64-pc-windows-msvc/usr/include
+          CARGO_BUILD_TARGET: aarch64-pc-windows-msvc
+          RUSTFLAGS: -Ctarget-feature=+crt-static,+neon,+fp16,+fhm,+dotprod -Clinker=lld -Clink-arg=/LIBPATH:/usr/aarch64-pc-windows-msvc/usr/lib -Clink-arg=arm64rt.lib
+        run: |
+          source $HOME/.cargo/env
+          cargo check --features remote --locked

  windows-arm64:
    runs-on: windows-4x-arm
@@ -236,8 +295,7 @@ jobs:
      - name: Run tests
        run: |
          $env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
-          cargo build --target aarch64-pc-windows-msvc
-          cargo test --target aarch64-pc-windows-msvc
+          cargo test --target aarch64-pc-windows-msvc --features remote --locked

  msrv:
    # Check the minimum supported Rust version
--- a/.gitignore
+++ b/.gitignore
@@ -9,7 +9,6 @@ venv
 .vscode
 .zed
 rust/target
-rust/Cargo.lock

 site

@@ -42,5 +41,3 @@ dist
 target

 **/sccache.log
-
-Cargo.lock
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ repos:
    -   id: trailing-whitespace
 -   repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
-    rev: v0.2.2
+    rev: v0.8.4
    hooks:
    - id: ruff
 - repo: local
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,16 +21,16 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.23.0", "features" = [
+lance = { "version" = "=0.23.1", "features" = [
    "dynamodb",
-], git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
-lance-io = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
-lance-index = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
-lance-linalg = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
-lance-table = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
-lance-testing = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
-lance-datafusion = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
-lance-encoding = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.2" }
+], git = "https://github.com/lancedb/lance.git", tag = "v0.23.1-beta.2"}
+lance-io = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
+lance-index = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
+lance-linalg = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
+lance-table = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
+lance-testing = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
+lance-datafusion = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
+lance-encoding = {version = "=0.23.1", tag="v0.23.1-beta.2", git = "https://github.com/lancedb/lance.git"}
 # Note that this one does not include pyarrow
 arrow = { version = "53.2", optional = false }
 arrow-array = "53.2"
@@ -42,15 +42,19 @@ arrow-arith = "53.2"
 arrow-cast = "53.2"
 async-trait = "0"
 chrono = "0.4.35"
-datafusion-common = "44.0"
+datafusion = { version = "44.0", default-features = false }
+datafusion-catalog = "44.0"
+datafusion-common = { version = "44.0", default-features = false }
+datafusion-execution = "44.0"
+datafusion-expr = "44.0"
 datafusion-physical-plan = "44.0"
-env_logger = "0.10"
+env_logger = "0.11"
 half = { "version" = "=2.4.1", default-features = false, features = [
    "num-traits",
 ] }
 futures = "0"
 log = "0.4"
-moka = { version = "0.11", features = ["future"] }
+moka = { version = "0.12", features = ["future"] }
 object_store = "0.10.2"
 pin-project = "1.0.7"
 snafu = "0.7.4"
@@ -59,3 +63,6 @@ num-traits = "0.2"
 rand = "0.8"
 regex = "1.10"
 lazy_static = "1"
+
+# Workaround for: https://github.com/eira-fransham/crunchy/issues/13
+crunchy = "=0.2.2"
--- a/docs/openapi.yml
+++ b/docs/openapi.yml
@@ -38,6 +38,13 @@ components:
      required: true
      schema:
        type: string
+    index_name:
+      name: index_name
+      in: path
+      description: name of the index
+      required: true
+      schema:
+        type: string
  responses:
    invalid_request:
      description: Invalid request
@@ -485,3 +492,22 @@ paths:
          $ref: "#/components/responses/unauthorized"
        "404":
          $ref: "#/components/responses/not_found"
+  /v1/table/{name}/index/{index_name}/drop/:
+    post:
+      description: Drop an index from the table
+      tags:
+        - Tables
+      summary: Drop an index from the table
+      operationId: dropIndex
+      parameters:
+        - $ref: "#/components/parameters/table_name"
+        - $ref: "#/components/parameters/index_name"
+      responses:
+        "200":
+          description: Index successfully dropped
+        "400":
+          $ref: "#/components/responses/invalid_request"
+        "401":
+          $ref: "#/components/responses/unauthorized"
+        "404":
+          $ref: "#/components/responses/not_found"
--- a/docs/src/ann_indexes.ts
+++ b/docs/src/ann_indexes.ts
@@ -3,6 +3,7 @@ import * as vectordb from "vectordb";
 // --8<-- [end:import]

 (async () => {
+  console.log("ann_indexes.ts: start");
  // --8<-- [start:ingest]
  const db = await vectordb.connect("data/sample-lancedb");

@@ -49,5 +50,5 @@ import * as vectordb from "vectordb";
    .execute();
  // --8<-- [end:search3]

-  console.log("Ann indexes: done");
+  console.log("ann_indexes.ts: done");
 })();
--- a/docs/src/basic.md
+++ b/docs/src/basic.md
@@ -133,13 +133,22 @@ recommend switching to stable releases.
 ## Connect to a database

 === "Python"
+    === "Sync API"

-    ```python
-    --8<-- "python/python/tests/docs/test_basic.py:imports"
-    --8<-- "python/python/tests/docs/test_basic.py:connect"
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:imports"

-    --8<-- "python/python/tests/docs/test_basic.py:connect_async"
-    ```
+        --8<-- "python/python/tests/docs/test_basic.py:set_uri"
+        --8<-- "python/python/tests/docs/test_basic.py:connect"
+        ```
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:imports"
+
+        --8<-- "python/python/tests/docs/test_basic.py:set_uri"
+        --8<-- "python/python/tests/docs/test_basic.py:connect_async"
+        ```

 === "Typescript[^1]"

@@ -183,21 +192,33 @@ table.

 === "Python"

-    ```python
-    --8<-- "python/python/tests/docs/test_basic.py:create_table"
-    --8<-- "python/python/tests/docs/test_basic.py:create_table_async"
-    ```
-
    If the table already exists, LanceDB will raise an error by default.
    If you want to overwrite the table, you can pass in `mode="overwrite"`
    to the `create_table` method.

-    You can also pass in a pandas DataFrame directly:
+    === "Sync API"

-    ```python
-    --8<-- "python/python/tests/docs/test_basic.py:create_table_pandas"
-    --8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas"
-    ```
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:create_table"
+        ```
+
+        You can also pass in a pandas DataFrame directly:
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:create_table_pandas"
+        ```
+
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:create_table_async"
+        ```
+
+        You can also pass in a pandas DataFrame directly:
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas"
+        ```

 === "Typescript[^1]"

@@ -247,10 +268,16 @@ similar to a `CREATE TABLE` statement in SQL.

 === "Python"

-      ```python
-      --8<-- "python/python/tests/docs/test_basic.py:create_empty_table"
-      --8<-- "python/python/tests/docs/test_basic.py:create_empty_table_async"
-      ```
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:create_empty_table"
+        ```
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:create_empty_table_async"
+        ```

    !!! note "You can define schema in Pydantic"
        LanceDB comes with Pydantic support, which allows you to define the schema of your data using Pydantic models. This makes it easy to work with LanceDB tables and data. Learn more about all supported types in [tables guide](./guides/tables.md).
@@ -281,10 +308,16 @@ Once created, you can open a table as follows:

 === "Python"

-    ```python
-    --8<-- "python/python/tests/docs/test_basic.py:open_table"
-    --8<-- "python/python/tests/docs/test_basic.py:open_table_async"
-    ```
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:open_table"
+        ```
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:open_table_async"
+        ```

 === "Typescript[^1]"
    === "@lancedb/lancedb"
@@ -310,10 +343,16 @@ If you forget the name of your table, you can always get a listing of all table

 === "Python"

-    ```python
-    --8<-- "python/python/tests/docs/test_basic.py:table_names"
-    --8<-- "python/python/tests/docs/test_basic.py:table_names_async"
-    ```
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:table_names"
+        ```
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:table_names_async"
+        ```

 === "Typescript[^1]"
    === "@lancedb/lancedb"
@@ -340,10 +379,16 @@ After a table has been created, you can always add more data to it as follows:

 === "Python"

-    ```python
-    --8<-- "python/python/tests/docs/test_basic.py:add_data"
-    --8<-- "python/python/tests/docs/test_basic.py:add_data_async"
-    ```
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:add_data"
+        ```
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:add_data_async"
+        ```

 === "Typescript[^1]"
    === "@lancedb/lancedb"
@@ -370,10 +415,16 @@ Once you've embedded the query, you can find its nearest neighbors as follows:

 === "Python"

-    ```python
-    --8<-- "python/python/tests/docs/test_basic.py:vector_search"
-    --8<-- "python/python/tests/docs/test_basic.py:vector_search_async"
-    ```
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:vector_search"
+        ```
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:vector_search_async"
+        ```

    This returns a pandas DataFrame with the results.

@@ -412,10 +463,16 @@ LanceDB allows you to create an ANN index on a table as follows:

 === "Python"

-    ```py
-    --8<-- "python/python/tests/docs/test_basic.py:create_index"
-    --8<-- "python/python/tests/docs/test_basic.py:create_index_async"
-    ```
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:create_index"
+        ```
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:create_index_async"
+        ```

 === "Typescript[^1]"
    === "@lancedb/lancedb"
@@ -451,10 +508,16 @@ This can delete any number of rows that match the filter.

 === "Python"

-    ```python
-    --8<-- "python/python/tests/docs/test_basic.py:delete_rows"
-    --8<-- "python/python/tests/docs/test_basic.py:delete_rows_async"
-    ```
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:delete_rows"
+        ```
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:delete_rows_async"
+        ```

 === "Typescript[^1]"

@@ -483,7 +546,10 @@ simple or complex as needed. To see what expressions are supported, see the

 === "Python"

-      Read more: [lancedb.table.Table.delete][]
+    === "Sync API"
+        Read more: [lancedb.table.Table.delete][]
+    === "Async API"
+        Read more: [lancedb.table.AsyncTable.delete][]

 === "Typescript[^1]"

@@ -505,10 +571,16 @@ Use the `drop_table()` method on the database to remove a table.

 === "Python"

-    ```python
-    --8<-- "python/python/tests/docs/test_basic.py:drop_table"
-    --8<-- "python/python/tests/docs/test_basic.py:drop_table_async"
-    ```
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:drop_table"
+        ```
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_basic.py:drop_table_async"
+        ```

    This permanently removes the table and is not recoverable, unlike deleting rows.
    By default, if the table does not exist an exception is raised. To suppress this,
@@ -543,10 +615,17 @@ You can use the embedding API when working with embedding models. It automatical

 === "Python"

-    ```python
-    --8<-- "python/python/tests/docs/test_embeddings_optional.py:imports"
-    --8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings"
-    ```
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_embeddings_optional.py:imports"
+
+        --8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings"
+        ```
+    === "Async API"
+
+        Coming soon to the async API.
+        https://github.com/lancedb/lancedb/issues/1938

 === "Typescript[^1]"

--- a/docs/src/basic_legacy.ts
+++ b/docs/src/basic_legacy.ts
@@ -107,7 +107,6 @@ const example = async () => {
  // --8<-- [start:search]
  const query = await tbl.search([100, 100]).limit(2).execute();
  // --8<-- [end:search]
-  console.log(query);

  // --8<-- [start:delete]
  await tbl.delete('item = "fizz"');
@@ -119,8 +118,9 @@ const example = async () => {
 };

 async function main() {
+  console.log("basic_legacy.ts: start");
  await example();
-  console.log("Basic example: done");
+  console.log("basic_legacy.ts: done");
 }

 main();
--- a/docs/src/guides/tables.md
+++ b/docs/src/guides/tables.md
@@ -518,7 +518,7 @@ After a table has been created, you can always add more data to it using the `ad
        --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_from_polars"
        ```
    === "Async API"
-    
+
        ```python
        --8<-- "python/python/tests/docs/test_guide_tables.py:add_table_async_from_polars"
        ```
@@ -601,6 +601,38 @@ After a table has been created, you can always add more data to it using the `ad
    )
    ```

+## Upserting into a table
+
+Upserting lets you insert new rows or update existing rows in a table. To upsert
+in LanceDB, use the merge insert API.
+
+=== "Python"
+
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic"
+        ```
+        **API Reference**: [lancedb.table.Table.merge_insert][]
+
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic_async"
+        ```
+        **API Reference**: [lancedb.table.AsyncTable.merge_insert][]
+
+=== "Typescript[^1]"
+
+    === "@lancedb/lancedb"
+
+        ```typescript
+        --8<-- "nodejs/examples/merge_insert.test.ts:upsert_basic"
+        ```
+        **API Reference**: [lancedb.Table.mergeInsert](../js/classes/Table.md/#mergeinsert)
+
+Read more in the guide on [merge insert](tables/merge_insert.md).
+
 ## Deleting from a table

 Use the `delete()` method on tables to delete rows from a table. To choose which rows to delete, provide a filter that matches on the metadata columns. This can delete any number of rows that match the filter.
@@ -630,7 +662,7 @@ Use the `delete()` method on tables to delete rows from a table. To choose which
        ```python
        --8<-- "python/python/tests/docs/test_guide_tables.py:delete_specific_row_async"
        ```
-    
+
    ### Delete from a list of values
    === "Sync API"

@@ -838,7 +870,7 @@ a table:

 You can add new columns to the table with the `add_columns` method. New columns
 are filled with values based on a SQL expression. For example, you can add a new
-column `y` to the table, fill it with the value of `x * 2` and set the expected 
+column `y` to the table, fill it with the value of `x * 2` and set the expected
 data type for it.

 === "Python"
--- a/docs/src/guides/tables/merge_insert.md
+++ b/docs/src/guides/tables/merge_insert.md
@@ -0,0 +1,135 @@
+The merge insert command is a flexible API that can be used to perform:
+
+1. Upsert
+2. Insert-if-not-exists
+3. Replace range
+
+It works by joining the input data with the target table on a key you provide.
+Often this key is a unique row id key. You can then specify what to do when
+there is a match and when there is not a match. For example, for upsert you want
+to update if the row has a match and insert if the row doesn't have a match.
+Whereas for insert-if-not-exists you only want to insert if the row doesn't have
+a match.
+
+You can also read more in the API reference:
+
+* Python
+    * Sync: [lancedb.table.Table.merge_insert][]
+    * Async: [lancedb.table.AsyncTable.merge_insert][]
+* Typescript: [lancedb.Table.mergeInsert](../../js/classes/Table.md/#mergeinsert)
+
+!!! tip "Use scalar indices to speed up merge insert"
+
+    The merge insert command needs to perform a join between the input data and the
+    target table on the `on` key you provide. This requires scanning that entire
+    column, which can be expensive for large tables. To speed up this operation,
+    you can create a scalar index on the `on` column, which will allow LanceDB to
+    find matches without having to scan the whole table.
+
+    Read more about scalar indices in the [Building a Scalar Index](../scalar_index.md)
+    guide.
+
+!!! info "Embedding Functions"
+
+    Like the create table and add APIs, the merge insert API will automatically
+    compute embeddings if the table has an embedding definition in its schema.
+    If the input data doesn't contain the source column, or the vector column
+    is already filled, then the embeddings won't be computed. See the
+    [Embedding Functions](../../embeddings/embedding_functions.md) guide for more
+    information.
+
+## Upsert
+
+Upsert updates rows if they exist and inserts them if they don't. To do this
+with merge insert, enable both `when_matched_update_all()` and
+`when_not_matched_insert_all()`.
+
+=== "Python"
+
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic"
+        ```
+
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic_async"
+        ```
+
+=== "Typescript"
+
+    === "@lancedb/lancedb"
+
+        ```typescript
+        --8<-- "nodejs/examples/merge_insert.test.ts:upsert_basic"
+        ```
+
+!!! note "Providing subsets of columns"
+
+    If a column is nullable, it can be omitted from input data and it will be
+    considered `null`. Columns can also be provided in any order.
+
+## Insert-if-not-exists
+
+To avoid inserting duplicate rows, you can use the insert-if-not-exists command.
+This will only insert rows that do not have a match in the target table. To do
+this with merge insert, enable just `when_not_matched_insert_all()`.
+
+
+=== "Python"
+
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:insert_if_not_exists"
+        ```
+
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:insert_if_not_exists_async"
+        ```
+
+=== "Typescript"
+
+    === "@lancedb/lancedb"
+
+        ```typescript
+        --8<-- "nodejs/examples/merge_insert.test.ts:insert_if_not_exists"
+        ```
+
+
+## Replace range
+
+You can also replace a range of rows in the target table with the input data.
+For example, if you have a table of document chunks, where each chunk has
+both a `doc_id` and a `chunk_id`, you can replace all chunks for a given
+`doc_id` with updated chunks. This can be tricky otherwise because if you
+try to use upsert when the new data has fewer chunks you will end up with
+extra chunks. To avoid this, add another clause to delete any chunks for
+the document that are not in the new data, with
+`when_not_matched_by_source_delete`.
+
+=== "Python"
+
+    === "Sync API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:replace_range"
+        ```
+
+    === "Async API"
+
+        ```python
+        --8<-- "python/python/tests/docs/test_merge_insert.py:replace_range_async"
+        ```
+
+=== "Typescript"
+
+    === "@lancedb/lancedb"
+
+        ```typescript
+        --8<-- "nodejs/examples/merge_insert.test.ts:replace_range"
+        ```
--- a/docs/src/js/README.md
+++ b/docs/src/js/README.md
@@ -36,7 +36,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
 console.log(results);
 ```

-The [quickstart](../basic.md) contains a more complete example.
+The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.

 ## Development

--- a/docs/src/js/classes/Connection.md
+++ b/docs/src/js/classes/Connection.md
@@ -23,18 +23,6 @@ be closed when they are garbage collected.
 Any created tables are independent and will continue to work even if
 the underlying connection has been closed.

-## Constructors
-
-### new Connection()
-
-```ts
-new Connection(): Connection
-```
-
-#### Returns
-
-[`Connection`](Connection.md)
-
 ## Methods

 ### close()
@@ -71,7 +59,7 @@ Creates a new empty Table
 * **name**: `string`
    The name of the table.

-* **schema**: `SchemaLike`
+* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
    The schema of the table

 * **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
@@ -117,7 +105,7 @@ Creates a new Table and initialize it with new data.
 * **name**: `string`
    The name of the table.

-* **data**: `TableLike` \| `Record`&lt;`string`, `unknown`&gt;[]
+* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`&lt;`string`, `unknown`&gt;[]
    Non-empty Array of Records
    to be inserted into the table

@@ -143,6 +131,20 @@ Return a brief description of the connection

 ***

+### dropAllTables()
+
+```ts
+abstract dropAllTables(): Promise<void>
+```
+
+Drop all tables in the database.
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
+
+***
+
 ### dropTable()

 ```ts
@@ -189,7 +191,7 @@ Open a table in the database.
 * **name**: `string`
    The name of the table

-* **options?**: `Partial`&lt;`OpenTableOptions`&gt;
+* **options?**: `Partial`&lt;[`OpenTableOptions`](../interfaces/OpenTableOptions.md)&gt;

 #### Returns

--- a/docs/src/js/classes/Index.md
+++ b/docs/src/js/classes/Index.md
@@ -72,11 +72,9 @@ The results of a full text search are ordered by relevance measured by BM25.

 You can combine filters with full text search.

-For now, the full text search index only supports English, and doesn't support phrase search.
-
 #### Parameters

-* **options?**: `Partial`&lt;`FtsOptions`&gt;
+* **options?**: `Partial`&lt;[`FtsOptions`](../interfaces/FtsOptions.md)&gt;

 #### Returns

@@ -98,7 +96,7 @@ the vectors.

 #### Parameters

-* **options?**: `Partial`&lt;`HnswPqOptions`&gt;
+* **options?**: `Partial`&lt;[`HnswPqOptions`](../interfaces/HnswPqOptions.md)&gt;

 #### Returns

@@ -120,7 +118,7 @@ the vectors.

 #### Parameters

-* **options?**: `Partial`&lt;`HnswSqOptions`&gt;
+* **options?**: `Partial`&lt;[`HnswSqOptions`](../interfaces/HnswSqOptions.md)&gt;

 #### Returns

--- a/docs/src/js/classes/MergeInsertBuilder.md
+++ b/docs/src/js/classes/MergeInsertBuilder.md
@@ -0,0 +1,126 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / MergeInsertBuilder
+
+# Class: MergeInsertBuilder
+
+A builder used to create and run a merge insert operation
+
+## Constructors
+
+### new MergeInsertBuilder()
+
+```ts
+new MergeInsertBuilder(native, schema): MergeInsertBuilder
+```
+
+Construct a MergeInsertBuilder. __Internal use only.__
+
+#### Parameters
+
+* **native**: `NativeMergeInsertBuilder`
+
+* **schema**: `Schema`&lt;`any`&gt; \| `Promise`&lt;`Schema`&lt;`any`&gt;&gt;
+
+#### Returns
+
+[`MergeInsertBuilder`](MergeInsertBuilder.md)
+
+## Methods
+
+### execute()
+
+```ts
+execute(data): Promise<void>
+```
+
+Executes the merge insert operation
+
+Nothing is returned but the `Table` is updated
+
+#### Parameters
+
+* **data**: [`Data`](../type-aliases/Data.md)
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
+
+***
+
+### whenMatchedUpdateAll()
+
+```ts
+whenMatchedUpdateAll(options?): MergeInsertBuilder
+```
+
+Rows that exist in both the source table (new data) and
+the target table (old data) will be updated, replacing
+the old row with the corresponding matching row.
+
+If there are multiple matches then the behavior is undefined.
+Currently this causes multiple copies of the row to be created
+but that behavior is subject to change.
+
+An optional condition may be specified.  If it is, then only
+matched rows that satisfy the condition will be updated.  Any
+rows that do not satisfy the condition will be left as they
+are.  Failing to satisfy the condition does not cause a
+"matched row" to become a "not matched" row.
+
+The condition should be an SQL string.  Use the prefix
+target. to refer to rows in the target table (old data)
+and the prefix source. to refer to rows in the source
+table (new data).
+
+For example, "target.last_update < source.last_update"
+
+#### Parameters
+
+* **options?**
+
+* **options.where?**: `string`
+
+#### Returns
+
+[`MergeInsertBuilder`](MergeInsertBuilder.md)
+
+***
+
+### whenNotMatchedBySourceDelete()
+
+```ts
+whenNotMatchedBySourceDelete(options?): MergeInsertBuilder
+```
+
+Rows that exist only in the target table (old data) will be
+deleted.  An optional condition can be provided to limit what
+data is deleted.
+
+#### Parameters
+
+* **options?**
+
+* **options.where?**: `string`
+    An optional condition to limit what data is deleted
+
+#### Returns
+
+[`MergeInsertBuilder`](MergeInsertBuilder.md)
+
+***
+
+### whenNotMatchedInsertAll()
+
+```ts
+whenNotMatchedInsertAll(): MergeInsertBuilder
+```
+
+Rows that exist only in the source table (new data) should
+be inserted into the target table.
+
+#### Returns
+
+[`MergeInsertBuilder`](MergeInsertBuilder.md)
--- a/docs/src/js/classes/Query.md
+++ b/docs/src/js/classes/Query.md
@@ -8,30 +8,14 @@

 A builder for LanceDB queries.

+## See
+
+[Table#query](Table.md#query), [Table#search](Table.md#search)
+
 ## Extends

 - [`QueryBase`](QueryBase.md)&lt;`NativeQuery`&gt;

-## Constructors
-
-### new Query()
-
-```ts
-new Query(tbl): Query
-```
-
-#### Parameters
-
-* **tbl**: `Table`
-
-#### Returns
-
-[`Query`](Query.md)
-
-#### Overrides
-
-[`QueryBase`](QueryBase.md).[`constructor`](QueryBase.md#constructors)
-
 ## Properties

 ### inner
@@ -46,42 +30,6 @@ protected inner: Query | Promise<Query>;

 ## Methods

-### \[asyncIterator\]()
-
-```ts
-asyncIterator: AsyncIterator<RecordBatch<any>, any, undefined>
-```
-
-#### Returns
-
-`AsyncIterator`&lt;`RecordBatch`&lt;`any`&gt;, `any`, `undefined`&gt;
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`[asyncIterator]`](QueryBase.md#%5Basynciterator%5D)
-
-***
-
-### doCall()
-
-```ts
-protected doCall(fn): void
-```
-
-#### Parameters
-
-* **fn**
-
-#### Returns
-
-`void`
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`doCall`](QueryBase.md#docall)
-
-***
-
 ### execute()

 ```ts
@@ -92,7 +40,7 @@ Execute the query and return the results as an

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -161,7 +109,7 @@ fastSearch(): this
 Skip searching un-indexed data. This can make search faster, but will miss
 any data that is not yet indexed.

-Use lancedb.Table#optimize to index all un-indexed data.
+Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

 #### Returns

@@ -189,7 +137,7 @@ A filter statement to be applied to this query.

 `this`

-#### Alias
+#### See

 where

@@ -213,7 +161,7 @@ fullTextSearch(query, options?): this

 * **query**: `string`

-* **options?**: `Partial`&lt;`FullTextSearchOptions`&gt;
+* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;

 #### Returns

@@ -250,26 +198,6 @@ called then every valid row from the table will be returned.

 ***

-### nativeExecute()
-
-```ts
-protected nativeExecute(options?): Promise<RecordBatchIterator>
-```
-
-#### Parameters
-
-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
-
-#### Returns
-
-`Promise`&lt;`RecordBatchIterator`&gt;
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`nativeExecute`](QueryBase.md#nativeexecute)
-
-***
-
 ### nearestTo()

 ```ts
@@ -294,7 +222,7 @@ If there is more than one vector column you must use

 #### Parameters

-* **vector**: `IntoVector`
+* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)

 #### Returns

@@ -427,7 +355,7 @@ Collect the results as an array of objects.

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -449,7 +377,7 @@ Collect the results as an Arrow

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

--- a/docs/src/js/classes/QueryBase.md
+++ b/docs/src/js/classes/QueryBase.md
@@ -8,6 +8,11 @@

 Common methods supported by all query types

+## See
+
+ - [Query](Query.md)
+ - [VectorQuery](VectorQuery.md)
+
 ## Extended by

 - [`Query`](Query.md)
@@ -21,22 +26,6 @@ Common methods supported by all query types

 - `AsyncIterable`&lt;`RecordBatch`&gt;

-## Constructors
-
-### new QueryBase()
-
-```ts
-protected new QueryBase<NativeQueryType>(inner): QueryBase<NativeQueryType>
-```
-
-#### Parameters
-
-* **inner**: `NativeQueryType` \| `Promise`&lt;`NativeQueryType`&gt;
-
-#### Returns
-
-[`QueryBase`](QueryBase.md)&lt;`NativeQueryType`&gt;
-
 ## Properties

 ### inner
@@ -47,38 +36,6 @@ protected inner: NativeQueryType | Promise<NativeQueryType>;

 ## Methods

-### \[asyncIterator\]()
-
-```ts
-asyncIterator: AsyncIterator<RecordBatch<any>, any, undefined>
-```
-
-#### Returns
-
-`AsyncIterator`&lt;`RecordBatch`&lt;`any`&gt;, `any`, `undefined`&gt;
-
-#### Implementation of
-
-`AsyncIterable.[asyncIterator]`
-
-***
-
-### doCall()
-
-```ts
-protected doCall(fn): void
-```
-
-#### Parameters
-
-* **fn**
-
-#### Returns
-
-`void`
-
-***
-
 ### execute()

 ```ts
@@ -89,7 +46,7 @@ Execute the query and return the results as an

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -150,7 +107,7 @@ fastSearch(): this
 Skip searching un-indexed data. This can make search faster, but will miss
 any data that is not yet indexed.

-Use lancedb.Table#optimize to index all un-indexed data.
+Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

 #### Returns

@@ -174,7 +131,7 @@ A filter statement to be applied to this query.

 `this`

-#### Alias
+#### See

 where

@@ -194,7 +151,7 @@ fullTextSearch(query, options?): this

 * **query**: `string`

-* **options?**: `Partial`&lt;`FullTextSearchOptions`&gt;
+* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;

 #### Returns

@@ -223,22 +180,6 @@ called then every valid row from the table will be returned.

 ***

-### nativeExecute()
-
-```ts
-protected nativeExecute(options?): Promise<RecordBatchIterator>
-```
-
-#### Parameters
-
-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
-
-#### Returns
-
-`Promise`&lt;`RecordBatchIterator`&gt;
-
-***
-
 ### offset()

 ```ts
@@ -314,7 +255,7 @@ Collect the results as an array of objects.

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -332,7 +273,7 @@ Collect the results as an Arrow

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -14,21 +14,13 @@ will be freed when the Table is garbage collected.  To eagerly free the cache yo
 can call the `close` method.  Once the Table is closed, it cannot be used for any
 further operations.

+Tables are created using the methods [Connection#createTable](Connection.md#createtable)
+and [Connection#createEmptyTable](Connection.md#createemptytable). Existing tables are opened
+using [Connection#openTable](Connection.md#opentable).
+
 Closing a table is optional.  It not closed, it will be closed when it is garbage
 collected.

-## Constructors
-
-### new Table()
-
-```ts
-new Table(): Table
-```
-
-#### Returns
-
-[`Table`](Table.md)
-
 ## Accessors

 ### name
@@ -216,6 +208,9 @@ Indices on vector columns will speed up vector searches.
 Indices on scalar columns will speed up filtering (in both
 vector and non-vector searches)

+We currently don't support custom named indexes.
+The index name will always be `${column}_idx`.
+
 #### Parameters

 * **column**: `string`
@@ -226,11 +221,6 @@ vector and non-vector searches)

 `Promise`&lt;`void`&gt;

-#### Note
-
-We currently don't support custom named indexes,
-The index name will always be `${column}_idx`
-
 #### Examples

 ```ts
@@ -329,18 +319,14 @@ Drop an index from the table.

 * **name**: `string`
    The name of the index.
+    This does not delete the index from disk, it just removes it from the table.
+    To delete the index, run [Table#optimize](Table.md#optimize) after dropping the index.
+    Use [Table.listIndices](Table.md#listindices) to find the names of the indices.

 #### Returns

 `Promise`&lt;`void`&gt;

-#### Note
-
-This does not delete the index from disk, it just removes it from the table.
-To delete the index, run [Table#optimize](Table.md#optimize) after dropping the index.
-
-Use [Table.listIndices](Table.md#listindices) to find the names of the indices.
-
 ***

 ### indexStats()
@@ -404,7 +390,7 @@ List all the versions of the table

 #### Returns

-`Promise`&lt;`Version`[]&gt;
+`Promise`&lt;[`Version`](../interfaces/Version.md)[]&gt;

 ***

@@ -420,7 +406,7 @@ abstract mergeInsert(on): MergeInsertBuilder

 #### Returns

-`MergeInsertBuilder`
+[`MergeInsertBuilder`](MergeInsertBuilder.md)

 ***

@@ -464,7 +450,7 @@ Modeled after ``VACUUM`` in PostgreSQL.

 #### Returns

-`Promise`&lt;`OptimizeStats`&gt;
+`Promise`&lt;[`OptimizeStats`](../interfaces/OptimizeStats.md)&gt;

 ***

@@ -581,7 +567,7 @@ Get the schema of the table.
 abstract search(
   query,
   queryType?,
-   ftsColumns?): VectorQuery | Query
+   ftsColumns?): Query | VectorQuery
 ```

 Create a search query to find the nearest neighbors
@@ -589,7 +575,7 @@ of the given query

 #### Parameters

-* **query**: `string` \| `IntoVector`
+* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md)
    the query, a vector or string

 * **queryType?**: `string`
@@ -603,7 +589,7 @@ of the given query

 #### Returns

-[`VectorQuery`](VectorQuery.md) \| [`Query`](Query.md)
+[`Query`](Query.md) \| [`VectorQuery`](VectorQuery.md)

 ***

@@ -722,7 +708,7 @@ by `query`.

 #### Parameters

-* **vector**: `IntoVector`
+* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)

 #### Returns

@@ -745,38 +731,3 @@ Retrieve the version of the table
 #### Returns

 `Promise`&lt;`number`&gt;
-
-***
-
-### parseTableData()
-
-```ts
-static parseTableData(
-   data,
-   options?,
-   streaming?): Promise<object>
-```
-
-#### Parameters
-
-* **data**: `TableLike` \| `Record`&lt;`string`, `unknown`&gt;[]
-
-* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
-
-* **streaming?**: `boolean` = `false`
-
-#### Returns
-
-`Promise`&lt;`object`&gt;
-
-##### buf
-
-```ts
-buf: Buffer;
-```
-
-##### mode
-
-```ts
-mode: string;
-```
--- a/docs/src/js/classes/VectorQuery.md
+++ b/docs/src/js/classes/VectorQuery.md
@@ -10,30 +10,14 @@ A builder used to construct a vector search

 This builder can be reused to execute the query many times.

+## See
+
+[Query#nearestTo](Query.md#nearestto)
+
 ## Extends

 - [`QueryBase`](QueryBase.md)&lt;`NativeVectorQuery`&gt;

-## Constructors
-
-### new VectorQuery()
-
-```ts
-new VectorQuery(inner): VectorQuery
-```
-
-#### Parameters
-
-* **inner**: `VectorQuery` \| `Promise`&lt;`VectorQuery`&gt;
-
-#### Returns
-
-[`VectorQuery`](VectorQuery.md)
-
-#### Overrides
-
-[`QueryBase`](QueryBase.md).[`constructor`](QueryBase.md#constructors)
-
 ## Properties

 ### inner
@@ -48,22 +32,6 @@ protected inner: VectorQuery | Promise<VectorQuery>;

 ## Methods

-### \[asyncIterator\]()
-
-```ts
-asyncIterator: AsyncIterator<RecordBatch<any>, any, undefined>
-```
-
-#### Returns
-
-`AsyncIterator`&lt;`RecordBatch`&lt;`any`&gt;, `any`, `undefined`&gt;
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`[asyncIterator]`](QueryBase.md#%5Basynciterator%5D)
-
-***
-
 ### addQueryVector()

 ```ts
@@ -72,7 +40,7 @@ addQueryVector(vector): VectorQuery

 #### Parameters

-* **vector**: `IntoVector`
+* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)

 #### Returns

@@ -179,26 +147,6 @@ By default "l2" is used.

 ***

-### doCall()
-
-```ts
-protected doCall(fn): void
-```
-
-#### Parameters
-
-* **fn**
-
-#### Returns
-
-`void`
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`doCall`](QueryBase.md#docall)
-
-***
-
 ### ef()

 ```ts
@@ -233,7 +181,7 @@ Execute the query and return the results as an

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -302,7 +250,7 @@ fastSearch(): this
 Skip searching un-indexed data. This can make search faster, but will miss
 any data that is not yet indexed.

-Use lancedb.Table#optimize to index all un-indexed data.
+Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

 #### Returns

@@ -330,7 +278,7 @@ A filter statement to be applied to this query.

 `this`

-#### Alias
+#### See

 where

@@ -354,7 +302,7 @@ fullTextSearch(query, options?): this

 * **query**: `string`

-* **options?**: `Partial`&lt;`FullTextSearchOptions`&gt;
+* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;

 #### Returns

@@ -391,26 +339,6 @@ called then every valid row from the table will be returned.

 ***

-### nativeExecute()
-
-```ts
-protected nativeExecute(options?): Promise<RecordBatchIterator>
-```
-
-#### Parameters
-
-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
-
-#### Returns
-
-`Promise`&lt;`RecordBatchIterator`&gt;
-
-#### Inherited from
-
-[`QueryBase`](QueryBase.md).[`nativeExecute`](QueryBase.md#nativeexecute)
-
-***
-
 ### nprobes()

 ```ts
@@ -625,7 +553,7 @@ Collect the results as an array of objects.

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

@@ -647,7 +575,7 @@ Collect the results as an Arrow

 #### Parameters

-* **options?**: `Partial`&lt;`QueryExecutionOptions`&gt;
+* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;

 #### Returns

--- a/docs/src/js/enumerations/WriteMode.md
+++ b/docs/src/js/enumerations/WriteMode.md
@@ -1,33 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / WriteMode
-
-# Enumeration: WriteMode
-
-Write mode for writing a table.
-
-## Enumeration Members
-
-### Append
-
-```ts
-Append: "Append";
-```
-
-***
-
-### Create
-
-```ts
-Create: "Create";
-```
-
-***
-
-### Overwrite
-
-```ts
-Overwrite: "Overwrite";
-```
--- a/docs/src/js/functions/connect.md
+++ b/docs/src/js/functions/connect.md
@@ -6,10 +6,10 @@

 # Function: connect()

-## connect(uri, opts)
+## connect(uri, options)

 ```ts
-function connect(uri, opts?): Promise<Connection>
+function connect(uri, options?): Promise<Connection>
 ```

 Connect to a LanceDB instance at the given URI.
@@ -26,7 +26,8 @@ Accepted formats:
    The uri of the database. If the database uri starts
    with `db://` then it connects to a remote database.

-* **opts?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
+* **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
+    The options to use when connecting to the database

 ### Returns

@@ -49,10 +50,10 @@ const conn = await connect(
 });
 ```

-## connect(opts)
+## connect(options)

 ```ts
-function connect(opts): Promise<Connection>
+function connect(options): Promise<Connection>
 ```

 Connect to a LanceDB instance at the given URI.
@@ -65,7 +66,8 @@ Accepted formats:

 ### Parameters

-* **opts**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt; & `object`
+* **options**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt; & `object`
+    The options to use when connecting to the database

 ### Returns

--- a/docs/src/js/functions/makeArrowTable.md
+++ b/docs/src/js/functions/makeArrowTable.md
@@ -22,8 +22,6 @@ when creating a table or adding data to it)
 This function converts an array of Record<String, any> (row-major JS objects)
 to an Arrow Table (a columnar structure)

-Note that it currently does not support nulls.
-
 If a schema is provided then it will be used to determine the resulting array
 types.  Fields will also be reordered to fit the order defined by the schema.

@@ -31,6 +29,9 @@ If a schema is not provided then the types will be inferred and the field order
 will be controlled by the order of properties in the first record.  If a type
 is inferred it will always be nullable.

+If not all fields are found in the data, then a subset of the schema will be
+returned.
+
 If the input is empty then a schema must be provided to create an empty table.

 When a schema is not specified then data types will be inferred.  The inference
@@ -38,6 +39,7 @@ rules are as follows:

 - boolean => Bool
 - number => Float64
+ - bigint => Int64
 - String => Utf8
 - Buffer => Binary
 - Record<String, any> => Struct
@@ -57,6 +59,7 @@ rules are as follows:

 ## Example

+```ts
 import { fromTableToBuffer, makeArrowTable } from "../arrow";
 import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";

@@ -78,42 +81,40 @@ The `vectorColumns` option can be used to support other vector column
 names and data types.

 ```ts
-
 const schema = new Schema([
-   new Field("a", new Float64()),
-   new Field("b", new Float64()),
-   new Field(
-     "vector",
-     new FixedSizeList(3, new Field("item", new Float32()))
-   ),
- ]);
- const table = makeArrowTable([
-   { a: 1, b: 2, vector: [1, 2, 3] },
-   { a: 4, b: 5, vector: [4, 5, 6] },
-   { a: 7, b: 8, vector: [7, 8, 9] },
- ]);
- assert.deepEqual(table.schema, schema);
+  new Field("a", new Float64()),
+  new Field("b", new Float64()),
+  new Field(
+    "vector",
+    new FixedSizeList(3, new Field("item", new Float32()))
+  ),
+]);
+const table = makeArrowTable([
+  { a: 1, b: 2, vector: [1, 2, 3] },
+  { a: 4, b: 5, vector: [4, 5, 6] },
+  { a: 7, b: 8, vector: [7, 8, 9] },
+]);
+assert.deepEqual(table.schema, schema);
 ```

 You can specify the vector column types and names using the options as well

-```typescript
-
+```ts
 const schema = new Schema([
-   new Field('a', new Float64()),
-   new Field('b', new Float64()),
-   new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
-   new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
- ]);
+  new Field('a', new Float64()),
+  new Field('b', new Float64()),
+  new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
+  new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
+]);
 const table = makeArrowTable([
-   { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
-   { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
-   { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
- ], {
-   vectorColumns: {
-     vec1: { type: new Float16() },
-     vec2: { type: new Float16() }
-   }
- }
+  { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
+  { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
+  { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
+], {
+  vectorColumns: {
+    vec1: { type: new Float16() },
+    vec2: { type: new Float16() }
+  }
+});
 assert.deepEqual(table.schema, schema)
 ```
--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -9,15 +9,12 @@
 - [embedding](namespaces/embedding/README.md)
 - [rerankers](namespaces/rerankers/README.md)

-## Enumerations
-
- [WriteMode](enumerations/WriteMode.md)
-
 ## Classes

 - [Connection](classes/Connection.md)
 - [Index](classes/Index.md)
 - [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
+- [MergeInsertBuilder](classes/MergeInsertBuilder.md)
 - [Query](classes/Query.md)
 - [QueryBase](classes/QueryBase.md)
 - [RecordBatchIterator](classes/RecordBatchIterator.md)
@@ -31,23 +28,39 @@
 - [AddDataOptions](interfaces/AddDataOptions.md)
 - [ClientConfig](interfaces/ClientConfig.md)
 - [ColumnAlteration](interfaces/ColumnAlteration.md)
+- [CompactionStats](interfaces/CompactionStats.md)
 - [ConnectionOptions](interfaces/ConnectionOptions.md)
 - [CreateTableOptions](interfaces/CreateTableOptions.md)
 - [ExecutableQuery](interfaces/ExecutableQuery.md)
+- [FtsOptions](interfaces/FtsOptions.md)
+- [FullTextSearchOptions](interfaces/FullTextSearchOptions.md)
+- [HnswPqOptions](interfaces/HnswPqOptions.md)
+- [HnswSqOptions](interfaces/HnswSqOptions.md)
 - [IndexConfig](interfaces/IndexConfig.md)
 - [IndexOptions](interfaces/IndexOptions.md)
 - [IndexStatistics](interfaces/IndexStatistics.md)
 - [IvfPqOptions](interfaces/IvfPqOptions.md)
+- [OpenTableOptions](interfaces/OpenTableOptions.md)
 - [OptimizeOptions](interfaces/OptimizeOptions.md)
+- [OptimizeStats](interfaces/OptimizeStats.md)
+- [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
+- [RemovalStats](interfaces/RemovalStats.md)
 - [RetryConfig](interfaces/RetryConfig.md)
 - [TableNamesOptions](interfaces/TableNamesOptions.md)
 - [TimeoutConfig](interfaces/TimeoutConfig.md)
 - [UpdateOptions](interfaces/UpdateOptions.md)
- [WriteOptions](interfaces/WriteOptions.md)
+- [Version](interfaces/Version.md)

 ## Type Aliases

 - [Data](type-aliases/Data.md)
+- [DataLike](type-aliases/DataLike.md)
+- [FieldLike](type-aliases/FieldLike.md)
+- [IntoSql](type-aliases/IntoSql.md)
+- [IntoVector](type-aliases/IntoVector.md)
+- [RecordBatchLike](type-aliases/RecordBatchLike.md)
+- [SchemaLike](type-aliases/SchemaLike.md)
+- [TableLike](type-aliases/TableLike.md)

 ## Functions

--- a/docs/src/js/interfaces/ClientConfig.md
+++ b/docs/src/js/interfaces/ClientConfig.md
@@ -8,6 +8,14 @@

 ## Properties

+### extraHeaders?
+
+```ts
+optional extraHeaders: Record<string, string>;
+```
+
+***
+
 ### retryConfig?

 ```ts
--- a/docs/src/js/interfaces/CompactionStats.md
+++ b/docs/src/js/interfaces/CompactionStats.md
@@ -0,0 +1,49 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / CompactionStats
+
+# Interface: CompactionStats
+
+Statistics about a compaction operation.
+
+## Properties
+
+### filesAdded
+
+```ts
+filesAdded: number;
+```
+
+The number of new, compacted data files added
+
+***
+
+### filesRemoved
+
+```ts
+filesRemoved: number;
+```
+
+The number of data files removed
+
+***
+
+### fragmentsAdded
+
+```ts
+fragmentsAdded: number;
+```
+
+The number of new, compacted fragments added
+
+***
+
+### fragmentsRemoved
+
+```ts
+fragmentsRemoved: number;
+```
+
+The number of fragments removed
--- a/docs/src/js/interfaces/CreateTableOptions.md
+++ b/docs/src/js/interfaces/CreateTableOptions.md
@@ -8,7 +8,7 @@

 ## Properties

-### dataStorageVersion?
+### ~~dataStorageVersion?~~

 ```ts
 optional dataStorageVersion: string;
@@ -19,6 +19,10 @@ The version of the data storage format to use.
 The default is `stable`.
 Set to "legacy" to use the old format.

+#### Deprecated
+
+Pass `new_table_data_storage_version` to storageOptions instead.
+
 ***

 ### embeddingFunction?
@@ -29,7 +33,7 @@ optional embeddingFunction: EmbeddingFunctionConfig;

 ***

-### enableV2ManifestPaths?
+### ~~enableV2ManifestPaths?~~

 ```ts
 optional enableV2ManifestPaths: boolean;
@@ -41,6 +45,10 @@ turning this on will make the dataset unreadable for older versions
 of LanceDB (prior to 0.10.0). To migrate an existing dataset, instead
 use the LocalTable#migrateManifestPathsV2 method.

+#### Deprecated
+
+Pass `new_table_enable_v2_manifest_paths` to storageOptions instead.
+
 ***

 ### existOk
@@ -90,17 +98,3 @@ Options already set on the connection will be inherited by the table,
 but can be overridden here.

 The available options are described at https://lancedb.github.io/lancedb/guides/storage/
-
-***
-
-### useLegacyFormat?
-
-```ts
-optional useLegacyFormat: boolean;
-```
-
-If true then data files will be written with the legacy format
-
-The default is false.
-
-Deprecated. Use data storage version instead.
--- a/docs/src/js/interfaces/FtsOptions.md
+++ b/docs/src/js/interfaces/FtsOptions.md
@@ -0,0 +1,103 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / FtsOptions
+
+# Interface: FtsOptions
+
+Options to create a full text search index
+
+## Properties
+
+### asciiFolding?
+
+```ts
+optional asciiFolding: boolean;
+```
+
+whether to apply ASCII folding, i.e. convert accented characters to their ASCII equivalents (e.g. "café" becomes "cafe")
+
+***
+
+### baseTokenizer?
+
+```ts
+optional baseTokenizer: "raw" | "simple" | "whitespace";
+```
+
+The tokenizer to use when building the index.
+The default is "simple".
+
+The following tokenizers are available:
+
+"simple" - Simple tokenizer. This tokenizer splits the text into tokens using whitespace and punctuation as a delimiter.
+
+"whitespace" - Whitespace tokenizer. This tokenizer splits the text into tokens using whitespace as a delimiter.
+
+"raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token.
+
+***
+
+### language?
+
+```ts
+optional language: string;
+```
+
+language for stemming and stop words
+this is only used when `stem` or `removeStopWords` is true
+
+***
+
+### lowercase?
+
+```ts
+optional lowercase: boolean;
+```
+
+whether to lowercase tokens
+
+***
+
+### maxTokenLength?
+
+```ts
+optional maxTokenLength: number;
+```
+
+maximum token length
+tokens longer than this length will be ignored
+
+***
+
+### removeStopWords?
+
+```ts
+optional removeStopWords: boolean;
+```
+
+whether to remove stop words
+
+***
+
+### stem?
+
+```ts
+optional stem: boolean;
+```
+
+whether to stem tokens
+
+***
+
+### withPosition?
+
+```ts
+optional withPosition: boolean;
+```
+
+Whether to build the index with positions.
+True by default.
+If set to false, the index will not store the positions of the tokens in the text,
+which will make the index smaller and faster to build, but will not support phrase queries.
--- a/docs/src/js/interfaces/FullTextSearchOptions.md
+++ b/docs/src/js/interfaces/FullTextSearchOptions.md
@@ -0,0 +1,22 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / FullTextSearchOptions
+
+# Interface: FullTextSearchOptions
+
+Options that control the behavior of a full text search
+
+## Properties
+
+### columns?
+
+```ts
+optional columns: string | string[];
+```
+
+The columns to search
+
+If not specified, all indexed columns will be searched.
+For now, only one column can be searched.
--- a/docs/src/js/interfaces/HnswPqOptions.md
+++ b/docs/src/js/interfaces/HnswPqOptions.md
@@ -0,0 +1,149 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / HnswPqOptions
+
+# Interface: HnswPqOptions
+
+Options to create an `HNSW_PQ` index
+
+## Properties
+
+### distanceType?
+
+```ts
+optional distanceType: "l2" | "cosine" | "dot";
+```
+
+The distance metric used to train the index.
+
+Default value is "l2".
+
+The following distance types are available:
+
+"l2" - Euclidean distance. This is a very common distance metric that
+accounts for both magnitude and direction when determining the distance
+between vectors. L2 distance has a range of [0, ∞).
+
+"cosine" - Cosine distance.  Cosine distance is a distance metric
+calculated from the cosine similarity between two vectors. Cosine
+similarity is a measure of similarity between two non-zero vectors of an
+inner product space. It is defined to equal the cosine of the angle
+between them.  Unlike L2, the cosine distance is not affected by the
+magnitude of the vectors.  Cosine distance has a range of [0, 2].
+
+"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
+distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
+L2 norm is 1), then dot distance is equivalent to the cosine distance.
+
+***
+
+### efConstruction?
+
+```ts
+optional efConstruction: number;
+```
+
+The number of candidates to evaluate during the construction of the HNSW graph.
+
+The default value is 300.
+
+This value controls the tradeoff between build speed and accuracy.
+The higher the value the more accurate the build but the slower it will be.
+150 to 300 is the typical range. 100 is a minimum for good quality search
+results. In most cases, there is no benefit to setting this higher than 500.
+This value should be set to a value that is not less than `ef` in the search phase.
+
+***
+
+### m?
+
+```ts
+optional m: number;
+```
+
+The number of neighbors to select for each vector in the HNSW graph.
+
+The default value is 20.
+
+This value controls the tradeoff between search speed and accuracy.
+The higher the value the more accurate the search but the slower it will be.
+
+***
+
+### maxIterations?
+
+```ts
+optional maxIterations: number;
+```
+
+Max iterations to train kmeans.
+
+The default value is 50.
+
+When training an IVF index we use kmeans to calculate the partitions.  This parameter
+controls how many iterations of kmeans to run.
+
+Increasing this might improve the quality of the index but in most cases the parameter
+is unused because kmeans will converge with fewer iterations.  The parameter is only
+used in cases where kmeans does not appear to converge.  In those cases it is unlikely
+that setting this larger will lead to the index converging anyways.
+
+***
+
+### numPartitions?
+
+```ts
+optional numPartitions: number;
+```
+
+The number of IVF partitions to create.
+
+For HNSW, we recommend a small number of partitions. Setting this to 1 works
+well for most tables. For very large tables, training just one HNSW graph
+will require too much memory. Each partition becomes its own HNSW graph, so
+setting this value higher reduces the peak memory use of training.
+
+***
+
+### numSubVectors?
+
+```ts
+optional numSubVectors: number;
+```
+
+Number of sub-vectors of PQ.
+
+This value controls how much the vector is compressed during the quantization step.
+The more sub vectors there are the less the vector is compressed.  The default is
+the dimension of the vector divided by 16.  If the dimension is not evenly divisible
+by 16 we use the dimension divided by 8.
+
+The above two cases are highly preferred.  Having 8 or 16 values per subvector allows
+us to use efficient SIMD instructions.
+
+If the dimension is not divisible by 8 then we use 1 subvector.  This is not ideal and
+will likely result in poor performance.
+
+***
+
+### sampleRate?
+
+```ts
+optional sampleRate: number;
+```
+
+The rate used to calculate the number of training vectors for kmeans.
+
+Default value is 256.
+
+When an IVF index is trained, we need to calculate partitions.  These are groups
+of vectors that are similar to each other.  To do this we use an algorithm called kmeans.
+
+Running kmeans on a large dataset can be slow.  To speed this up we run kmeans on a
+random sample of the data.  This parameter controls the size of the sample.  The total
+number of vectors used to train the index is `sampleRate * numPartitions`.
+
+Increasing this value might improve the quality of the index but in most cases the
+default should be sufficient.
--- a/docs/src/js/interfaces/HnswSqOptions.md
+++ b/docs/src/js/interfaces/HnswSqOptions.md
@@ -0,0 +1,128 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / HnswSqOptions
+
+# Interface: HnswSqOptions
+
+Options to create an `HNSW_SQ` index
+
+## Properties
+
+### distanceType?
+
+```ts
+optional distanceType: "l2" | "cosine" | "dot";
+```
+
+The distance metric used to train the index.
+
+Default value is "l2".
+
+The following distance types are available:
+
+"l2" - Euclidean distance. This is a very common distance metric that
+accounts for both magnitude and direction when determining the distance
+between vectors. L2 distance has a range of [0, ∞).
+
+"cosine" - Cosine distance.  Cosine distance is a distance metric
+calculated from the cosine similarity between two vectors. Cosine
+similarity is a measure of similarity between two non-zero vectors of an
+inner product space. It is defined to equal the cosine of the angle
+between them.  Unlike L2, the cosine distance is not affected by the
+magnitude of the vectors.  Cosine distance has a range of [0, 2].
+
+"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
+distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
+L2 norm is 1), then dot distance is equivalent to the cosine distance.
+
+***
+
+### efConstruction?
+
+```ts
+optional efConstruction: number;
+```
+
+The number of candidates to evaluate during the construction of the HNSW graph.
+
+The default value is 300.
+
+This value controls the tradeoff between build speed and accuracy.
+The higher the value the more accurate the build but the slower it will be.
+150 to 300 is the typical range. 100 is a minimum for good quality search
+results. In most cases, there is no benefit to setting this higher than 500.
+This value should be set to a value that is not less than `ef` in the search phase.
+
+***
+
+### m?
+
+```ts
+optional m: number;
+```
+
+The number of neighbors to select for each vector in the HNSW graph.
+
+The default value is 20.
+
+This value controls the tradeoff between search speed and accuracy.
+The higher the value the more accurate the search but the slower it will be.
+
+***
+
+### maxIterations?
+
+```ts
+optional maxIterations: number;
+```
+
+Max iterations to train kmeans.
+
+The default value is 50.
+
+When training an IVF index we use kmeans to calculate the partitions.  This parameter
+controls how many iterations of kmeans to run.
+
+Increasing this might improve the quality of the index but in most cases the parameter
+is unused because kmeans will converge with fewer iterations.  The parameter is only
+used in cases where kmeans does not appear to converge.  In those cases it is unlikely
+that setting this larger will lead to the index converging anyway.
+
+***
+
+### numPartitions?
+
+```ts
+optional numPartitions: number;
+```
+
+The number of IVF partitions to create.
+
+For HNSW, we recommend a small number of partitions. Setting this to 1 works
+well for most tables. For very large tables, training just one HNSW graph
+will require too much memory. Each partition becomes its own HNSW graph, so
+setting this value higher reduces the peak memory use of training.
+
+***
+
+### sampleRate?
+
+```ts
+optional sampleRate: number;
+```
+
+The rate used to calculate the number of training vectors for kmeans.
+
+Default value is 256.
+
+When an IVF index is trained, we need to calculate partitions.  These are groups
+of vectors that are similar to each other.  To do this we use an algorithm called kmeans.
+
+Running kmeans on a large dataset can be slow.  To speed this up we run kmeans on a
+random sample of the data.  This parameter controls the size of the sample.  The total
+number of vectors used to train the index is `sampleRate * numPartitions`.
+
+Increasing this value might improve the quality of the index but in most cases the
+default should be sufficient.
--- a/docs/src/js/interfaces/OpenTableOptions.md
+++ b/docs/src/js/interfaces/OpenTableOptions.md
@@ -0,0 +1,40 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / OpenTableOptions
+
+# Interface: OpenTableOptions
+
+## Properties
+
+### indexCacheSize?
+
+```ts
+optional indexCacheSize: number;
+```
+
+Set the size of the index cache, specified as a number of entries
+
+The exact meaning of an "entry" will depend on the type of index:
+- IVF: there is one entry for each IVF partition
+- BTREE: there is one entry for the entire index
+
+This cache applies to the entire opened table, across all indices.
+Setting this value higher will increase performance on larger datasets
+at the expense of more RAM.
+
+***
+
+### storageOptions?
+
+```ts
+optional storageOptions: Record<string, string>;
+```
+
+Configuration for object storage.
+
+Options already set on the connection will be inherited by the table,
+but can be overridden here.
+
+The available options are described at https://lancedb.github.io/lancedb/guides/storage/
--- a/docs/src/js/interfaces/OptimizeStats.md
+++ b/docs/src/js/interfaces/OptimizeStats.md
@@ -0,0 +1,29 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / OptimizeStats
+
+# Interface: OptimizeStats
+
+Statistics about an optimize operation
+
+## Properties
+
+### compaction
+
+```ts
+compaction: CompactionStats;
+```
+
+Statistics about the compaction operation
+
+***
+
+### prune
+
+```ts
+prune: RemovalStats;
+```
+
+Statistics about the removal operation
--- a/docs/src/js/interfaces/QueryExecutionOptions.md
+++ b/docs/src/js/interfaces/QueryExecutionOptions.md
@@ -0,0 +1,22 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / QueryExecutionOptions
+
+# Interface: QueryExecutionOptions
+
+Options that control the behavior of a particular query execution
+
+## Properties
+
+### maxBatchLength?
+
+```ts
+optional maxBatchLength: number;
+```
+
+The maximum number of rows to return in a single batch
+
+Batches may have fewer rows if the underlying data is stored
+in smaller chunks.
--- a/docs/src/js/interfaces/RemovalStats.md
+++ b/docs/src/js/interfaces/RemovalStats.md
@@ -0,0 +1,29 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / RemovalStats
+
+# Interface: RemovalStats
+
+Statistics about a cleanup operation
+
+## Properties
+
+### bytesRemoved
+
+```ts
+bytesRemoved: number;
+```
+
+The number of bytes removed
+
+***
+
+### oldVersionsRemoved
+
+```ts
+oldVersionsRemoved: number;
+```
+
+The number of old versions removed
--- a/docs/src/js/interfaces/Version.md
+++ b/docs/src/js/interfaces/Version.md
@@ -0,0 +1,31 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / Version
+
+# Interface: Version
+
+## Properties
+
+### metadata
+
+```ts
+metadata: Record<string, string>;
+```
+
+***
+
+### timestamp
+
+```ts
+timestamp: Date;
+```
+
+***
+
+### version
+
+```ts
+version: number;
+```
--- a/docs/src/js/interfaces/WriteOptions.md
+++ b/docs/src/js/interfaces/WriteOptions.md
@@ -1,19 +0,0 @@
-[**@lancedb/lancedb**](../README.md) • **Docs**
-
-***
-
-[@lancedb/lancedb](../globals.md) / WriteOptions
-
-# Interface: WriteOptions
-
-Write options when creating a Table.
-
-## Properties
-
-### mode?
-
-```ts
-optional mode: WriteMode;
-```
-
-Write mode for writing to a table.
--- a/docs/src/js/namespaces/embedding/README.md
+++ b/docs/src/js/namespaces/embedding/README.md
@@ -17,6 +17,14 @@
 ### Interfaces

 - [EmbeddingFunctionConfig](interfaces/EmbeddingFunctionConfig.md)
+- [EmbeddingFunctionConstructor](interfaces/EmbeddingFunctionConstructor.md)
+- [EmbeddingFunctionCreate](interfaces/EmbeddingFunctionCreate.md)
+- [FieldOptions](interfaces/FieldOptions.md)
+- [FunctionOptions](interfaces/FunctionOptions.md)
+
+### Type Aliases
+
+- [CreateReturnType](type-aliases/CreateReturnType.md)

 ### Functions

--- a/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md
+++ b/docs/src/js/namespaces/embedding/classes/EmbeddingFunction.md
@@ -16,7 +16,7 @@ An embedding function that automatically creates vector representation for a giv

 • **T** = `any`

-• **M** *extends* `FunctionOptions` = `FunctionOptions`
+• **M** *extends* [`FunctionOptions`](../interfaces/FunctionOptions.md) = [`FunctionOptions`](../interfaces/FunctionOptions.md)

 ## Constructors

@@ -118,16 +118,16 @@ sourceField is used in combination with `LanceSchema` to provide a declarative d

 #### Parameters

-* **optionsOrDatatype**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;`FieldOptions`&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
+* **optionsOrDatatype**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;[`FieldOptions`](../interfaces/FieldOptions.md)&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
    The options for the field or the datatype

 #### Returns

-[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;]
+[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]

 #### See

-lancedb.LanceSchema
+[LanceSchema](../functions/LanceSchema.md)

 ***

@@ -178,12 +178,13 @@ vectorField is used in combination with `LanceSchema` to provide a declarative d

 #### Parameters

-* **optionsOrDatatype?**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;`FieldOptions`&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
+* **optionsOrDatatype?**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;[`FieldOptions`](../interfaces/FieldOptions.md)&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
+    The options for the field

 #### Returns

-[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;]
+[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]

 #### See

-lancedb.LanceSchema
+[LanceSchema](../functions/LanceSchema.md)
--- a/docs/src/js/namespaces/embedding/classes/EmbeddingFunctionRegistry.md
+++ b/docs/src/js/namespaces/embedding/classes/EmbeddingFunctionRegistry.md
@@ -51,7 +51,7 @@ Fetch an embedding function by name

 #### Type Parameters

-• **T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`unknown`, `FunctionOptions`&gt;
+• **T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`unknown`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;

 #### Parameters

@@ -60,7 +60,7 @@ Fetch an embedding function by name

 #### Returns

-`undefined` \| `EmbeddingFunctionCreate`&lt;`T`&gt;
+`undefined` \| [`EmbeddingFunctionCreate`](../interfaces/EmbeddingFunctionCreate.md)&lt;`T`&gt;

 ***

@@ -104,7 +104,7 @@ Register an embedding function

 #### Type Parameters

-• **T** *extends* `EmbeddingFunctionConstructor`&lt;[`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt; = `EmbeddingFunctionConstructor`&lt;[`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;
+• **T** *extends* [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)&lt;[`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt; = [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)&lt;[`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;

 #### Parameters

--- a/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md
+++ b/docs/src/js/namespaces/embedding/classes/TextEmbeddingFunction.md
@@ -14,7 +14,7 @@ an abstract class for implementing embedding functions that take text as input

 ## Type Parameters

-• **M** *extends* `FunctionOptions` = `FunctionOptions`
+• **M** *extends* [`FunctionOptions`](../interfaces/FunctionOptions.md) = [`FunctionOptions`](../interfaces/FunctionOptions.md)

 ## Constructors

@@ -158,11 +158,11 @@ sourceField is used in combination with `LanceSchema` to provide a declarative d

 #### Returns

-[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;]
+[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]

 #### See

-lancedb.LanceSchema
+[LanceSchema](../functions/LanceSchema.md)

 #### Overrides

@@ -221,15 +221,16 @@ vectorField is used in combination with `LanceSchema` to provide a declarative d

 #### Parameters

-* **optionsOrDatatype?**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;`FieldOptions`&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
+* **optionsOrDatatype?**: `DataType`&lt;`Type`, `any`&gt; \| `Partial`&lt;[`FieldOptions`](../interfaces/FieldOptions.md)&lt;`DataType`&lt;`Type`, `any`&gt;&gt;&gt;
+    The options for the field

 #### Returns

-[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;]
+[`DataType`&lt;`Type`, `any`&gt;, `Map`&lt;`string`, [`EmbeddingFunction`](EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]

 #### See

-lancedb.LanceSchema
+[LanceSchema](../functions/LanceSchema.md)

 #### Inherited from

--- a/docs/src/js/namespaces/embedding/functions/LanceSchema.md
+++ b/docs/src/js/namespaces/embedding/functions/LanceSchema.md
@@ -14,7 +14,7 @@ Create a schema with embedding functions.

 ## Parameters

-* **fields**: `Record`&lt;`string`, `object` \| [`object`, `Map`&lt;`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;]&gt;
+* **fields**: `Record`&lt;`string`, `object` \| [`object`, `Map`&lt;`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;]&gt;

 ## Returns

--- a/docs/src/js/namespaces/embedding/functions/register.md
+++ b/docs/src/js/namespaces/embedding/functions/register.md
@@ -20,7 +20,7 @@ function register(name?): (ctor) => any

 ### Parameters

-* **ctor**: `EmbeddingFunctionConstructor`&lt;[`EmbeddingFunction`](../classes/EmbeddingFunction.md)&lt;`any`, `FunctionOptions`&gt;&gt;
+* **ctor**: [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)&lt;[`EmbeddingFunction`](../classes/EmbeddingFunction.md)&lt;`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)&gt;&gt;

 ### Returns

--- a/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionConstructor.md
+++ b/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionConstructor.md
@@ -0,0 +1,27 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / EmbeddingFunctionConstructor
+
+# Interface: EmbeddingFunctionConstructor&lt;T&gt;
+
+## Type Parameters
+
+• **T** *extends* [`EmbeddingFunction`](../classes/EmbeddingFunction.md) = [`EmbeddingFunction`](../classes/EmbeddingFunction.md)
+
+## Constructors
+
+### new EmbeddingFunctionConstructor()
+
+```ts
+new EmbeddingFunctionConstructor(modelOptions?): T
+```
+
+#### Parameters
+
+* **modelOptions?**: `T`\[`"TOptions"`\]
+
+#### Returns
+
+`T`
--- a/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionCreate.md
+++ b/docs/src/js/namespaces/embedding/interfaces/EmbeddingFunctionCreate.md
@@ -0,0 +1,27 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / EmbeddingFunctionCreate
+
+# Interface: EmbeddingFunctionCreate&lt;T&gt;
+
+## Type Parameters
+
+• **T** *extends* [`EmbeddingFunction`](../classes/EmbeddingFunction.md)
+
+## Methods
+
+### create()
+
+```ts
+create(options?): CreateReturnType<T>
+```
+
+#### Parameters
+
+* **options?**: `T`\[`"TOptions"`\]
+
+#### Returns
+
+[`CreateReturnType`](../type-aliases/CreateReturnType.md)&lt;`T`&gt;
--- a/docs/src/js/namespaces/embedding/interfaces/FieldOptions.md
+++ b/docs/src/js/namespaces/embedding/interfaces/FieldOptions.md
@@ -0,0 +1,27 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / FieldOptions
+
+# Interface: FieldOptions&lt;T&gt;
+
+## Type Parameters
+
+• **T** *extends* `DataType` = `DataType`
+
+## Properties
+
+### datatype
+
+```ts
+datatype: T;
+```
+
+***
+
+### dims?
+
+```ts
+optional dims: number;
+```
--- a/docs/src/js/namespaces/embedding/interfaces/FunctionOptions.md
+++ b/docs/src/js/namespaces/embedding/interfaces/FunctionOptions.md
@@ -0,0 +1,13 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / FunctionOptions
+
+# Interface: FunctionOptions
+
+Options for a given embedding function
+
+## Indexable
+
+ \[`key`: `string`\]: `any`
--- a/docs/src/js/namespaces/embedding/type-aliases/CreateReturnType.md
+++ b/docs/src/js/namespaces/embedding/type-aliases/CreateReturnType.md
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / CreateReturnType
+
+# Type Alias: CreateReturnType&lt;T&gt;
+
+```ts
+type CreateReturnType<T>: T extends object ? Promise<T> : T;
+```
+
+## Type Parameters
+
+• **T**
--- a/docs/src/js/namespaces/rerankers/classes/RRFReranker.md
+++ b/docs/src/js/namespaces/rerankers/classes/RRFReranker.md
@@ -8,24 +8,6 @@

 Reranks the results using the Reciprocal Rank Fusion (RRF) algorithm.

-Internally this uses the Rust implementation
-
-## Constructors
-
-### new RRFReranker()
-
-```ts
-new RRFReranker(inner): RRFReranker
-```
-
-#### Parameters
-
-* **inner**: `RrfReranker`
-
-#### Returns
-
-[`RRFReranker`](RRFReranker.md)
-
 ## Methods

 ### rerankHybrid()
--- a/docs/src/js/type-aliases/DataLike.md
+++ b/docs/src/js/type-aliases/DataLike.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / DataLike
+
+# Type Alias: DataLike
+
+```ts
+type DataLike: Data | object;
+```
--- a/docs/src/js/type-aliases/FieldLike.md
+++ b/docs/src/js/type-aliases/FieldLike.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / FieldLike
+
+# Type Alias: FieldLike
+
+```ts
+type FieldLike: Field | object;
+```
--- a/docs/src/js/type-aliases/IntoSql.md
+++ b/docs/src/js/type-aliases/IntoSql.md
@@ -0,0 +1,19 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / IntoSql
+
+# Type Alias: IntoSql
+
+```ts
+type IntoSql:
+  | string
+  | number
+  | boolean
+  | null
+  | Date
+  | ArrayBufferLike
+  | Buffer
+  | IntoSql[];
+```
--- a/docs/src/js/type-aliases/IntoVector.md
+++ b/docs/src/js/type-aliases/IntoVector.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / IntoVector
+
+# Type Alias: IntoVector
+
+```ts
+type IntoVector: Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
+```
--- a/docs/src/js/type-aliases/RecordBatchLike.md
+++ b/docs/src/js/type-aliases/RecordBatchLike.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / RecordBatchLike
+
+# Type Alias: RecordBatchLike
+
+```ts
+type RecordBatchLike: RecordBatch | object;
+```
--- a/docs/src/js/type-aliases/SchemaLike.md
+++ b/docs/src/js/type-aliases/SchemaLike.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / SchemaLike
+
+# Type Alias: SchemaLike
+
+```ts
+type SchemaLike: Schema | object;
+```
--- a/docs/src/js/type-aliases/TableLike.md
+++ b/docs/src/js/type-aliases/TableLike.md
@@ -0,0 +1,11 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / TableLike
+
+# Type Alias: TableLike
+
+```ts
+type TableLike: ArrowTable | object;
+```
--- a/docs/src/notebooks/diffusiondb/datagen.py
+++ b/docs/src/notebooks/diffusiondb/datagen.py
@@ -1,17 +1,6 @@
 #!/usr/bin/env python
 #
-#  Copyright 2023 LanceDB Developers
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
+

 """Dataset hf://poloclub/diffusiondb
 """
--- a/docs/src/notebooks/tables_guide.ipynb
+++ b/docs/src/notebooks/tables_guide.ipynb
@@ -114,14 +114,17 @@
    }
   ],
   "source": [
-    "data = [\n",
-    "    {\"vector\": [1.1, 1.2], \"lat\": 45.5, \"long\": -122.7},\n",
-    "    {\"vector\": [0.2, 1.8], \"lat\": 40.1, \"long\": -74.1},\n",
-    "]\n",
+    "import pandas as pd\n",
    "\n",
-    "db.create_table(\"table2\", data)\n",
-    "\n",
-    "db[\"table2\"].head() "
+    "data = pd.DataFrame(\n",
+    "    {\n",
+    "        \"vector\": [[1.1, 1.2, 1.3, 1.4], [0.2, 1.8, 0.4, 3.6]],\n",
+    "        \"lat\": [45.5, 40.1],\n",
+    "        \"long\": [-122.7, -74.1],\n",
+    "    }\n",
+    ")\n",
+    "db.create_table(\"my_table_pandas\", data)\n",
+    "db[\"my_table_pandas\"].head()"
   ]
  },
  {
@@ -164,7 +167,7 @@
    "import pyarrow as pa\n",
    "\n",
    "custom_schema = pa.schema([\n",
-    "pa.field(\"vector\", pa.list_(pa.float32(), 2)),\n",
+    "pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n",
    "pa.field(\"lat\", pa.float32()),\n",
    "pa.field(\"long\", pa.float32())\n",
    "])\n",
--- a/docs/src/search.md
+++ b/docs/src/search.md
@@ -122,7 +122,7 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi

 === "Python"

-    === "sync API"
+    === "Sync API"

        ```python
        --8<-- "python/python/tests/docs/test_binary_vector.py:imports"
@@ -130,7 +130,7 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi
        --8<-- "python/python/tests/docs/test_binary_vector.py:sync_binary_vector"
        ```

-    === "async API"
+    === "Async API"

        ```python
        --8<-- "python/python/tests/docs/test_binary_vector.py:imports"
@@ -153,7 +153,7 @@ The vector value type can be `float16`, `float32` or `float64`.

 === "Python"

-    === "sync API"
+    === "Sync API"

        ```python
        --8<-- "python/python/tests/docs/test_multivector.py:imports"
@@ -161,7 +161,7 @@ The vector value type can be `float16`, `float32` or `float64`.
        --8<-- "python/python/tests/docs/test_multivector.py:sync_multivector"
        ```

-    === "async API"
+    === "Async API"

        ```python
        --8<-- "python/python/tests/docs/test_multivector.py:imports"
@@ -175,7 +175,7 @@ You can also search for vectors within a specific distance range from the query

 === "Python"

-    === "sync API"
+    === "Sync API"

        ```python
        --8<-- "python/python/tests/docs/test_distance_range.py:imports"
@@ -183,7 +183,7 @@ You can also search for vectors within a specific distance range from the query
        --8<-- "python/python/tests/docs/test_distance_range.py:sync_distance_range"
        ```

-    === "async API"
+    === "Async API"

        ```python
        --8<-- "python/python/tests/docs/test_distance_range.py:imports"
--- a/docs/src/search_legacy.ts
+++ b/docs/src/search_legacy.ts
@@ -20,6 +20,7 @@ async function setup() {
 }

 async () => {
+  console.log("search_legacy.ts: start");
  await setup();

  // --8<-- [start:search1]
@@ -37,5 +38,5 @@ async () => {
    .execute();
  // --8<-- [end:search2]

-  console.log("search: done");
+  console.log("search_legacy.ts: done");
 };
--- a/docs/src/sql.md
+++ b/docs/src/sql.md
@@ -64,6 +64,7 @@ const tbl = await db.createTable('myVectors', data)
        ```ts
        --8<-- "docs/src/sql_legacy.ts:search"
        ```
+
 !!! note

    Creating a [scalar index](guides/scalar_index.md) accelerates filtering.
@@ -118,14 +119,14 @@ For example, the following filter string is acceptable:
        --8<-- "docs/src/sql_legacy.ts:vec_search"
        ```

-If your column name contains special characters or is a [SQL Keyword](https://docs.rs/sqlparser/latest/sqlparser/keywords/index.html),
+If your column name contains special characters, upper-case characters, or is a [SQL Keyword](https://docs.rs/sqlparser/latest/sqlparser/keywords/index.html),
 you can use backtick (`` ` ``) to escape it. For nested fields, each segment of the
 path must be wrapped in backticks.

 === "SQL"

    ```sql
-    `CUBE` = 10 AND `column name with space` IS NOT NULL
+    `CUBE` = 10 AND `UpperCaseName` = '3' AND `column name with space` IS NOT NULL
      AND `nested with space`.`inner with space` < 2
    ```

--- a/docs/src/sql_legacy.ts
+++ b/docs/src/sql_legacy.ts
@@ -1,6 +1,7 @@
 import * as vectordb from "vectordb";

 (async () => {
+  console.log("sql_legacy.ts: start");
  const db = await vectordb.connect("data/sample-lancedb");

  let data = [];
@@ -34,5 +35,5 @@ import * as vectordb from "vectordb";
  await tbl.filter("id = 10").limit(10).execute();
  // --8<-- [end:sql_search]

-  console.log("SQL search: done");
+  console.log("sql_legacy.ts: done");
 })();
--- a/docs/test/md_testing.py
+++ b/docs/test/md_testing.py
@@ -11,6 +11,7 @@ excluded_globs = [
    "../src/examples/*.md",
    "../src/integrations/*.md",
    "../src/guides/tables.md",
+    "../src/guides/tables/merge_insert.md",
    "../src/python/duckdb.md",
    "../src/python/pandas_and_pyarrow.md",
    "../src/python/polars_arrow.md",
--- a/java/core/lancedb-jni/src/connection.rs
+++ b/java/core/lancedb-jni/src/connection.rs
@@ -1,3 +1,6 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
 use crate::ffi::JNIEnvExt;
 use crate::traits::IntoJava;
 use crate::{Error, RT};
--- a/java/core/lancedb-jni/src/error.rs
+++ b/java/core/lancedb-jni/src/error.rs
@@ -1,16 +1,5 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 use std::str::Utf8Error;

--- a/java/core/lancedb-jni/src/ffi.rs
+++ b/java/core/lancedb-jni/src/ffi.rs
@@ -1,16 +1,5 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 use core::slice;

--- a/java/core/lancedb-jni/src/lib.rs
+++ b/java/core/lancedb-jni/src/lib.rs
@@ -1,16 +1,5 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 use lazy_static::lazy_static;

--- a/java/core/lancedb-jni/src/traits.rs
+++ b/java/core/lancedb-jni/src/traits.rs
@@ -1,16 +1,5 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 use jni::objects::{JMap, JObject, JString, JValue};
 use jni::JNIEnv;
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.15.0-final.0</version>
+        <version>0.16.1-beta.1</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/core/src/main/java/com/lancedb/lancedb/Connection.java
+++ b/java/core/src/main/java/com/lancedb/lancedb/Connection.java
@@ -1,16 +1,5 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 package com.lancedb.lancedb;

@@ -117,4 +106,4 @@ public class Connection implements Closeable {
  private native void releaseNativeConnection(long handle);

  private Connection() {}
-}
+}
--- a/java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java
+++ b/java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java
@@ -1,16 +1,5 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
 package com.lancedb.lancedb;

 import static org.junit.jupiter.api.Assertions.assertEquals;
--- a/java/license_header.txt
+++ b/java/license_header.txt
@@ -0,0 +1,4 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * SPDX-FileCopyrightText: Copyright The LanceDB Authors
+ */
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.15.0-final.0</version>
+    <version>0.16.1-beta.1</version>
    <packaging>pom</packaging>

    <name>LanceDB Parent</name>
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.15.0",
+  "version": "0.16.1-beta.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.15.0",
+      "version": "0.16.1-beta.1",
      "cpu": [
        "x64",
        "arm64"
@@ -52,14 +52,14 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.15.0",
-        "@lancedb/vectordb-darwin-x64": "0.15.0",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.15.0",
-        "@lancedb/vectordb-linux-arm64-musl": "0.15.0",
-        "@lancedb/vectordb-linux-x64-gnu": "0.15.0",
-        "@lancedb/vectordb-linux-x64-musl": "0.15.0",
-        "@lancedb/vectordb-win32-arm64-msvc": "0.15.0",
-        "@lancedb/vectordb-win32-x64-msvc": "0.15.0"
+        "@lancedb/vectordb-darwin-arm64": "0.16.1-beta.1",
+        "@lancedb/vectordb-darwin-x64": "0.16.1-beta.1",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.1",
+        "@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.1",
+        "@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.1",
+        "@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.1",
+        "@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.1",
+        "@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.1"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
@@ -330,9 +330,9 @@
      }
    },
    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.15.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.15.0.tgz",
-      "integrity": "sha512-FnBRsCrxvecjhkMQus9M9RQpXyhu1jxQjYGDaqqRIfcUd3ew7ahIR4qk9FyALHmjpPd72xJZgNLjliHtsIX4/w==",
+      "version": "0.16.1-beta.1",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.16.1-beta.1.tgz",
+      "integrity": "sha512-dV/I7K9a8OZUk6JaY1mOFRCtq0+ae2erRaDe6AQVeA6t+IVClQaYkHiYUQa5wHqN0QTb2GU7qUCRgA8ZLilVuw==",
      "cpu": [
        "arm64"
      ],
@@ -343,9 +343,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.15.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.15.0.tgz",
-      "integrity": "sha512-zy+nt1WBCabVI16u2t3sqGUXBOmnF5ZXMsHa9TWYEXVnbw5112K7/1783DTNA/ZBI/WziUa5jqYQ0GOwkgruqA==",
+      "version": "0.16.1-beta.1",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.16.1-beta.1.tgz",
+      "integrity": "sha512-Fyn9zTDUrYK/bjriHSu/0TiIA7NJmjQQxqzjcwFOMsghL8D0FQuxqHYWKoJSaJv6me6Kd41R4qesC9mrt2ZGDw==",
      "cpu": [
        "x64"
      ],
@@ -356,9 +356,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.15.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.15.0.tgz",
-      "integrity": "sha512-2Pbw+z5Ij5QBvmBxmjaT5F2lNHftVWlarDM1bDc4JtgodJ3Js729qnVLQ0yehnlt+hM6aGFEyn8bH5vf6gEvpQ==",
+      "version": "0.16.1-beta.1",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.16.1-beta.1.tgz",
+      "integrity": "sha512-2vcSfg6pVzEkA7tqQssBQgnf7Rbw5fp1xdhnd0X5h4z6Xqs6CpynbkfsyMjBRuL7bDv4phl1lvJmlb3Zfre7mg==",
      "cpu": [
        "arm64"
      ],
@@ -369,9 +369,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-arm64-musl": {
-      "version": "0.15.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.15.0.tgz",
-      "integrity": "sha512-WIvgd2EY2maCdYNHPC0C9RprjNWL83FkQKtn591xixltFk3XKgvBQ2USZW2tXndH/WVdvFQvystmZ3dgUrh8DQ==",
+      "version": "0.16.1-beta.1",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.16.1-beta.1.tgz",
+      "integrity": "sha512-rxE/G2PRKTU+fi5j1ojBV9dKfVLKGLnKgLezGryPkKwMYokCQ8kGxohKTmrmLaclXZpBDQo4AYIIE0oJhXG04g==",
      "cpu": [
        "arm64"
      ],
@@ -382,9 +382,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.15.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.15.0.tgz",
-      "integrity": "sha512-Pet3aPE+yQT13Gm0+fh11pgHvImS4X8Uf0zRdzsx0eja7x8j15VrVcZTEVTT4QdBNiZrhXBuiq482NJBsqe6vw==",
+      "version": "0.16.1-beta.1",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.16.1-beta.1.tgz",
+      "integrity": "sha512-XWqRMif+wVTfcAPQI9n/H6eNmfwPHWBo4e0SAdWw+eifaBDxrcSTDhPpIrW+NGTJMhlm01rD7Qzh4j0y/Vcy5w==",
      "cpu": [
        "x64"
      ],
@@ -395,9 +395,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-x64-musl": {
-      "version": "0.15.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.15.0.tgz",
-      "integrity": "sha512-BC1RvIoEmyOr7ENp618vs9F05gdN7aKlToJNZnGIoi++hRZ25y39B1xxMXQHDnUL8G+Ur9kJObfQ43nVWqueTQ==",
+      "version": "0.16.1-beta.1",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.16.1-beta.1.tgz",
+      "integrity": "sha512-W/OV5aXAbqhQQ8NYLJG5W/ZdHwpfjiS/9VTVVo44cOznCO6tijiSsrLYIyIw/rHU+ORhYi7My+5+JmkrIoQIgQ==",
      "cpu": [
        "x64"
      ],
@@ -408,9 +408,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-win32-arm64-msvc": {
-      "version": "0.15.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-arm64-msvc/-/vectordb-win32-arm64-msvc-0.15.0.tgz",
-      "integrity": "sha512-H9BeryZl1aLxldtVP0XyiQJyzKStkuxS6SmIg+zaANr9Dns+LmVxYCz429JLC0DlvBWoYjTfK9WJTgMSZXr0Cg==",
+      "version": "0.16.1-beta.1",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-arm64-msvc/-/vectordb-win32-arm64-msvc-0.16.1-beta.1.tgz",
+      "integrity": "sha512-jy18KmJUem+lInjODNfPLSXM8Dmb9xydcab/IoSPB+6qhEn/QC0HW3R1KkR3I6cfryYkEXsNd35pNdtajn9DsQ==",
      "cpu": [
        "arm64"
      ],
@@ -421,9 +421,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.15.0",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.15.0.tgz",
-      "integrity": "sha512-J8JICux2M82OR27i/4YAbEPlvszuE7EnGIU5jmm2+RTFaptKOCshH1C4D4jEXDAaHcUkVgsxyc9lGmGJCkGLhg==",
+      "version": "0.16.1-beta.1",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.16.1-beta.1.tgz",
+      "integrity": "sha512-ObGYHAkvyOxJA3QTxMukXaHCe29J2Pbsm90w2c+E3zGb8K5Xe4pLTqojGEDYrtNKZhf2d9YMsoQq8dg6mLsbag==",
      "cpu": [
        "x64"
      ],
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.15.0",
+  "version": "0.16.1-beta.1",
  "description": " Serverless, low-latency vector database for AI applications",
  "private": false,
  "main": "dist/index.js",
@@ -92,13 +92,13 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-x64": "0.15.0",
-    "@lancedb/vectordb-darwin-arm64": "0.15.0",
-    "@lancedb/vectordb-linux-x64-gnu": "0.15.0",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.15.0",
-    "@lancedb/vectordb-linux-x64-musl": "0.15.0",
-    "@lancedb/vectordb-linux-arm64-musl": "0.15.0",
-    "@lancedb/vectordb-win32-x64-msvc": "0.15.0",
-    "@lancedb/vectordb-win32-arm64-msvc": "0.15.0"
+    "@lancedb/vectordb-darwin-x64": "0.16.1-beta.1",
+    "@lancedb/vectordb-darwin-arm64": "0.16.1-beta.1",
+    "@lancedb/vectordb-linux-x64-gnu": "0.16.1-beta.1",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.16.1-beta.1",
+    "@lancedb/vectordb-linux-x64-musl": "0.16.1-beta.1",
+    "@lancedb/vectordb-linux-arm64-musl": "0.16.1-beta.1",
+    "@lancedb/vectordb-win32-x64-msvc": "0.16.1-beta.1",
+    "@lancedb/vectordb-win32-arm64-msvc": "0.16.1-beta.1"
  }
 }
--- a/node/src/index.ts
+++ b/node/src/index.ts
@@ -47,7 +47,8 @@ const {
  tableSchema,
  tableAddColumns,
  tableAlterColumns,
-  tableDropColumns
+  tableDropColumns,
+  tableDropIndex
  // eslint-disable-next-line @typescript-eslint/no-var-requires
 } = require("../native.js");

@@ -604,6 +605,13 @@ export interface Table<T = number[]> {
   */
  dropColumns(columnNames: string[]): Promise<void>

+  /**
+   * Drop an index from the table
+   *
+   * @param indexName The name of the index to drop
+   */
+  dropIndex(indexName: string): Promise<void>
+
  /**
   * Instrument the behavior of this Table with middleware.
   *
@@ -1206,6 +1214,10 @@ export class LocalTable<T = number[]> implements Table<T> {
    return tableDropColumns.call(this._tbl, columnNames);
  }

+  async dropIndex(indexName: string): Promise<void> {
+    return tableDropIndex.call(this._tbl, indexName);
+  }
+
  withMiddleware(middleware: HttpMiddleware): Table<T> {
    return this;
  }
--- a/node/src/remote/index.ts
+++ b/node/src/remote/index.ts
@@ -471,6 +471,25 @@ export class RemoteTable<T = number[]> implements Table<T> {
      )
    }
  }
+
+  /**
+   * Drop an index from the remote table.
+   *
+   * @param indexName The name of the index to drop
+   * @throws Error if the server responds with a status other than 200
+   */
+  async dropIndex (indexName: string): Promise<void> {
+    // Table and index names are URL-encoded because they become path segments.
+    const res = await this._client.post(
+        `/v1/table/${encodeURIComponent(this._name)}/index/${encodeURIComponent(indexName)}/drop/`
+    )
+    if (res.status !== 200) {
+      throw new Error(
+          `Server Error, status: ${res.status}, ` +
+          // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
+          `message: ${res.statusText}: ${await res.body()}`
+      )
+    }
+  }

  async countRows (filter?: string): Promise<number> {
    const result = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/count_rows/`, {
--- a/node/src/test/test.ts
+++ b/node/src/test/test.ts
@@ -894,6 +894,27 @@ describe("LanceDB client", function () {
      expect(stats.distanceType).to.equal("l2");
      expect(stats.numIndices).to.equal(1);
    }).timeout(50_000);
+
+    // not yet implemented
+    // it("can drop index", async function () {
+    //   const uri = await createTestDB(32, 300);
+    //   const con = await lancedb.connect(uri);
+    //   const table = await con.openTable("vectors");
+    //   await table.createIndex({
+    //     type: "ivf_pq",
+    //     column: "vector",
+    //     num_partitions: 2,
+    //     max_iters: 2,
+    //     num_sub_vectors: 2
+    //   });
+    //
+    //   const indices = await table.listIndices();
+    //   expect(indices).to.have.lengthOf(1);
+    //   expect(indices[0].name).to.equal("vector_idx");
+    //
+    //   await table.dropIndex("vector_idx");
+    //   expect(await table.listIndices()).to.have.lengthOf(0);
+    // }).timeout(50_000);
  });

  describe("when using a custom embedding function", function () {
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.15.0"
+version = "0.16.1-beta.1"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/README.md
+++ b/nodejs/README.md
@@ -32,7 +32,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
 console.log(results);
 ```

-The [quickstart](../basic.md) contains a more complete example.
+The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.

 ## Development

--- a/nodejs/test/arrow.test.ts
+++ b/nodejs/test/arrow.test.ts
@@ -1,17 +1,7 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
 import { Schema } from "apache-arrow";
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.

 import * as arrow15 from "apache-arrow-15";
 import * as arrow16 from "apache-arrow-16";
@@ -65,6 +55,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      Float64,
      Struct,
      List,
+      Int16,
      Int32,
      Int64,
      Float,
@@ -118,13 +109,16 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          false,
        ),
      ]);
-
      const table = (await tableCreationMethod(
        records,
        recordsReversed,
        schema,
        // biome-ignore lint/suspicious/noExplicitAny: <explanation>
      )) as any;
+
+      // We expect deterministic ordering of the fields
+      expect(table.schema.names).toEqual(schema.names);
+
      schema.fields.forEach(
        (
          // biome-ignore lint/suspicious/noExplicitAny: <explanation>
@@ -151,13 +145,13 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
    describe("The function makeArrowTable", function () {
      it("will use data types from a provided schema instead of inference", async function () {
        const schema = new Schema([
-          new Field("a", new Int32()),
-          new Field("b", new Float32()),
+          new Field("a", new Int32(), false),
+          new Field("b", new Float32(), true),
          new Field(
            "c",
            new FixedSizeList(3, new Field("item", new Float16())),
          ),
-          new Field("d", new Int64()),
+          new Field("d", new Int64(), true),
        ]);
        const table = makeArrowTable(
          [
@@ -175,12 +169,15 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(actual.numRows).toBe(3);
        const actualSchema = actual.schema;
        expect(actualSchema).toEqual(schema);
+        expect(table.getChild("a")?.toJSON()).toEqual([1, 4, 7]);
+        expect(table.getChild("b")?.toJSON()).toEqual([2, 5, 8]);
+        expect(table.getChild("d")?.toJSON()).toEqual([9n, 10n, null]);
      });

      it("will assume the column `vector` is FixedSizeList<Float32> by default", async function () {
        const schema = new Schema([
          new Field("a", new Float(Precision.DOUBLE), true),
-          new Field("b", new Float(Precision.DOUBLE), true),
+          new Field("b", new Int64(), true),
          new Field(
            "vector",
            new FixedSizeList(
@@ -191,9 +188,9 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          ),
        ]);
        const table = makeArrowTable([
-          { a: 1, b: 2, vector: [1, 2, 3] },
-          { a: 4, b: 5, vector: [4, 5, 6] },
-          { a: 7, b: 8, vector: [7, 8, 9] },
+          { a: 1, b: 2n, vector: [1, 2, 3] },
+          { a: 4, b: 5n, vector: [4, 5, 6] },
+          { a: 7, b: 8n, vector: [7, 8, 9] },
        ]);

        const buf = await fromTableToBuffer(table);
@@ -203,6 +200,19 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(actual.numRows).toBe(3);
        const actualSchema = actual.schema;
        expect(actualSchema).toEqual(schema);
+
+        expect(table.getChild("a")?.toJSON()).toEqual([1, 4, 7]);
+        expect(table.getChild("b")?.toJSON()).toEqual([2n, 5n, 8n]);
+        expect(
+          table
+            .getChild("vector")
+            ?.toJSON()
+            .map((v) => v.toJSON()),
+        ).toEqual([
+          [1, 2, 3],
+          [4, 5, 6],
+          [7, 8, 9],
+        ]);
      });

      it("can support multiple vector columns", async function () {
@@ -216,7 +226,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          ),
          new Field(
            "vec2",
-            new FixedSizeList(3, new Field("item", new Float16(), true)),
+            new FixedSizeList(3, new Field("item", new Float64(), true)),
            true,
          ),
        ]);
@@ -229,7 +239,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          {
            vectorColumns: {
              vec1: { type: new Float16() },
-              vec2: { type: new Float16() },
+              vec2: { type: new Float64() },
            },
          },
        );
@@ -317,6 +327,53 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
          false,
        );
      });
+
+      it("will allow subsets of columns if nullable", async function () {
+        const schema = new Schema([
+          new Field("a", new Int64(), true),
+          new Field(
+            "s",
+            new Struct([
+              new Field("x", new Int32(), true),
+              new Field("y", new Int32(), true),
+            ]),
+            true,
+          ),
+          new Field("d", new Int16(), true),
+        ]);
+
+        const table = makeArrowTable([{ a: 1n }], { schema });
+        expect(table.numCols).toBe(1);
+        expect(table.numRows).toBe(1);
+
+        const table2 = makeArrowTable([{ a: 1n, d: 2 }], { schema });
+        expect(table2.numCols).toBe(2);
+
+        const table3 = makeArrowTable([{ s: { y: 3 } }], { schema });
+        expect(table3.numCols).toBe(1);
+        const expectedSchema = new Schema([
+          new Field("s", new Struct([new Field("y", new Int32(), true)]), true),
+        ]);
+        expect(table3.schema).toEqual(expectedSchema);
+      });
+
+      it("will work even if columns are sparsely provided", async function () {
+        const sparseRecords = [{ a: 1n }, { b: 2n }, { c: 3n }, { d: 4n }];
+        const table = makeArrowTable(sparseRecords);
+        expect(table.numCols).toBe(4);
+        expect(table.numRows).toBe(4);
+
+        const schema = new Schema([
+          new Field("a", new Int64(), true),
+          new Field("b", new Int32(), true),
+          new Field("c", new Int64(), true),
+          new Field("d", new Int16(), true),
+        ]);
+        const table2 = makeArrowTable(sparseRecords, { schema });
+        expect(table2.numCols).toBe(4);
+        expect(table2.numRows).toBe(4);
+        expect(table2.schema).toEqual(schema);
+      });
    });

    class DummyEmbedding extends EmbeddingFunction<string> {
--- a/nodejs/test/connection.test.ts
+++ b/nodejs/test/connection.test.ts
@@ -1,16 +1,5 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import { readdirSync } from "fs";
 import { Field, Float64, Schema } from "apache-arrow";
@@ -28,14 +17,14 @@ describe("when connecting", () => {
  it("should connect", async () => {
    const db = await connect(tmpDir.name);
    expect(db.display()).toBe(
-      `NativeDatabase(uri=${tmpDir.name}, read_consistency_interval=None)`,
+      `ListingDatabase(uri=${tmpDir.name}, read_consistency_interval=None)`,
    );
  });

  it("should allow read consistency interval to be specified", async () => {
    const db = await connect(tmpDir.name, { readConsistencyInterval: 5 });
    expect(db.display()).toBe(
-      `NativeDatabase(uri=${tmpDir.name}, read_consistency_interval=5s)`,
+      `ListingDatabase(uri=${tmpDir.name}, read_consistency_interval=5s)`,
    );
  });
 });
@@ -72,6 +61,26 @@ describe("given a connection", () => {
    await expect(tbl.countRows()).resolves.toBe(1);
  });

+  it("should be able to drop tables", async () => {
+    // Start from three tables so a single drop and a bulk drop are distinguishable.
+    await db.createTable("test", [{ id: 1 }, { id: 2 }]);
+    await db.createTable("test2", [{ id: 1 }, { id: 2 }]);
+    await db.createTable("test3", [{ id: 1 }, { id: 2 }]);
+    await expect(db.tableNames()).resolves.toEqual(["test", "test2", "test3"]);
+
+    // Dropping one table must leave the other two untouched.
+    await db.dropTable("test2");
+    await expect(db.tableNames()).resolves.toEqual(["test", "test3"]);
+
+    // Dropping everything must leave the database empty.
+    await db.dropAllTables();
+    await expect(db.tableNames()).resolves.toEqual([]);
+
+    // Make sure we can still create more tables after dropping all
+    await db.createTable("test4", [{ id: 1 }, { id: 2 }]);
+    await expect(db.tableNames()).resolves.toEqual(["test4"]);
+  });
+
  it("should fail if creating table twice, unless overwrite is true", async () => {
    let tbl = await db.createTable("test", [{ id: 1 }, { id: 2 }]);
    await expect(tbl.countRows()).resolves.toBe(2);
@@ -107,14 +116,15 @@ describe("given a connection", () => {
    const data = [...Array(10000).keys()].map((i) => ({ id: i }));

    // Create in v1 mode
-    let table = await db.createTable("test", data, { useLegacyFormat: true });
+    let table = await db.createTable("test", data, {
+      storageOptions: { newTableDataStorageVersion: "legacy" },
+    });

    const isV2 = async (table: Table) => {
      const data = await table
        .query()
        .limit(10000)
        .toArrow({ maxBatchLength: 100000 });
-      console.log(data.batches.length);
      return data.batches.length < 5;
    };

@@ -133,7 +143,7 @@ describe("given a connection", () => {
    const schema = new Schema([new Field("id", new Float64(), true)]);

    table = await db.createEmptyTable("test_v2_empty", schema, {
-      useLegacyFormat: false,
+      storageOptions: { newTableDataStorageVersion: "stable" },
    });

    await table.add(data);
--- a/nodejs/test/embedding.test.ts
+++ b/nodejs/test/embedding.test.ts
@@ -1,16 +1,5 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import * as tmp from "tmp";

@@ -83,6 +72,74 @@ describe("embedding functions", () => {
    expect(vector0).toEqual([1, 2, 3]);
  });

+  it("should be able to append and upsert using embedding function", async () => {
+    @register()
+    class MockEmbeddingFunction extends EmbeddingFunction<string> {
+      toJSON(): object {
+        return {};
+      }
+      ndims() {
+        return 3;
+      }
+      embeddingDataType(): Float {
+        return new Float32();
+      }
+      async computeQueryEmbeddings(_data: string) {
+        return [1, 2, 3];
+      }
+      async computeSourceEmbeddings(data: string[]) {
+        return Array.from({ length: data.length }).fill([
+          1, 2, 3,
+        ]) as number[][];
+      }
+    }
+    const func = new MockEmbeddingFunction();
+    const db = await connect(tmpDir.name);
+    const table = await db.createTable(
+      "test",
+      [
+        { id: 1, text: "hello" },
+        { id: 2, text: "world" },
+      ],
+      {
+        embeddingFunction: {
+          function: func,
+          sourceColumn: "text",
+        },
+      },
+    );
+
+    const schema = await table.schema();
+    expect(schema.metadata.get("embedding_functions")).toBeDefined();
+
+    // Append some new data
+    const data1 = [
+      { id: 3, text: "forest" },
+      { id: 4, text: "mountain" },
+    ];
+    await table.add(data1);
+
+    // Upsert some data
+    const data2 = [
+      { id: 5, text: "river" },
+      { id: 2, text: "canyon" },
+    ];
+    await table
+      .mergeInsert("id")
+      .whenMatchedUpdateAll()
+      .whenNotMatchedInsertAll()
+      .execute(data2);
+
+    const rows = await table.query().toArray();
+    rows.sort((a, b) => a.id - b.id);
+    const texts = rows.map((row) => row.text);
+    expect(texts).toEqual(["hello", "canyon", "forest", "mountain", "river"]);
+    const vectorsDefined = rows.map(
+      (row) => row.vector !== undefined && row.vector !== null,
+    );
+    expect(vectorsDefined).toEqual(new Array(5).fill(true));
+  });
+
  it("should be able to create an empty table with an embedding function", async () => {
    @register()
    class MockEmbeddingFunction extends EmbeddingFunction<string> {
--- a/nodejs/test/registry.test.ts
+++ b/nodejs/test/registry.test.ts
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
 import * as apiArrow from "apache-arrow";
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+
 import * as arrow15 from "apache-arrow-15";
 import * as arrow16 from "apache-arrow-16";
 import * as arrow17 from "apache-arrow-17";
--- a/nodejs/test/remote.test.ts
+++ b/nodejs/test/remote.test.ts
@@ -1,16 +1,5 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import * as http from "http";
 import { RequestListener } from "http";
@@ -115,4 +104,26 @@ describe("remote connection", () => {
      },
    );
  });
+
+  it("should pass on requested extra headers", async () => {
+    await withMockDatabase(
+      (req, res) => {
+        expect(req.headers["x-my-header"]).toEqual("my-value");
+
+        const body = JSON.stringify({ tables: [] });
+        res.writeHead(200, { "Content-Type": "application/json" }).end(body);
+      },
+      async (db) => {
+        const tableNames = await db.tableNames();
+        expect(tableNames).toEqual([]);
+      },
+      {
+        clientConfig: {
+          extraHeaders: {
+            "x-my-header": "my-value",
+          },
+        },
+      },
+    );
+  });
 });
--- a/nodejs/test/s3_integration.test.ts
+++ b/nodejs/test/s3_integration.test.ts
@@ -1,16 +1,5 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 /* eslint-disable @typescript-eslint/naming-convention */

--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -1,16 +1,5 @@
-// Copyright 2024 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import * as fs from "fs";
 import * as path from "path";
@@ -264,6 +253,31 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      const arrowTbl = await table.toArrow();
      expect(arrowTbl).toBeInstanceOf(ArrowTable);
    });
+
+    it("should be able to handle missing fields", async () => {
+      const schema = new arrow.Schema([
+        new arrow.Field("id", new arrow.Int32(), true),
+        new arrow.Field("y", new arrow.Int32(), true),
+        new arrow.Field("z", new arrow.Int64(), true),
+      ]);
+      const db = await connect(tmpDir.name);
+      const table = await db.createEmptyTable("testNull", schema);
+      await table.add([{ id: 1, y: 2 }]);
+      await table.add([{ id: 2 }]);
+
+      await table
+        .mergeInsert("id")
+        .whenNotMatchedInsertAll()
+        .execute([
+          { id: 3, z: 3 },
+          { id: 4, z: 5 },
+        ]);
+
+      const res = await table.query().toArrow();
+      expect(res.getChild("id")?.toJSON()).toEqual([1, 2, 3, 4]);
+      expect(res.getChild("y")?.toJSON()).toEqual([2, null, null, null]);
+      expect(res.getChild("z")?.toJSON()).toEqual([null, null, 3n, 5n]);
+    });
  },
 );

--- a/nodejs/test/util.test.ts
+++ b/nodejs/test/util.test.ts
@@ -1,3 +1,6 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
 import { IntoSql, toSQL } from "../lancedb/util";
 test.each([
  ["string", "'string'"],
--- a/nodejs/build.rs
+++ b/nodejs/build.rs
@@ -1,3 +1,6 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
 extern crate napi_build;

 fn main() {
--- a/nodejs/examples/ann_indexes.test.ts
+++ b/nodejs/examples/ann_indexes.test.ts
@@ -3,7 +3,7 @@
 import { expect, test } from "@jest/globals";
 // --8<-- [start:import]
 import * as lancedb from "@lancedb/lancedb";
-import { VectorQuery } from "@lancedb/lancedb";
+import type { VectorQuery } from "@lancedb/lancedb";
 // --8<-- [end:import]
 import { withTempDirectory } from "./util.ts";

--- a/nodejs/examples/basic.test.ts
+++ b/nodejs/examples/basic.test.ts
@@ -117,26 +117,24 @@ test("basic table examples", async () => {
      // --8<-- [end:add_data]
    }

-    {
-      // --8<-- [start:add_columns]
-      await tbl.addColumns([
-        { name: "double_price", valueSql: "cast((price * 2) as Float)" },
-      ]);
-      // --8<-- [end:add_columns]
-      // --8<-- [start:alter_columns]
-      await tbl.alterColumns([
-        {
-          path: "double_price",
-          rename: "dbl_price",
-          dataType: "float",
-          nullable: true,
-        },
-      ]);
-      // --8<-- [end:alter_columns]
-      // --8<-- [start:drop_columns]
-      await tbl.dropColumns(["dbl_price"]);
-      // --8<-- [end:drop_columns]
-    }
+    // --8<-- [start:add_columns]
+    await tbl.addColumns([
+      { name: "double_price", valueSql: "cast((price * 2) as Float)" },
+    ]);
+    // --8<-- [end:add_columns]
+    // --8<-- [start:alter_columns]
+    await tbl.alterColumns([
+      {
+        path: "double_price",
+        rename: "dbl_price",
+        dataType: "float",
+        nullable: true,
+      },
+    ]);
+    // --8<-- [end:alter_columns]
+    // --8<-- [start:drop_columns]
+    await tbl.dropColumns(["dbl_price"]);
+    // --8<-- [end:drop_columns]

    {
      // --8<-- [start:vector_search]
--- a/nodejs/examples/biome.json
+++ b/nodejs/examples/biome.json
@@ -0,0 +1,52 @@
+{
+  "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
+  "vcs": {
+    "enabled": false,
+    "clientKind": "git",
+    "useIgnoreFile": false
+  },
+  "files": {
+    "ignoreUnknown": false,
+    "ignore": []
+  },
+  "formatter": {
+    "enabled": true,
+    "indentStyle": "space"
+  },
+  "organizeImports": {
+    "enabled": true
+  },
+  "linter": {
+    "enabled": true,
+    "rules": {
+      "recommended": true
+    }
+  },
+  "javascript": {
+    "formatter": {
+      "quoteStyle": "double"
+    }
+  },
+  "overrides": [
+    {
+      "include": ["*"],
+      "linter": {
+        "rules": {
+          "style": {
+            "noNonNullAssertion": "off"
+          }
+        }
+      }
+    },
+    {
+      "include": ["merge_insert.test.ts"],
+      "linter": {
+        "rules": {
+          "style": {
+            "useNamingConvention": "off"
+          }
+        }
+      }
+    }
+  ]
+}
--- a/nodejs/examples/custom_embedding_function.test.ts
+++ b/nodejs/examples/custom_embedding_function.test.ts
@@ -1,4 +1,7 @@
-import { FeatureExtractionPipeline, pipeline } from "@huggingface/transformers";
+import {
+  type FeatureExtractionPipeline,
+  pipeline,
+} from "@huggingface/transformers";
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
 import { expect, test } from "@jest/globals";
--- a/nodejs/examples/full_text_search.test.ts
+++ b/nodejs/examples/full_text_search.test.ts
@@ -42,4 +42,4 @@ test("full text search", async () => {
    expect(result.length).toBe(10);
    // --8<-- [end:full_text_search]
  });
-});
+}, 10_000);
--- a/nodejs/examples/merge_insert.test.ts
+++ b/nodejs/examples/merge_insert.test.ts
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+import { expect, test } from "@jest/globals";
+import * as lancedb from "@lancedb/lancedb";
+
+test("basic upsert", async () => {
+  const db = await lancedb.connect("memory://");
+
+  // --8<-- [start:upsert_basic]
+  const table = await db.createTable("users", [
+    { id: 0, name: "Alice" },
+    { id: 1, name: "Bob" },
+  ]);
+
+  const newUsers = [
+    { id: 1, name: "Bobby" },
+    { id: 2, name: "Charlie" },
+  ];
+  await table
+    .mergeInsert("id")
+    .whenMatchedUpdateAll()
+    .whenNotMatchedInsertAll()
+    .execute(newUsers);
+
+  await table.countRows(); // 3
+  // --8<-- [end:upsert_basic]
+  expect(await table.countRows()).toBe(3);
+
+  // --8<-- [start:insert_if_not_exists]
+  const table2 = await db.createTable("domains", [
+    { domain: "google.com", name: "Google" },
+    { domain: "github.com", name: "GitHub" },
+  ]);
+
+  const newDomains = [
+    { domain: "google.com", name: "Google" },
+    { domain: "facebook.com", name: "Facebook" },
+  ];
+  await table2
+    .mergeInsert("domain")
+    .whenNotMatchedInsertAll()
+    .execute(newDomains);
+  await table2.countRows(); // 3
+  // --8<-- [end:insert_if_not_exists]
+  expect(await table2.countRows()).toBe(3);
+
+  // --8<-- [start:replace_range]
+  const table3 = await db.createTable("chunks", [
+    { doc_id: 0, chunk_id: 0, text: "Hello" },
+    { doc_id: 0, chunk_id: 1, text: "World" },
+    { doc_id: 1, chunk_id: 0, text: "Foo" },
+    { doc_id: 1, chunk_id: 1, text: "Bar" },
+  ]);
+
+  const newChunks = [{ doc_id: 1, chunk_id: 0, text: "Baz" }];
+
+  await table3
+    .mergeInsert(["doc_id", "chunk_id"])
+    .whenMatchedUpdateAll()
+    .whenNotMatchedInsertAll()
+    .whenNotMatchedBySourceDelete({ where: "doc_id = 1" })
+    .execute(newChunks);
+
+  await table3.countRows("doc_id = 1"); // 1
+  // --8<-- [end:replace_range]
+  expect(await table3.countRows("doc_id = 1")).toBe(1);
+});
--- a/nodejs/examples/sentence-transformers.test.ts
+++ b/nodejs/examples/sentence-transformers.test.ts
@@ -6,7 +6,7 @@ import { withTempDirectory } from "./util.ts";
 import * as lancedb from "@lancedb/lancedb";
 import "@lancedb/lancedb/embedding/transformers";
 import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
-import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
+import type { EmbeddingFunction } from "@lancedb/lancedb/embedding";
 import { Utf8 } from "apache-arrow";

 test("full text search", async () => {
@@ -58,6 +58,6 @@ test("full text search", async () => {
    const query = "How many bones are in the human body?";
    const actual = await tbl.search(query).limit(1).toArray();

-    expect(actual[0]["text"]).toBe("The human body has 206 bones.");
+    expect(actual[0].text).toBe("The human body has 206 bones.");
  });
 }, 100_000);
--- a/nodejs/examples/util.ts
+++ b/nodejs/examples/util.ts
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
-import * as fs from "fs";
-import { tmpdir } from "os";
-import * as path from "path";
+import * as fs from "node:fs";
+import { tmpdir } from "node:os";
+import * as path from "node:path";

 export async function withTempDirectory(
  fn: (tempDir: string) => Promise<void>,
--- a/nodejs/lancedb/arrow.ts
+++ b/nodejs/lancedb/arrow.ts
@@ -1,43 +1,38 @@
-// Copyright 2023 Lance Developers.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors

 import {
+  Data as ArrowData,
  Table as ArrowTable,
  Binary,
+  Bool,
  BufferType,
  DataType,
+  Dictionary,
  Field,
  FixedSizeBinary,
  FixedSizeList,
  Float,
  Float32,
+  Float64,
  Int,
+  Int32,
+  Int64,
  LargeBinary,
  List,
  Null,
  RecordBatch,
  RecordBatchFileReader,
  RecordBatchFileWriter,
-  RecordBatchReader,
  RecordBatchStreamWriter,
  Schema,
  Struct,
  Utf8,
  Vector,
+  makeVector as arrowMakeVector,
  makeBuilder,
  makeData,
-  type makeTable,
+  makeTable,
  vectorFromArray,
 } from "apache-arrow";
 import { Buffers } from "apache-arrow/data";
@@ -247,8 +242,6 @@ export class MakeArrowTableOptions {
 * This function converts an array of Record<String, any> (row-major JS objects)
 * to an Arrow Table (a columnar structure)
 *
- * Note that it currently does not support nulls.
- *
 * If a schema is provided then it will be used to determine the resulting array
 * types.  Fields will also be reordered to fit the order defined by the schema.
 *
@@ -256,6 +249,9 @@ export class MakeArrowTableOptions {
 * will be controlled by the order of properties in the first record.  If a type
 * is inferred it will always be nullable.
 *
+ * If not all fields are found in the data, then a subset of the schema will be
+ * returned.
+ *
 * If the input is empty then a schema must be provided to create an empty table.
 *
 * When a schema is not specified then data types will be inferred.  The inference
@@ -263,11 +259,13 @@ export class MakeArrowTableOptions {
 *
 *  - boolean => Bool
 *  - number => Float64
+ *  - bigint => Int64
 *  - String => Utf8
 *  - Buffer => Binary
 *  - Record<String, any> => Struct
 *  - Array<any> => List
 * @example
+ * ```ts
 * import { fromTableToBuffer, makeArrowTable } from "../arrow";
 * import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
 *
@@ -289,43 +287,41 @@ export class MakeArrowTableOptions {
 * names and data types.
 *
 * ```ts
- *
 * const schema = new Schema([
-    new Field("a", new Float64()),
-    new Field("b", new Float64()),
-    new Field(
-      "vector",
-      new FixedSizeList(3, new Field("item", new Float32()))
-    ),
-  ]);
-  const table = makeArrowTable([
-    { a: 1, b: 2, vector: [1, 2, 3] },
-    { a: 4, b: 5, vector: [4, 5, 6] },
-    { a: 7, b: 8, vector: [7, 8, 9] },
-  ]);
-  assert.deepEqual(table.schema, schema);
+ *   new Field("a", new Float64()),
+ *   new Field("b", new Float64()),
+ *   new Field(
+ *     "vector",
+ *     new FixedSizeList(3, new Field("item", new Float32()))
+ *   ),
+ * ]);
+ * const table = makeArrowTable([
+ *   { a: 1, b: 2, vector: [1, 2, 3] },
+ *   { a: 4, b: 5, vector: [4, 5, 6] },
+ *   { a: 7, b: 8, vector: [7, 8, 9] },
+ * ]);
+ * assert.deepEqual(table.schema, schema);
 * ```
 *
 * You can specify the vector column types and names using the options as well
 *
- * ```typescript
- *
+ * ```ts
 * const schema = new Schema([
-    new Field('a', new Float64()),
-    new Field('b', new Float64()),
-    new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
-    new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
-  ]);
+ *   new Field('a', new Float64()),
+ *   new Field('b', new Float64()),
+ *   new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
+ *   new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
+ * ]);
 * const table = makeArrowTable([
-    { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
-    { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
-    { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
-  ], {
-    vectorColumns: {
-      vec1: { type: new Float16() },
-      vec2: { type: new Float16() }
-    }
-  }
+ *   { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
+ *   { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
+ *   { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
+ * ], {
+ *   vectorColumns: {
+ *     vec1: { type: new Float16() },
+ *     vec2: { type: new Float16() }
+ *   }
+ * });
 * assert.deepEqual(table.schema, schema)
 * ```
 */
@@ -334,126 +330,316 @@ export function makeArrowTable(
  options?: Partial<MakeArrowTableOptions>,
  metadata?: Map<string, string>,
 ): ArrowTable {
+  const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
+  let schema: Schema | undefined = undefined;
+  if (opt.schema !== undefined && opt.schema !== null) {
+    schema = sanitizeSchema(opt.schema);
+    schema = validateSchemaEmbeddings(
+      schema as Schema,
+      data,
+      options?.embeddingFunction,
+    );
+  }
+
+  let schemaMetadata = schema?.metadata || new Map<string, string>();
+  if (metadata !== undefined) {
+    schemaMetadata = new Map([...schemaMetadata, ...metadata]);
+  }
+
  if (
    data.length === 0 &&
    (options?.schema === undefined || options?.schema === null)
  ) {
    throw new Error("At least one record or a schema needs to be provided");
-  }
-
-  const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
-  if (opt.schema !== undefined && opt.schema !== null) {
-    opt.schema = sanitizeSchema(opt.schema);
-    opt.schema = validateSchemaEmbeddings(
-      opt.schema as Schema,
-      data,
-      options?.embeddingFunction,
-    );
-  }
-  const columns: Record<string, Vector> = {};
-  // TODO: sample dataset to find missing columns
-  // Prefer the field ordering of the schema, if present
-  const columnNames =
-    opt.schema != null ? (opt.schema.names as string[]) : Object.keys(data[0]);
-  for (const colName of columnNames) {
-    if (
-      data.length !== 0 &&
-      !Object.prototype.hasOwnProperty.call(data[0], colName)
-    ) {
-      // The field is present in the schema, but not in the data, skip it
-      continue;
-    }
-    // Extract a single column from the records (transpose from row-major to col-major)
-    let values = data.map((datum) => datum[colName]);
-
-    // By default (type === undefined) arrow will infer the type from the JS type
-    let type;
-    if (opt.schema !== undefined) {
-      // If there is a schema provided, then use that for the type instead
-      type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type;
-      if (DataType.isInt(type) && type.bitWidth === 64) {
-        // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
-        values = values.map((v) => {
-          if (v === null) {
-            return v;
-          }
-          if (typeof v === "bigint") {
-            return v;
-          }
-          if (typeof v === "number") {
-            return BigInt(v);
-          }
-          throw new Error(
-            `Expected BigInt or number for column ${colName}, got ${typeof v}`,
-          );
-        });
-      }
+  } else if (data.length === 0) {
+    if (schema === undefined) {
+      throw new Error("A schema must be provided if data is empty");
    } else {
-      // Otherwise, check to see if this column is one of the vector columns
-      // defined by opt.vectorColumns and, if so, use the fixed size list type
-      const vectorColumnOptions = opt.vectorColumns[colName];
-      if (vectorColumnOptions !== undefined) {
-        const firstNonNullValue = values.find((v) => v !== null);
-        if (Array.isArray(firstNonNullValue)) {
-          type = newVectorType(
-            firstNonNullValue.length,
-            vectorColumnOptions.type,
-          );
+      schema = new Schema(schema.fields, schemaMetadata);
+      return new ArrowTable(schema);
+    }
+  }
+
+  let inferredSchema = inferSchema(data, schema, opt);
+  inferredSchema = new Schema(inferredSchema.fields, schemaMetadata);
+
+  const finalColumns: Record<string, Vector> = {};
+  for (const field of inferredSchema.fields) {
+    finalColumns[field.name] = transposeData(data, field);
+  }
+
+  return new ArrowTable(inferredSchema, finalColumns);
+}
+
+/**
+ * Work out the schema that describes `data`.
+ *
+ * Every (possibly nested) field path found in the rows is recorded in a
+ * PathTree. When `schema` is given, each path must exist in it and the result
+ * is the subset of `schema` that actually appears in the data (in schema
+ * order). When `schema` is undefined, types are inferred from the values and
+ * every resulting field is nullable.
+ *
+ * @throws Error if a path is not present in the provided schema, if a type
+ *   cannot be inferred the first time a path is seen, or if two rows yield
+ *   different inferred types for the same path.
+ */
+function inferSchema(
+  data: Array<Record<string, unknown>>,
+  schema: Schema | undefined,
+  opts: MakeArrowTableOptions,
+): Schema {
+  // We will collect all fields we see in the data.
+  const pathTree = new PathTree<DataType>();
+
+  for (const [rowI, row] of data.entries()) {
+    for (const [path, value] of rowPathsAndValues(row)) {
+      if (!pathTree.has(path)) {
+        // First time seeing this field.
+        if (schema !== undefined) {
+          const field = getFieldForPath(schema, path);
+          if (field === undefined) {
+            throw new Error(
+              `Found field not in schema: ${path.join(".")} at row ${rowI}`,
+            );
+          } else {
+            pathTree.set(path, field.type);
+          }
        } else {
-          throw new Error(
-            `Column ${colName} is expected to be a vector column but first non-null value is not an array.  Could not determine size of vector column`,
-          );
+          const inferredType = inferType(value, path, opts);
+          if (inferredType === undefined) {
+            throw new Error(`Failed to infer data type for field ${path.join(".")} at row ${rowI}. \
+                             Consider providing an explicit schema.`);
+          }
+          pathTree.set(path, inferredType);
+        }
+      } else if (schema === undefined) {
+        const currentType = pathTree.get(path);
+        const newType = inferType(value, path, opts);
+        // inferType builds a fresh DataType object on every call, so the types
+        // are compared by their string form rather than by reference. Values
+        // with no inferable type (e.g. null or an empty array) are compatible
+        // with whatever was inferred earlier and are skipped.
+        if (
+          newType !== undefined &&
+          String(currentType) !== String(newType)
+        ) {
+          throw new Error(`Failed to infer schema for data. Previously inferred type \
+                     ${currentType} but found ${newType} at row ${rowI}. Consider \
+                     providing an explicit schema.`);
        }
      }
    }
-
-    try {
-      // Convert an Array of JS values to an arrow vector
-      columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
-    } catch (error: unknown) {
-      // eslint-disable-next-line @typescript-eslint/restrict-template-expressions
-      throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
-    }
  }

-  if (opt.schema != null) {
-    // `new ArrowTable(columns)` infers a schema which may sometimes have
-    // incorrect nullability (it assumes nullable=true always)
-    //
-    // `new ArrowTable(schema, columns)` will also fail because it will create a
-    // batch with an inferred schema and then complain that the batch schema
-    // does not match the provided schema.
-    //
-    // To work around this we first create a table with the wrong schema and
-    // then patch the schema of the batches so we can use
-    // `new ArrowTable(schema, batches)` which does not do any schema inference
-    const firstTable = new ArrowTable(columns);
-    const batchesFixed = firstTable.batches.map(
-      (batch) => new RecordBatch(opt.schema as Schema, batch.data),
-    );
-    let schema: Schema;
-    if (metadata !== undefined) {
-      let schemaMetadata = opt.schema.metadata;
-      if (schemaMetadata.size === 0) {
-        schemaMetadata = metadata;
-      } else {
-        for (const [key, entry] of schemaMetadata.entries()) {
-          schemaMetadata.set(key, entry);
+  if (schema === undefined) {
+    // No schema: turn the collected tree into nullable fields, nesting
+    // sub-trees as structs.
+    function fieldsFromPathTree(pathTree: PathTree<DataType>): Field[] {
+      const fields = [];
+      for (const [name, value] of pathTree.map.entries()) {
+        if (value instanceof PathTree) {
+          const children = fieldsFromPathTree(value);
+          fields.push(new Field(name, new Struct(children), true));
+        } else {
+          fields.push(new Field(name, value, true));
        }
      }
+      return fields;
+    }
+    const fields = fieldsFromPathTree(pathTree);
+    return new Schema(fields);
+  } else {
+    // Schema given: keep only the schema fields that were seen in the data,
+    // preserving the schema's order and nullability.
+    function takeMatchingFields(
+      fields: Field[],
+      pathTree: PathTree<DataType>,
+    ): Field[] {
+      const outFields = [];
+      for (const field of fields) {
+        if (pathTree.map.has(field.name)) {
+          const value = pathTree.get([field.name]);
+          if (value instanceof PathTree) {
+            const struct = field.type as Struct;
+            const children = takeMatchingFields(struct.children, value);
+            outFields.push(
+              new Field(field.name, new Struct(children), field.nullable),
+            );
+          } else {
+            outFields.push(
+              new Field(field.name, value as DataType, field.nullable),
+            );
+          }
+        }
+      }
+      return outFields;
+    }
+    const fields = takeMatchingFields(schema.fields, pathTree);
+    return new Schema(fields);
+  }
+}

-      schema = new Schema(opt.schema.fields as Field[], schemaMetadata);
+/**
+ * Walk one row and yield a `[path, value]` pair for every leaf value.
+ *
+ * Values for which `isObject` returns true are descended into recursively,
+ * with their key appended to `basePath`; every other value is yielded as a
+ * leaf together with the full key path leading to it.
+ */
+function* rowPathsAndValues(
+  row: Record<string, unknown>,
+  basePath: string[] = [],
+): Generator<[string[], unknown]> {
+  for (const [key, value] of Object.entries(row)) {
+    if (isObject(value)) {
+      yield* rowPathsAndValues(value, [...basePath, key]);
    } else {
-      schema = opt.schema as Schema;
+      yield [[...basePath, key], value];
    }
-    return new ArrowTable(schema, batchesFixed);
  }
-  const tbl = new ArrowTable(columns);
-  if (metadata !== undefined) {
-    // biome-ignore lint/suspicious/noExplicitAny: <explanation>
-    (<any>tbl.schema).metadata = metadata;
+}
+
+/**
+ * Type guard for values that should be treated as nested records.
+ *
+ * Returns true only for non-null objects that are not arrays and not
+ * instances of RegExp, Date, Set, Map or Buffer.
+ */
+function isObject(value: unknown): value is Record<string, unknown> {
+  if (typeof value !== "object" || value === null) {
+    return false;
+  }
+  if (Array.isArray(value)) {
+    return false;
+  }
+  // Instances of these built-in classes are leaf values, not records.
+  const leafClasses = [RegExp, Date, Set, Map, Buffer];
+  return !leafClasses.some((cls) => value instanceof cls);
+}
+
+/**
+ * Resolve a key path to the schema field it names.
+ *
+ * The first key is looked up among the schema's top-level fields and each
+ * following key among the children of the previous (struct) field. Returns
+ * undefined if the path is empty, a key is missing, or the path tries to
+ * descend into a field that is not a struct.
+ */
+function getFieldForPath(schema: Schema, path: string[]): Field | undefined {
+  let node: Field | Schema = schema;
+  for (const key of path) {
+    let siblings: Field[];
+    if (node instanceof Schema) {
+      siblings = node.fields;
+    } else if (node instanceof Field && DataType.isStruct(node.type)) {
+      const struct: Struct = node.type;
+      siblings = struct.children;
+    } else {
+      // Cannot descend any further: the current field has no children.
+      return undefined;
+    }
+    const next: Field | undefined = siblings.find((f) => f.name === key);
+    if (next === undefined) {
+      return undefined;
+    }
+    node = next;
+  }
+  // An empty path leaves us on the Schema itself, which is not a field.
+  return node instanceof Field ? node : undefined;
+}
+
+/**
+ * Guess the Arrow data type for a single JavaScript value.
+ *
+ * `path` is the key path of the value within its row; it is used to detect
+ * vector columns, either configured through `opts.vectorColumns` (top-level
+ * columns only) or recognised by a final path segment named "vector".
+ *
+ * Returns undefined when no type can be determined (for example an empty
+ * array, or a value kind that is not handled here).
+ */
+function inferType(
+  value: unknown,
+  path: string[],
+  opts: MakeArrowTableOptions,
+): DataType | undefined {
+  switch (typeof value) {
+    case "bigint":
+      return new Int64();
+    case "number":
+      // Float64 is the safe choice even for whole numbers; callers who want
+      // integers can supply a schema or pass BigInt values.
+      return new Float64();
+    case "string":
+      return opts.dictionaryEncodeStrings
+        ? new Dictionary(new Utf8(), new Int32())
+        : new Utf8();
+    case "boolean":
+      return new Bool();
+  }
+
+  if (value instanceof Buffer) {
+    return new Binary();
+  }
+  if (!Array.isArray(value)) {
+    // TODO: timestamp
+    return undefined;
+  }
+  if (value.length === 0) {
+    // Nothing to look at, so the element type is unknown.
+    return undefined;
+  }
+
+  // Explicitly configured vector column: honour the requested element type.
+  if (path.length === 1 && Object.hasOwn(opts.vectorColumns, path[0])) {
+    const floatType = sanitizeType(opts.vectorColumns[path[0]].type);
+    const item = new Field("item", floatType, true);
+    return new FixedSizeList(value.length, item);
+  }
+
+  // Otherwise the first element decides the element type.
+  const elementType = inferType(value[0], path, opts);
+  if (elementType === undefined) {
+    return undefined;
+  }
+
+  const lastSegment = path[path.length - 1];
+  if (elementType instanceof Float && lastSegment === "vector") {
+    // Auto-detected embedding column; vectors default to Float32.
+    const item = new Field("item", new Float32(), true);
+    return new FixedSizeList(value.length, item);
+  }
+  const item = new Field("item", elementType, true);
+  return new List(item);
+}
+
+/**
+ * A tree keyed by path segments. Each entry in `map` is either a leaf value
+ * or a nested PathTree holding the entries below that segment.
+ */
+class PathTree<V> {
+  map: Map<string, V | PathTree<V>>;
+
+  /** Create a tree, optionally pre-populated from `[path, value]` pairs. */
+  constructor(entries?: [string[], V][]) {
+    this.map = new Map();
+    if (entries !== undefined) {
+      for (const [path, value] of entries) {
+        this.set(path, value);
+      }
+    }
+  }
+
+  /** True if every segment of `path` can be followed from this tree. */
+  has(path: string[]): boolean {
+    let ref: PathTree<V> = this;
+    for (const part of path) {
+      if (!(ref instanceof PathTree) || !ref.map.has(part)) {
+        return false;
+      }
+      ref = ref.map.get(part) as PathTree<V>;
+    }
+    return true;
+  }
+
+  /**
+   * Return whatever is stored at `path`, or undefined if the path cannot be
+   * followed. For a path that ends on an intermediate node the nested
+   * PathTree itself is returned.
+   */
+  get(path: string[]): V | undefined {
+    let ref: PathTree<V> = this;
+    for (const part of path) {
+      if (!(ref instanceof PathTree) || !ref.map.has(part)) {
+        return undefined;
+      }
+      ref = ref.map.get(part) as PathTree<V>;
+    }
+    return ref as V;
+  }
+
+  /**
+   * Store `value` at `path`, creating intermediate trees as needed.
+   *
+   * @throws Error if an intermediate segment of `path` already holds a leaf
+   *   value, since a leaf cannot also have children.
+   */
+  set(path: string[], value: V): void {
+    let ref: PathTree<V> = this;
+    for (const part of path.slice(0, path.length - 1)) {
+      if (!ref.map.has(part)) {
+        ref.map.set(part, new PathTree<V>());
+      }
+      const child = ref.map.get(part);
+      if (!(child instanceof PathTree)) {
+        // Without this check the next access would fail on `child.map` with
+        // an opaque TypeError.
+        throw new Error(
+          `Cannot set path ${path.join(".")}: "${part}" already holds a value, not a nested tree`,
+        );
+      }
+      ref = child as PathTree<V>;
+    }
+    ref.map.set(path[path.length - 1], value);
+  }
+}
+
+/**
+ * Build the Arrow vector for one schema field from row-major records.
+ *
+ * `path` is the list of keys leading to the parent of `field` inside each
+ * record (empty for top-level fields). Struct fields are built recursively
+ * from their children; for any other field the value at `[...path, field.name]`
+ * is collected from every record, using null when a key along the way is
+ * missing, and handed to `makeVector` with the field's type.
+ */
+function transposeData(
+  data: Record<string, unknown>[],
+  field: Field,
+  path: string[] = [],
+): Vector {
+  if (field.type instanceof Struct) {
+    const childFields = field.type.children;
+    // Children are nested under this struct's own key. Each child appends its
+    // own name when it resolves values, so only the struct's name is added
+    // here; adding the child's name would look values up at `child.child`.
+    const structPath = [...path, field.name];
+    const childVectors = childFields.map((child) => {
+      return transposeData(data, child, structPath);
+    });
+    const structData = makeData({
+      type: field.type,
+      children: childVectors as unknown as ArrowData<DataType>[],
+    });
+    return arrowMakeVector(structData);
+  } else {
+    const valuesPath = [...path, field.name];
+    const values = data.map((datum) => {
+      let current: unknown = datum;
+      for (const key of valuesPath) {
+        if (isObject(current) && Object.hasOwn(current, key)) {
+          current = current[key];
+        } else {
+          // Field absent from this record: emit a null for this row.
+          return null;
+        }
+      }
+      return current;
+    });
+    return makeVector(values, field.type);
+  }
-  return tbl;
 }

 /**
@@ -503,6 +689,31 @@ function makeVector(
 ): Vector<any> {
  if (type !== undefined) {
    // No need for inference, let Arrow create it
+    if (type instanceof Int) {
+      if (DataType.isInt(type) && type.bitWidth === 64) {
+        // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
+        values = values.map((v) => {
+          if (v === null) {
+            return v;
+          } else if (typeof v === "bigint") {
+            return v;
+          } else if (typeof v === "number") {
+            return BigInt(v);
+          } else {
+            return v;
+          }
+        });
+      } else {
+        // Similarly, bigint isn't supported for 16 or 32-bit ints.
+        values = values.map((v) => {
+          if (typeof v == "bigint") {
+            return Number(v);
+          } else {
+            return v;
+          }
+        });
+      }
+    }
    return vectorFromArray(values, type);
  }
  if (values.length === 0) {
@@ -609,6 +820,14 @@ async function applyEmbeddings<T>(
    return table;
  }

+  let schemaMetadata = schema?.metadata || new Map<string, string>();
+
+  if (!(embeddings == null || embeddings === undefined)) {
+    const registry = getRegistry();
+    const embeddingMetadata = registry.getTableMetadata([embeddings]);
+    schemaMetadata = new Map([...schemaMetadata, ...embeddingMetadata]);
+  }
+
  // Convert from ArrowTable to Record<String, Vector>
  const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
    const name = table.schema.fields[idx].name;
@@ -677,15 +896,21 @@ async function applyEmbeddings<T>(
    newColumns[destColumn] = makeVector(vectors, destType);
  }

-  const newTable = new ArrowTable(newColumns);
+  let newTable = new ArrowTable(newColumns);
  if (schema != null) {
    if (schema.fields.find((f) => f.name === destColumn) === undefined) {
      throw new Error(
        `When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`,
      );
    }
-    return alignTable(newTable, schema as Schema);
+    newTable = alignTable(newTable, schema as Schema);
  }
+
+  newTable = new ArrowTable(
+    new Schema(newTable.schema.fields, schemaMetadata),
+    newTable.batches,
+  );
+
  return newTable;
 }

@@ -900,7 +1125,7 @@ function validateSchemaEmbeddings(
  schema: Schema,
  data: Array<Record<string, unknown>>,
  embeddings: EmbeddingFunctionConfig | undefined,
-) {
+): Schema {
  const fields = [];
  const missingEmbeddingFields = [];

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Lance Release	40f0dbb64d	Bump version: 0.19.1-beta.1 → 0.19.1-beta.2	2025-02-13 04:39:19 +00:00
BubbleCal	3b19e96ae7	fix: panic when field id doesn't equal to field index (#2116 ) Signed-off-by: BubbleCal <bubble-cal@outlook.com>	2025-02-13 12:38:35 +08:00
Will Jones	78a17ad54c	chore: improve dev instructions for Python (#2088 ) Closes #2042	2025-02-12 14:08:52 -08:00
Lance Release	a8e6b491e2	Updating package-lock.json	2025-02-11 22:05:54 +00:00
Lance Release	cea541ca46	Updating package-lock.json	2025-02-11 20:56:22 +00:00
Lance Release	873ffc1042	Updating package-lock.json	2025-02-11 20:56:05 +00:00
Lance Release	83273ad997	Bump version: 0.16.1-beta.0 → 0.16.1-beta.1	2025-02-11 20:55:43 +00:00
Lance Release	d18d63c69d	Bump version: 0.19.1-beta.0 → 0.19.1-beta.1	2025-02-11 20:55:23 +00:00
LuQQiu	c3e865e8d0	fix: fix index out of bound in load indices (#2108 ) panicked at 'index out of bounds: the len is 24 but the index is 25':Lancedb/rust/lancedb/src/index/vector.rs:26\n load_indices() on the old manifest while use the newer manifest to get column names could result in index out of bound if some columns are removed from the new version. This change reduces the possibility of index out of bound operation but does not fully remove it. Better that lance can directly provide column name info so no need extra calls to get column name but that requires modifying the public APIs	2025-02-11 12:54:11 -08:00
Weston Pace	a7755cb313	docs: standardize node example prints (#2080 ) Minor cleanup to help debug future CI failures	2025-02-11 08:26:29 -08:00
BubbleCal	3490f3456f	chore: upgrade lance to 0.23.1-beta.2 (#2109 )	2025-02-11 23:57:56 +08:00
Lance Release	0a1d0693e1	Updating package-lock.json	2025-02-07 20:06:22 +00:00
Lance Release	fd330b4b4b	Updating package-lock.json	2025-02-07 19:28:01 +00:00
Lance Release	d4e9fc08e0	Updating package-lock.json	2025-02-07 19:27:44 +00:00
Lance Release	3626f2f5e1	Bump version: 0.16.0 → 0.16.1-beta.0	2025-02-07 19:27:26 +00:00
Lance Release	e64712cfa5	Bump version: 0.19.0 → 0.19.1-beta.0	2025-02-07 19:27:07 +00:00
Wyatt Alt	3e3118f85c	feat: update lance dependency to 0.23.1-beta.1 (#2102 )	2025-02-07 10:56:01 -08:00
Lance Release	592598a333	Updating package-lock.json	2025-02-07 18:50:53 +00:00
Lance Release	5ad21341c9	Updating package-lock.json	2025-02-07 17:34:04 +00:00
Lance Release	6e08caa091	Updating package-lock.json	2025-02-07 17:33:48 +00:00
Lance Release	7e259d8b0f	Bump version: 0.16.0-beta.0 → 0.16.0	2025-02-07 17:33:13 +00:00
Lance Release	e84f747464	Bump version: 0.15.1-beta.3 → 0.16.0-beta.0	2025-02-07 17:33:08 +00:00
Lance Release	998cd43fe6	Bump version: 0.19.0-beta.0 → 0.19.0	2025-02-07 17:32:26 +00:00
Lance Release	4bc7eebe61	Bump version: 0.18.1-beta.4 → 0.19.0-beta.0	2025-02-07 17:32:26 +00:00
Will Jones	2e3b34e79b	feat(node): support inserting and upserting subschemas (#2100 ) Fixes #2095 Closes #1832	2025-02-07 09:30:18 -08:00
Will Jones	e7574698eb	feat: upgrade Lance to 0.23.0 (#2101 ) Upstream changelog: https://github.com/lancedb/lance/releases/tag/v0.23.0	2025-02-07 07:58:07 -08:00
Will Jones	801a9e5f6f	feat(python): streaming larger-than-memory writes (#2094 ) Makes our preprocessing pipeline do transforms in streaming fashion, so users can do larger-than-memory writes. Closes #2082	2025-02-06 16:37:30 -08:00
Weston Pace	4e5fbe6c99	fix: ensure metadata erased from schema call in table provider (#2099 ) This also adds a basic unit test for the table provider	2025-02-06 15:30:20 -08:00
Weston Pace	1a449fa49e	refactor: rename drop_db / drop_database to drop_all_tables, expose database from connection (#2098 ) If we start supporting external catalogs then "drop database" may be misleading (and not possible). We should be more clear that this is a utility method to drop all tables. This is also a nice chance for some consistency cleanup as it was `drop_db` in rust, `drop_database` in python, and non-existent in typescript. This PR also adds a public accessor to get the database trait from a connection. BREAKING CHANGE: the `drop_database` / `drop_db` methods are now deprecated.	2025-02-06 13:22:28 -08:00
Weston Pace	6bf742c759	feat: expose table trait (#2097 ) Similar to `c269524b2f` this PR reworks and exposes an internal trait (this time `TableInternal`) to be a public trait. These two PRs together should make it possible for others to integrate LanceDB on top of other catalogs. This PR also adds a basic `TableProvider` implementation for tables, although some work still needs to be done here (pushdown not yet enabled).	2025-02-05 18:13:51 -08:00
Ryan Green	ef3093bc23	feat: drop_index() remote implementation (#2093 ) Support drop_index operation in remote table.	2025-02-05 10:06:19 -03:30
Will Jones	16851389ea	feat: extra headers parameter in client options (#2091 ) Closes #1106 Unfortunately, these need to be set at the connection level. I investigated whether if we let users provide a callback they could use `AsyncLocalStorage` to access their context. However, it doesn't seem like NAPI supports this right now. I filed an issue: https://github.com/napi-rs/napi-rs/issues/2456	2025-02-04 17:26:45 -08:00
Weston Pace	c269524b2f	feat!: refactor ConnectionInternal into a Database trait (#2067 ) This opens up the door for more custom database implementations than the two we have today. The biggest change should be invisible: `ConnectionInternal` has been renamed to `Database`, made public, and refactored. However, there are a few breaking changes. `data_storage_version` and `enable_v2_manifest_paths` have been moved from options on `create_table` to options for the database which are now set via `storage_options`. Before: ``` db = connect(uri) tbl = db.create_table("my_table", data, data_storage_version="legacy", enable_v2_manifest_paths=True) ``` After: ``` db = connect(uri, storage_options={ "new_table_enable_v2_manifest_paths": "true", "new_table_data_storage_version": "legacy" }) tbl = db.create_table("my_table", data) ``` BREAKING CHANGE: the data_storage_version, enable_v2_manifest_paths options have moved from options to create_table to storage_options. BREAKING CHANGE: the use_legacy_format option has been removed, data_storage_version has replaced it for some time now	2025-02-04 14:35:14 -08:00
Lance Release	f6eef14313	Bump version: 0.18.1-beta.3 → 0.18.1-beta.4	2025-02-04 17:25:52 +00:00
Rob Meng	32716adaa3	chore: bump lance version (#2092 )	2025-02-04 12:25:05 -05:00
Lance Release	5e98b7f4c0	Updating package-lock.json	2025-02-01 02:27:43 +00:00
Lance Release	3f2589c11f	Updating package-lock.json	2025-02-01 01:22:22 +00:00
Lance Release	e3b99694d6	Updating package-lock.json	2025-02-01 01:22:05 +00:00
Lance Release	9d42dc349c	Bump version: 0.15.1-beta.2 → 0.15.1-beta.3	2025-02-01 01:21:28 +00:00
Lance Release	482f1ee1d3	Bump version: 0.18.1-beta.2 → 0.18.1-beta.3	2025-02-01 01:20:49 +00:00
Will Jones	2f39274a66	feat: upgrade lance to 0.23.0-beta.4 (#2089 ) Upstream changelog: https://github.com/lancedb/lance/releases/tag/v0.23.0-beta.4	2025-01-31 17:20:15 -08:00
Will Jones	2fc174f532	docs: add sync/async tabs to quickstart (#2087 ) Closes #2033	2025-01-31 15:43:54 -08:00
Will Jones	dba85f4d6f	docs: user guide for merge insert (#2083 ) Closes #2062	2025-01-31 10:03:21 -08:00
Jeff Simpson	555fa26147	fix(rust): add embedding_registry on open_table (#2086 ) # Description Fix for: https://github.com/lancedb/lancedb/issues/1581 This is the same implementation as https://github.com/lancedb/lancedb/pull/1781 but with the addition of a unit test and rustfmt.	2025-01-31 08:48:02 -08:00
Will Jones	e05c0cd87e	ci(node): check docs in CI (#2084 ) * Make `npm run docs` fail if there are any warnings. This will catch items missing from the API reference. * Add a check in our CI to make sure `npm run docs` runs without warnings and doesn't generate any new files (indicating it might be out-of-date). * Hide constructors that aren't user facing. * Remove unused enum `WriteMode`. Closes #2068	2025-01-30 16:06:06 -08:00
Lance Release	25c17ebf4e	Updating package-lock.json	2025-01-30 18:24:59 +00:00
Lance Release	87b12b57dc	Updating package-lock.json	2025-01-30 17:33:15 +00:00
Lance Release	3dc9b71914	Updating package-lock.json	2025-01-30 17:32:59 +00:00
Lance Release	2622f34d1a	Bump version: 0.15.1-beta.1 → 0.15.1-beta.2	2025-01-30 17:32:33 +00:00
Will Jones	a677a4b651	ci: fix arm64 windows cross compile build (#2081 ) * Adds a CI job to check the cross compiled Windows ARM build. * Didn't replace the test build because we need native build to run tests. But for some reason (I forget why) we need cross compiled for nodejs. * Pinned crunchy to workaround https://github.com/eira-fransham/crunchy/issues/13 This is needed to fix failure from https://github.com/lancedb/lancedb/actions/runs/13020773184/job/36320719331	2025-01-30 09:24:20 -08:00
Weston Pace	e6b4f14c1f	docs: clarify upper case characters in column names need to be escaped (#2079 )	2025-01-29 09:34:43 -08:00
Will Jones	15f8f4d627	ci: check license headers (#2076 ) Based on the same workflow in Lance.	2025-01-29 08:27:07 -08:00
Will Jones	6526d6c3b1	ci(rust): caching improvements (up to 2.8x faster builds) (#2075 ) Some Rust jobs (such as [Rust/linux](https://github.com/lancedb/lancedb/actions/runs/13019232960/job/36315830779)) take almost minutes. This can be a bit of a bottleneck. * Two fixes to make caches more effective * Check in `Cargo.lock` so that dependencies don't change much between runs * Added a new CI job to validate we can build without a lockfile * Altered build commands so they don't have contradictory features and therefore don't trigger multiple builds Sadly, I don't think there's much to be done for windows-arm64, as much of the compile time is because the base image is so bare we need to install the build tools ourselves.	2025-01-29 08:26:45 -08:00
Lance Release	da4d7e3ca7	Updating package-lock.json	2025-01-28 22:32:20 +00:00
Lance Release	8fbadca9aa	Updating package-lock.json	2025-01-28 22:32:05 +00:00
Lance Release	29120219cf	Bump version: 0.15.1-beta.0 → 0.15.1-beta.1	2025-01-28 22:31:39 +00:00
Lance Release	a9897d9d85	Bump version: 0.18.1-beta.1 → 0.18.1-beta.2	2025-01-28 22:31:14 +00:00
Will Jones	acda7a4589	feat: upgrade lance to v0.23.0-beta.3 (#2074 ) This includes several bugfixes for `merge_insert` and null handling in vector search. https://github.com/lancedb/lance/releases/tag/v0.23.0-beta.3	2025-01-28 14:00:06 -08:00
Vaibhav	dac0857745	feat: add `distance_type()` parameter to python sync query builders and `metric()` as an alias (#2073 ) This PR aims to fix #2047 by doing the following things: - Add a distance_type parameter to the sync query builders of Python SDK. - Make metric an alias to distance_type.	2025-01-28 13:59:53 -08:00
Will Jones	0a9e1eab75	fix(node): `createTable()` should save embeddings, and `mergeInsert` should use them (#2065 ) * `createTable()` now saves embeddings in the schema metadata. Previously, it would drop them. (`createEmptyTable()` was already tested and worked.) * `mergeInsert()` now uses embeddings. Fixes #2066	2025-01-28 12:38:50 -08:00
V	d999d72c8d	docs: pandas example (#2044 ) Fix example for section ## From pandas DataFrame	2025-01-24 11:37:47 -08:00
Lance Release	de4720993e	Updating package-lock.json	2025-01-23 23:02:20 +00:00
Lance Release	6c14a307e2	Updating package-lock.json	2025-01-23 23:02:03 +00:00
Lance Release	43747278c8	Bump version: 0.15.0 → 0.15.1-beta.0	2025-01-23 23:01:40 +00:00