Compare commits

20 Commits

Author SHA1 Message Date
Lance Release
5cbbaa2e4a Bump version: 0.25.2-beta.3 → 0.25.2 2025-10-08 18:11:45 +00:00
Lance Release
1b6bd2498e Bump version: 0.25.2-beta.2 → 0.25.2-beta.3 2025-10-08 18:11:45 +00:00
Jack Ye
285da9db1d feat: upgrade lance to 0.38.2 (#2705) 2025-10-08 09:59:28 -07:00
Ayush Chaurasia
ad8306c96b docs: add custom redirect for storage page (#2706)
Expand the custom redirection links list to include the storage page
2025-10-08 21:35:48 +05:30
Wyatt Alt
3594538509 fix: add name to index config and fix create_index typing (#2660)
Co-authored-by: Mark McCaskey <markm@harvey.ai>
2025-10-08 04:41:30 -07:00
Tom LaMarre
917aabd077 fix(node): support specifying arrow field types by name (#2704)
The [`FieldLike` type in
arrow.ts](5ec12c9971/nodejs/lancedb/arrow.ts (L71-L78))
can have a `type: string` property, but before this change, actually
trying to create a table whose schema specifies field types by name
resulted in an error:

```
Error: Expected a Type but object was null/undefined
```

This change adds support for mapping some type name strings to arrow
`DataType`s, so that passing `FieldLike`s with a `type: string` property
to `sanitizeField` does not throw an error.

The type names that can be passed are upper/lowercase variations of the
keys of the `constructorsByTypeName` object. This does not support
mapping types that need parameters, such as timestamps which need
timezones.
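For example (a hypothetical schema fragment, assuming the `apache-arrow` package): a parameter-free type like `utf8` can be named as a string, while a timestamp must still be instantiated directly because it carries a timezone parameter:

```
import { TimestampMillisecond } from "apache-arrow";

// Parameter-free types can be specified by name...
const titleField = { name: "title", type: "utf8", nullable: true };

// ...but parameterized types like timestamps must be constructed
// directly, since a bare name cannot carry the timezone.
const createdAtField = {
  name: "created_at",
  type: new TimestampMillisecond("UTC"),
  nullable: true,
};
```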

With this, it is possible to create empty tables from `SchemaLike`
objects without instantiating arrow types, e.g.:

```
    import { SchemaLike } from "../lancedb/arrow"
    // ...
    const schemaLike = {
      fields: [
        {
          name: "id",
          type: "int64",
          nullable: true,
        },
        {
          name: "vector",
          type: "float64",
          nullable: true,
        },
      ],
    // ...
    } satisfies SchemaLike;
    const table = await con.createEmptyTable("test", schemaLike);
```

This change also makes `FieldLike.nullable` required since the `sanitizeField` function throws if it is undefined.
2025-10-08 04:40:06 -07:00
Jack Ye
5ec12c9971 fix: federated database should not pass namespace to listing database (#2702)
Fixes an error where, when converting a federated database operation to a
listing database operation, the namespace parameter was incorrectly carried
over; it is no longer valid in that context and should be dropped.

Note that with the testing infra we have today, we don't have a good way
to test these changes. I will do a quick follow-up on
https://github.com/lancedb/lancedb/issues/2701, but it would be great to get
this in first to resolve the related issues.
2025-10-06 14:12:41 -07:00
Ed Rogers
d0ce489b21 fix: use stdlib override when possible (#2699)
## Description of changes

Fixes #2698  

This PR uses
[`typing.override`](https://docs.python.org/3/library/typing.html#typing.override)
in place of the [`overrides`](https://pypi.org/project/overrides/)
dependency when possible. As of Python 3.12, the standard library offers
`typing.override` to perform a static check on overridden methods.

### Motivation

Currently, `overrides` is incompatible with Python 3.14. As a result,
any package that attempts to import `overrides` using Python 3.14+ will
raise an `AttributeError`. An
[issue](https://github.com/mkorpela/overrides/issues/127) has been
raised and a [pull
request](https://github.com/mkorpela/overrides/pull/133) has been
submitted to the GitHub repo for the `overrides` project. But the
maintainer has been unresponsive.

To ensure readiness for Python 3.14, this package (and any other package
directly depending on `overrides`) should consider using
`typing.override` instead.

### Impact

The standard library added `typing.override` as of 3.12. As a result,
this change will affect only users of Python 3.12+. Previous versions
will continue to rely on `overrides`. Notably, the standard library
implementation is slightly different than that of `overrides`. A
thorough discussion of those differences is shown in [PEP
698](https://peps.python.org/pep-0698/), and it is also summarized
nicely by the maintainer of `overrides`
[here](https://github.com/mkorpela/overrides/issues/126#issuecomment-2401327116).

There are two main ways that switching from `overrides` to
`typing.override` will impact developers of this repo.
1. `typing.override` does not implement any runtime checking. Instead,
it provides information to type checkers.
2. The stdlib does not provide a mixin class to enforce override
decorators on child classes. (Their reasoning for this is explained in
[the PEP](https://peps.python.org/pep-0698/).) This PR disables that
behavior entirely by replacing `EnforceOverrides` with a no-op stand-in
on Python 3.12+, as sketched below.
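A minimal sketch of the version-gated import this PR introduces (it mirrors the Python diff further down):

```
import sys

if sys.version_info >= (3, 12):
    from typing import override

    # typing.override does no runtime checking, so a no-op stand-in
    # replaces the EnforceOverrides mixin from the `overrides` package.
    class EnforceOverrides:
        pass
else:
    from overrides import EnforceOverrides, override  # type: ignore
```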
2025-10-06 11:23:20 -07:00
Lance Release
d7e02c8181 Bump version: 0.22.2-beta.1 → 0.22.2-beta.2 2025-10-06 18:10:40 +00:00
Lance Release
70958f6366 Bump version: 0.25.2-beta.1 → 0.25.2-beta.2 2025-10-06 18:09:24 +00:00
Will Jones
1ac745eb18 ci: fix Python and Node CI on main (#2700)
Example failure:
https://github.com/lancedb/lancedb/actions/runs/18237024283/job/51932651993
2025-10-06 09:40:08 -07:00
Will Jones
1357fe8aa1 ci: run remote tests on PRs only if they aren't a fork (#2697) 2025-10-03 17:38:40 -07:00
LuQQiu
0d78929893 feat: upgrade lance to 0.38.0 (#2695)
https://github.com/lancedb/lance/releases/tag/v0.38.0

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
2025-10-03 16:47:05 -07:00
Neha Prasad
9e2a68541e fix(node): allow undefined/omitted values for nullable vector fields (#2656)
**Problem**: When a vector field is marked as nullable, users should be
able to omit it or pass `undefined`, but this was throwing an error:
"Table has embeddings: 'vector', but no embedding function was provided"

fixes: #2646

**Solution**: Modified `validateSchemaEmbeddings` to check
`field.nullable` before treating `undefined` values as missing embedding
fields.

**Changes**:
- Fixed validation logic in `nodejs/lancedb/arrow.ts`
- Enabled previously skipped test for nullable fields
- Added reproduction test case

**Behavior**:
- `{ vector: undefined }` now works for nullable fields
- `{}` (omitted field) now works for nullable fields
- `{ vector: null }` still works (unchanged)
- Non-nullable fields still properly throw errors (unchanged)
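A short usage sketch of the calls that now succeed, based on the added test (the `memory://` URI, table name, and two-dimensional vector are illustrative):

```
import * as arrow from "apache-arrow";
import { connect } from "@lancedb/lancedb";

const db = await connect("memory://");
const schema = new arrow.Schema([
  new arrow.Field("id", new arrow.Int32(), true),
  new arrow.Field(
    "vector",
    new arrow.FixedSizeList(2, new arrow.Field("item", new arrow.Float32(), true)),
    true, // nullable vector field
  ),
]);
const table = await db.createEmptyTable("demo", schema);

await table.add([{ id: 0, vector: undefined }]); // previously threw
await table.add([{ id: 1 }]); // omitted field also works now
await table.add([{ id: 2, vector: null }]); // unchanged behavior
```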

---------

Co-authored-by: Will Jones <willjones127@gmail.com>
Co-authored-by: neha <neha@posthog.com>
2025-10-02 10:53:05 -07:00
Will Jones
1aa0fd16e7 ci: automatic issue creation for failed publish workflows (#2694)
## Summary
- Created custom GitHub Action that creates issues when workflow jobs
fail
- Added report-failure jobs to cargo-publish.yml, java-publish.yml,
npm-publish.yml, and pypi-publish.yml
- Issues are created automatically with workflow name, failed job names,
and run URL

## Test plan
- Workflows will only create issues on actual release or
workflow_dispatch events
- Can be tested by triggering workflow_dispatch on a publish workflow

Based on lancedb/lance#4873

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-10-02 08:24:16 -07:00
Lance Release
fec2a05629 Bump version: 0.22.2-beta.0 → 0.22.2-beta.1 2025-09-30 19:31:44 +00:00
Lance Release
79a1cd60ee Bump version: 0.25.2-beta.0 → 0.25.2-beta.1 2025-09-30 19:30:39 +00:00
Colin Patrick McCabe
88807a59a4 fix: have CI download from ci-support-binaries (#2692)
Have CI download from ci-support-binaries to fix the build.
2025-09-30 11:54:43 -07:00
Jack Ye
e0e7e01ea8 fix: inflated release size due to lance-namespace transitive dependency (#2691)
Fixed the issue on lance-namespace side to avoid pinning to a specific
lance version. This should fix the issue of the increased release
artifact size and build time.
2025-09-30 11:18:32 -07:00
Ayush Chaurasia
a416ebc11d fix: use correct nodejs path for ci (#2689) 2025-09-30 14:18:42 +05:30
62 changed files with 1550 additions and 2217 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.2-beta.0"
current_version = "0.22.2-beta.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -0,0 +1,45 @@
name: Create Failure Issue
description: Creates a GitHub issue if any jobs in the workflow failed
inputs:
job-results:
description: 'JSON string of job results from needs context'
required: true
workflow-name:
description: 'Name of the workflow'
required: true
runs:
using: composite
steps:
- name: Check for failures and create issue
shell: bash
env:
JOB_RESULTS: ${{ inputs.job-results }}
WORKFLOW_NAME: ${{ inputs.workflow-name }}
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
GH_TOKEN: ${{ github.token }}
run: |
# Check if any job failed
if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
echo "Detected job failures, creating issue..."
# Extract failed job names
FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')
# Create issue with workflow name, failed jobs, and run URL
gh issue create \
--title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
--body "The workflow **$WORKFLOW_NAME** failed during execution.
**Failed jobs:** $FAILED_JOBS
**Run URL:** $RUN_URL
Please investigate the failed jobs and address any issues." \
--label "ci"
echo "Issue created successfully"
else
echo "No job failures detected, skipping issue creation"
fi

View File

@@ -38,3 +38,17 @@ jobs:
- name: Publish the package
run: |
cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [build]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -58,7 +58,7 @@ jobs:
cache: 'npm'
cache-dependency-path: docs/package-lock.json
- name: Install node dependencies
working-directory: node
working-directory: nodejs
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev

View File

@@ -43,7 +43,6 @@ jobs:
- uses: Swatinem/rust-cache@v2
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
toolchain: "1.81.0"
cache-workspaces: "./java/core/lancedb-jni"
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
@@ -112,3 +111,17 @@ jobs:
env:
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [linux-arm64, linux-x86, macos-arm64]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -6,6 +6,7 @@ on:
- main
pull_request:
paths:
- Cargo.toml
- nodejs/**
- .github/workflows/nodejs.yml
- docker-compose.yml

View File

@@ -365,3 +365,17 @@ jobs:
ARGS="$ARGS --tag preview"
fi
npm publish $ARGS
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [build-lancedb, test-lancedb, publish]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -173,3 +173,17 @@ jobs:
generate_release_notes: false
name: Python LanceDB v${{ steps.extract_version.outputs.version }}
body: ${{ steps.python_release_notes.outputs.changelog }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [linux, mac, windows]
permissions:
contents: read
issues: write
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -6,6 +6,7 @@ on:
- main
pull_request:
paths:
- Cargo.toml
- python/**
- .github/workflows/python.yml

View File

@@ -125,6 +125,9 @@ jobs:
- name: Run examples
run: cargo run --example simple --locked
- name: Run remote tests
# Running this requires access to secrets, so skip if this is
# a PR from a fork.
if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
run: make -C ./lancedb remote-tests
macos:

Cargo.lock (generated): 2611 changed lines. File diff suppressed because it is too large.

View File

@@ -15,31 +15,30 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.37.0", default-features = false, "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-namespace = "0.0.15"
lance = { "version" = "=0.38.2", default-features = false, "features" = ["dynamodb"] }
lance-io = { "version" = "=0.38.2", default-features = false }
lance-index = "=0.38.2"
lance-linalg = "=0.38.2"
lance-table = "=0.38.2"
lance-testing = "=0.38.2"
lance-datafusion = "=0.38.2"
lance-encoding = "=0.38.2"
lance-namespace = "0.0.18"
# Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false }
arrow-array = "55.1"
arrow-data = "55.1"
arrow-ipc = "55.1"
arrow-ord = "55.1"
arrow-schema = "55.1"
arrow-arith = "55.1"
arrow-cast = "55.1"
arrow = { version = "56.2", optional = false }
arrow-array = "56.2"
arrow-data = "56.2"
arrow-ipc = "56.2"
arrow-ord = "56.2"
arrow-schema = "56.2"
arrow-cast = "56.2"
async-trait = "0"
datafusion = { version = "49.0", default-features = false }
datafusion-catalog = "49.0"
datafusion-common = { version = "49.0", default-features = false }
datafusion-execution = "49.0"
datafusion-expr = "49.0"
datafusion-physical-plan = "49.0"
datafusion = { version = "50.1", default-features = false }
datafusion-catalog = "50.1"
datafusion-common = { version = "50.1", default-features = false }
datafusion-execution = "50.1"
datafusion-expr = "50.1"
datafusion-physical-plan = "50.1"
env_logger = "0.11"
half = { "version" = "2.6.0", default-features = false, features = [
"num-traits",
@@ -52,7 +51,6 @@ pin-project = "1.0.7"
snafu = "0.8"
url = "2"
num-traits = "0.2"
rand = "0.9"
regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
@@ -60,7 +58,17 @@ crunchy = "0.2.4"
# Temporary pins to work around downstream issues
# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
chrono = "=0.4.41"
# https://github.com/RustCrypto/formats/issues/1684
base64ct = "=1.6.0"
# Workaround for: https://github.com/Lokathor/bytemuck/issues/306
bytemuck_derive = ">=1.8.1, <1.9.0"
# This is only needed when we reference preview releases of lance
# [patch.crates-io]
# # Force to use the same lance version as the rest of the project to avoid duplicate dependencies
# lance = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-io = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-index = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-linalg = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-table = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-testing = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-datafusion = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-encoding = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }

View File

@@ -16,29 +16,46 @@ check_command_exists() {
}
if [[ ! -e ./lancedb ]]; then
ARCH="x64"
if [[ $OSTYPE == 'darwin'* ]]; then
UNAME=$(uname -m)
if [[ $UNAME == 'arm64' ]]; then
ARCH='arm64'
fi
OSTYPE="macos"
elif [[ $OSTYPE == 'linux'* ]]; then
if [[ $UNAME == 'aarch64' ]]; then
ARCH='arm64'
fi
OSTYPE="linux"
if [[ -v SOPHON_READ_TOKEN ]]; then
INPUT="lancedb-linux-x64"
gh release \
--repo lancedb/lancedb \
download ci-support-binaries \
--pattern "${INPUT}" \
|| die "failed to fetch cli."
check_command_exists openssl
openssl enc -aes-256-cbc \
-d -pbkdf2 \
-pass "env:SOPHON_READ_TOKEN" \
-in "${INPUT}" \
-out ./lancedb-linux-x64.tar.gz \
|| die "openssl failed"
TARGET="${INPUT}.tar.gz"
else
die "unknown OSTYPE: $OSTYPE"
fi
ARCH="x64"
if [[ $OSTYPE == 'darwin'* ]]; then
UNAME=$(uname -m)
if [[ $UNAME == 'arm64' ]]; then
ARCH='arm64'
fi
OSTYPE="macos"
elif [[ $OSTYPE == 'linux'* ]]; then
if [[ $UNAME == 'aarch64' ]]; then
ARCH='arm64'
fi
OSTYPE="linux"
else
die "unknown OSTYPE: $OSTYPE"
fi
check_command_exists gh
TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
gh release \
--repo lancedb/sophon \
download lancedb-cli-v0.0.3 \
--pattern "${TARGET}" \
|| die "failed to fetch cli."
check_command_exists gh
TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
gh release \
--repo lancedb/sophon \
download lancedb-cli-v0.0.3 \
--pattern "${TARGET}" \
|| die "failed to fetch cli."
fi
check_command_exists tar
tar xvf "${TARGET}" || die "tar failed."

View File

@@ -117,7 +117,7 @@ def update_cargo_toml(line_updater):
lance_line = ""
is_parsing_lance_line = False
for line in lines:
if line.startswith("lance"):
if line.startswith("lance") and not line.startswith("lance-namespace"):
# Check if this is a single-line or multi-line entry
# Single-line entries either:
# 1. End with } (complete inline table)

View File

@@ -84,6 +84,7 @@ plugins:
'examples.md': 'https://lancedb.com/docs/tutorials/'
'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
'guides/storage.md': 'https://lancedb.com/docs/storage/integrations'
@@ -402,4 +403,4 @@ extra:
- icon: fontawesome/brands/x-twitter
link: https://twitter.com/lancedb
- icon: fontawesome/brands/linkedin
link: https://www.linkedin.com/company/lancedb
link: https://www.linkedin.com/company/lancedb

View File

@@ -194,37 +194,6 @@ currently is also a memory intensive operation.
***
### ivfRq()
```ts
static ivfRq(options?): Index
```
Create an IvfRq index
IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
and organizes them into IVF partitions.
The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff
between index size (and thus search speed) and index accuracy.
The partitioning process is called IVF and the `num_partitions` parameter controls how
many groups to create.
Note that training an IVF RQ index on a large dataset is a slow operation and
currently is also a memory intensive operation.
#### Parameters
* **options?**: `Partial`&lt;[`IvfRqOptions`](../interfaces/IvfRqOptions.md)&gt;
#### Returns
[`Index`](Index.md)
***
### labelList()
```ts

View File

@@ -68,7 +68,6 @@
- [IndexStatistics](interfaces/IndexStatistics.md)
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
- [IvfPqOptions](interfaces/IvfPqOptions.md)
- [IvfRqOptions](interfaces/IvfRqOptions.md)
- [MergeResult](interfaces/MergeResult.md)
- [OpenTableOptions](interfaces/OpenTableOptions.md)
- [OptimizeOptions](interfaces/OptimizeOptions.md)

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.2-beta.0</version>
<version>0.22.2-beta.2</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.2-beta.0</version>
<version>0.22.2-beta.2</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.2-beta.0</version>
<version>0.22.2-beta.2</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.22.2-beta.0"
version = "0.22.2-beta.2"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -0,0 +1,184 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as arrow from "../lancedb/arrow";
import { sanitizeField, sanitizeType } from "../lancedb/sanitize";
describe("sanitize", function () {
describe("sanitizeType function", function () {
it("should handle type objects", function () {
const type = new arrow.Int32();
const result = sanitizeType(type);
expect(result.typeId).toBe(arrow.Type.Int);
expect((result as arrow.Int).bitWidth).toBe(32);
expect((result as arrow.Int).isSigned).toBe(true);
const floatType = {
typeId: 3, // Type.Float = 3
precision: 2,
toString: () => "Float",
isFloat: true,
isFixedWidth: true,
};
const floatResult = sanitizeType(floatType);
expect(floatResult).toBeInstanceOf(arrow.DataType);
expect(floatResult.typeId).toBe(arrow.Type.Float);
const floatResult2 = sanitizeType({ ...floatType, typeId: () => 3 });
expect(floatResult2).toBeInstanceOf(arrow.DataType);
expect(floatResult2.typeId).toBe(arrow.Type.Float);
});
const allTypeNameTestCases = [
["null", new arrow.Null()],
["binary", new arrow.Binary()],
["utf8", new arrow.Utf8()],
["bool", new arrow.Bool()],
["int8", new arrow.Int8()],
["int16", new arrow.Int16()],
["int32", new arrow.Int32()],
["int64", new arrow.Int64()],
["uint8", new arrow.Uint8()],
["uint16", new arrow.Uint16()],
["uint32", new arrow.Uint32()],
["uint64", new arrow.Uint64()],
["float16", new arrow.Float16()],
["float32", new arrow.Float32()],
["float64", new arrow.Float64()],
["datemillisecond", new arrow.DateMillisecond()],
["dateday", new arrow.DateDay()],
["timenanosecond", new arrow.TimeNanosecond()],
["timemicrosecond", new arrow.TimeMicrosecond()],
["timemillisecond", new arrow.TimeMillisecond()],
["timesecond", new arrow.TimeSecond()],
["intervaldaytime", new arrow.IntervalDayTime()],
["intervalyearmonth", new arrow.IntervalYearMonth()],
["durationnanosecond", new arrow.DurationNanosecond()],
["durationmicrosecond", new arrow.DurationMicrosecond()],
["durationmillisecond", new arrow.DurationMillisecond()],
["durationsecond", new arrow.DurationSecond()],
] as const;
it.each(allTypeNameTestCases)(
'should map type name "%s" to %s',
function (name, expected) {
const result = sanitizeType(name);
expect(result).toBeInstanceOf(expected.constructor);
},
);
const caseVariationTestCases = [
["NULL", new arrow.Null()],
["Utf8", new arrow.Utf8()],
["FLOAT32", new arrow.Float32()],
["DaTedAy", new arrow.DateDay()],
] as const;
it.each(caseVariationTestCases)(
'should be case insensitive for type name "%s" mapped to %s',
function (name, expected) {
const result = sanitizeType(name);
expect(result).toBeInstanceOf(expected.constructor);
},
);
it("should throw error for unrecognized type name", function () {
expect(() => sanitizeType("invalid_type")).toThrow(
"Unrecognized type name in schema: invalid_type",
);
});
});
describe("sanitizeField function", function () {
it("should handle field with string type name", function () {
const field = sanitizeField({
name: "string_field",
type: "utf8",
nullable: true,
metadata: new Map([["key", "value"]]),
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("string_field");
expect(field.type).toBeInstanceOf(arrow.Utf8);
expect(field.nullable).toBe(true);
expect(field.metadata?.get("key")).toBe("value");
});
it("should handle field with type object", function () {
const floatType = {
typeId: 3, // Float
precision: 32,
};
const field = sanitizeField({
name: "float_field",
type: floatType,
nullable: false,
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("float_field");
expect(field.type).toBeInstanceOf(arrow.DataType);
expect(field.type.typeId).toBe(arrow.Type.Float);
expect((field.type as arrow.Float64).precision).toBe(32);
expect(field.nullable).toBe(false);
});
it("should handle field with direct Type instance", function () {
const field = sanitizeField({
name: "bool_field",
type: new arrow.Bool(),
nullable: true,
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("bool_field");
expect(field.type).toBeInstanceOf(arrow.Bool);
expect(field.nullable).toBe(true);
});
it("should throw error for invalid field object", function () {
expect(() =>
sanitizeField({
type: "int32",
nullable: true,
}),
).toThrow(
"The field passed in is missing a `type`/`name`/`nullable` property",
);
// Invalid type
expect(() =>
sanitizeField({
name: "invalid",
type: { invalid: true },
nullable: true,
}),
).toThrow("Expected a Type to have a typeId property");
// Invalid nullable
expect(() =>
sanitizeField({
name: "invalid_nullable",
type: "int32",
nullable: "not a boolean",
}),
).toThrow("The field passed in had a non-boolean `nullable` property");
});
it("should report error for invalid type name", function () {
expect(() =>
sanitizeField({
name: "invalid_field",
type: "invalid_type",
nullable: true,
}),
).toThrow(
"Unable to sanitize type for field: invalid_field due to error: Error: Unrecognized type name in schema: invalid_type",
);
});
});
});

View File

@@ -10,7 +10,13 @@ import * as arrow16 from "apache-arrow-16";
import * as arrow17 from "apache-arrow-17";
import * as arrow18 from "apache-arrow-18";
import { MatchQuery, PhraseQuery, Table, connect } from "../lancedb";
import {
Connection,
MatchQuery,
PhraseQuery,
Table,
connect,
} from "../lancedb";
import {
Table as ArrowTable,
Field,
@@ -21,6 +27,8 @@ import {
Int64,
List,
Schema,
SchemaLike,
Type,
Uint8,
Utf8,
makeArrowTable,
@@ -211,8 +219,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
},
);
// TODO: https://github.com/lancedb/lancedb/issues/1832
it.skip("should be able to omit nullable fields", async () => {
it("should be able to omit nullable fields", async () => {
const db = await connect(tmpDir.name);
const schema = new arrow.Schema([
new arrow.Field(
@@ -236,23 +243,36 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
await table.add([data3]);
let res = await table.query().limit(10).toArray();
const resVector = res.map((r) => r.get("vector").toArray());
const resVector = res.map((r) =>
r.vector ? Array.from(r.vector) : null,
);
expect(resVector).toEqual([null, data2.vector, data3.vector]);
const resItem = res.map((r) => r.get("item").toArray());
const resItem = res.map((r) => r.item);
expect(resItem).toEqual(["foo", null, "bar"]);
const resPrice = res.map((r) => r.get("price").toArray());
const resPrice = res.map((r) => r.price);
expect(resPrice).toEqual([10.0, 2.0, 3.0]);
const data4 = { item: "foo" };
// We can't omit a column if it's not nullable
await expect(table.add([data4])).rejects.toThrow("Invalid user input");
await expect(table.add([data4])).rejects.toThrow(
"Append with different schema",
);
// But we can alter columns to make them nullable
await table.alterColumns([{ path: "price", nullable: true }]);
await table.add([data4]);
res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
expect(res).toEqual([data1, data2, data3, data4]);
res = (await table.query().limit(10).toArray()).map((r) => ({
...r.toJSON(),
vector: r.vector ? Array.from(r.vector) : null,
}));
// Rust fills missing nullable fields with null
expect(res).toEqual([
{ ...data1, vector: null },
{ ...data2, item: null },
data3,
{ ...data4, price: null, vector: null },
]);
});
it("should be able to insert nullable data for non-nullable fields", async () => {
@@ -330,6 +350,43 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
const table = await db.createTable("my_table", data);
expect(await table.countRows()).toEqual(2);
});
it("should allow undefined and omitted nullable vector fields", async () => {
// Test for the bug: can't pass undefined or omit vector column
const db = await connect("memory://");
const schema = new arrow.Schema([
new arrow.Field("id", new arrow.Int32(), true),
new arrow.Field(
"vector",
new arrow.FixedSizeList(
32,
new arrow.Field("item", new arrow.Float32(), true),
),
true, // nullable = true
),
]);
const table = await db.createEmptyTable("test_table", schema);
// Should not throw error for undefined value
await table.add([{ id: 0, vector: undefined }]);
// Should not throw error for omitted field
await table.add([{ id: 1 }]);
// Should still work for null
await table.add([{ id: 2, vector: null }]);
// Should still work for actual vector
const testVector = new Array(32).fill(0.5);
await table.add([{ id: 3, vector: testVector }]);
expect(await table.countRows()).toEqual(4);
const res = await table.query().limit(10).toArray();
const resVector = res.map((r) =>
r.vector ? Array.from(r.vector) : null,
);
expect(resVector).toEqual([null, null, null, testVector]);
});
},
);
@@ -804,15 +861,6 @@ describe("When creating an index", () => {
});
});
it("should be able to create IVF_RQ", async () => {
await tbl.createIndex("vec", {
config: Index.ivfRq({
numPartitions: 10,
numBits: 1,
}),
});
});
it("should allow me to replace (or not) an existing index", async () => {
await tbl.createIndex("id");
// Default is replace=true
@@ -1463,7 +1511,9 @@ describe("when optimizing a dataset", () => {
it("delete unverified", async () => {
const version = await table.version();
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${version - 1}.manifest`;
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
version - 1
}.manifest`;
fs.rmSync(versionFile);
let stats = await table.optimize({ deleteUnverified: false });
@@ -1977,3 +2027,52 @@ describe("column name options", () => {
expect(results2.length).toBe(10);
});
});
describe("when creating an empty table", () => {
let con: Connection;
beforeEach(async () => {
const tmpDir = tmp.dirSync({ unsafeCleanup: true });
con = await connect(tmpDir.name);
});
afterEach(() => {
con.close();
});
it("can create an empty table from an arrow Schema", async () => {
const schema = new Schema([
new Field("id", new Int64()),
new Field("vector", new Float64()),
]);
const table = await con.createEmptyTable("test", schema);
const actualSchema = await table.schema();
expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
});
it("can create an empty table from schema that specifies field types by name", async () => {
const schemaLike = {
fields: [
{
name: "id",
type: "int64",
nullable: true,
},
{
name: "vector",
type: "float64",
nullable: true,
},
],
metadata: new Map(),
names: ["id", "vector"],
} satisfies SchemaLike;
const table = await con.createEmptyTable("test", schemaLike);
const actualSchema = await table.schema();
expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
});
});

View File

@@ -73,7 +73,7 @@ export type FieldLike =
| {
type: string;
name: string;
nullable?: boolean;
nullable: boolean;
metadata?: Map<string, string>;
};
@@ -1285,19 +1285,36 @@ function validateSchemaEmbeddings(
if (isFixedSizeList(field.type)) {
field = sanitizeField(field);
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
// Check if there's an embedding function registered for this field
let hasEmbeddingFunction = false;
// Check schema metadata for embedding functions
if (schema.metadata.has("embedding_functions")) {
const embeddings = JSON.parse(
schema.metadata.get("embedding_functions")!,
);
if (
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
embeddings.find((f: any) => f["vectorColumn"] === field.name) ===
undefined
) {
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
if (embeddings.find((f: any) => f["vectorColumn"] === field.name)) {
hasEmbeddingFunction = true;
}
}
// Check passed embedding function parameter
if (embeddings && embeddings.vectorColumn === field.name) {
hasEmbeddingFunction = true;
}
// If the field is nullable AND there's no embedding function, allow undefined/omitted values
if (field.nullable && !hasEmbeddingFunction) {
fields.push(field);
} else {
// Either not nullable OR has embedding function - require explicit values
if (hasEmbeddingFunction) {
// Don't add to missingEmbeddingFields since this is expected to be filled by embedding function
fields.push(field);
} else {
missingEmbeddingFields.push(field);
}
} else {
missingEmbeddingFields.push(field);
}
} else {
fields.push(field);

View File

@@ -85,7 +85,6 @@ export {
Index,
IndexOptions,
IvfPqOptions,
IvfRqOptions,
IvfFlatOptions,
HnswPqOptions,
HnswSqOptions,

View File

@@ -112,77 +112,6 @@ export interface IvfPqOptions {
sampleRate?: number;
}
export interface IvfRqOptions {
/**
* The number of IVF partitions to create.
*
* This value should generally scale with the number of rows in the dataset.
* By default the number of partitions is the square root of the number of
* rows.
*
* If this value is too large then the first part of the search (picking the
* right partition) will be slow. If this value is too small then the second
* part of the search (searching within a partition) will be slow.
*/
numPartitions?: number;
/**
* Number of bits per dimension for residual quantization.
*
* This value controls how much each residual component is compressed. The more
* bits, the more accurate the index will be but the slower search. Typical values
* are small integers; the default is 1 bit per dimension.
*/
numBits?: number;
/**
* Distance type to use to build the index.
*
* Default value is "l2".
*
* This is used when training the index to calculate the IVF partitions
* (vectors are grouped in partitions with similar vectors according to this
* distance type) and during quantization.
*
* The distance type used to train an index MUST match the distance type used
* to search the index. Failure to do so will yield inaccurate results.
*
* The following distance types are available:
*
* "l2" - Euclidean distance.
* "cosine" - Cosine distance.
* "dot" - Dot product.
*/
distanceType?: "l2" | "cosine" | "dot";
/**
* Max iterations to train IVF kmeans.
*
* When training an IVF index we use kmeans to calculate the partitions. This parameter
* controls how many iterations of kmeans to run.
*
* The default value is 50.
*/
maxIterations?: number;
/**
* The number of vectors, per partition, to sample when training IVF kmeans.
*
* When an IVF index is trained, we need to calculate partitions. These are groups
* of vectors that are similar to each other. To do this we use an algorithm called kmeans.
*
* Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
* random sample of the data. This parameter controls the size of the sample. The total
* number of vectors used to train the index is `sample_rate * num_partitions`.
*
* Increasing this value might improve the quality of the index but in most cases the
* default should be sufficient.
*
* The default value is 256.
*/
sampleRate?: number;
}
/**
* Options to create an `HNSW_PQ` index
*/
@@ -594,35 +523,6 @@ export class Index {
options?.distanceType,
options?.numPartitions,
options?.numSubVectors,
options?.numBits,
options?.maxIterations,
options?.sampleRate,
),
);
}
/**
* Create an IvfRq index
*
* IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
* and organizes them into IVF partitions.
*
* The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
* The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff
* between index size (and thus search speed) and index accuracy.
*
* The partitioning process is called IVF and the `num_partitions` parameter controls how
* many groups to create.
*
* Note that training an IVF RQ index on a large dataset is a slow operation and
* currently is also a memory intensive operation.
*/
static ivfRq(options?: Partial<IvfRqOptions>) {
return new Index(
LanceDbIndex.ivfRq(
options?.distanceType,
options?.numPartitions,
options?.numBits,
options?.maxIterations,
options?.sampleRate,
),

View File

@@ -326,6 +326,9 @@ export function sanitizeDictionary(typeLike: object) {
// biome-ignore lint/suspicious/noExplicitAny: skip
export function sanitizeType(typeLike: unknown): DataType<any> {
if (typeof typeLike === "string") {
return dataTypeFromName(typeLike);
}
if (typeof typeLike !== "object" || typeLike === null) {
throw Error("Expected a Type but object was null/undefined");
}
@@ -447,7 +450,7 @@ export function sanitizeType(typeLike: unknown): DataType<any> {
case Type.DurationSecond:
return new DurationSecond();
default:
throw new Error("Unrecoginized type id in schema: " + typeId);
throw new Error("Unrecognized type id in schema: " + typeId);
}
}
@@ -467,7 +470,15 @@ export function sanitizeField(fieldLike: unknown): Field {
"The field passed in is missing a `type`/`name`/`nullable` property",
);
}
const type = sanitizeType(fieldLike.type);
let type: DataType;
try {
type = sanitizeType(fieldLike.type);
} catch (error: unknown) {
throw Error(
`Unable to sanitize type for field: ${fieldLike.name} due to error: ${error}`,
{ cause: error },
);
}
const name = fieldLike.name;
if (!(typeof name === "string")) {
throw Error("The field passed in had a non-string `name` property");
@@ -581,3 +592,46 @@ function sanitizeData(
},
);
}
const constructorsByTypeName = {
null: () => new Null(),
binary: () => new Binary(),
utf8: () => new Utf8(),
bool: () => new Bool(),
int8: () => new Int8(),
int16: () => new Int16(),
int32: () => new Int32(),
int64: () => new Int64(),
uint8: () => new Uint8(),
uint16: () => new Uint16(),
uint32: () => new Uint32(),
uint64: () => new Uint64(),
float16: () => new Float16(),
float32: () => new Float32(),
float64: () => new Float64(),
datemillisecond: () => new DateMillisecond(),
dateday: () => new DateDay(),
timenanosecond: () => new TimeNanosecond(),
timemicrosecond: () => new TimeMicrosecond(),
timemillisecond: () => new TimeMillisecond(),
timesecond: () => new TimeSecond(),
intervaldaytime: () => new IntervalDayTime(),
intervalyearmonth: () => new IntervalYearMonth(),
durationnanosecond: () => new DurationNanosecond(),
durationmicrosecond: () => new DurationMicrosecond(),
durationmillisecond: () => new DurationMillisecond(),
durationsecond: () => new DurationSecond(),
} as const;
type MappableTypeName = keyof typeof constructorsByTypeName;
export function dataTypeFromName(typeName: string): DataType {
const normalizedTypeName = typeName.toLowerCase() as MappableTypeName;
const _constructor = constructorsByTypeName[normalizedTypeName];
if (!_constructor) {
throw new Error("Unrecognized type name in schema: " + typeName);
}
return _constructor();
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.22.2-beta.0",
"version": "0.22.2-beta.2",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -6,7 +6,6 @@ use std::sync::Mutex;
use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
use lancedb::index::vector::{
IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
IvfRqIndexBuilder,
};
use lancedb::index::Index as LanceDbIndex;
use napi_derive::napi;
@@ -66,36 +65,6 @@ impl Index {
})
}
#[napi(factory)]
pub fn ivf_rq(
distance_type: Option<String>,
num_partitions: Option<u32>,
num_bits: Option<u32>,
max_iterations: Option<u32>,
sample_rate: Option<u32>,
) -> napi::Result<Self> {
let mut ivf_rq_builder = IvfRqIndexBuilder::default();
if let Some(distance_type) = distance_type {
let distance_type = parse_distance_type(distance_type)?;
ivf_rq_builder = ivf_rq_builder.distance_type(distance_type);
}
if let Some(num_partitions) = num_partitions {
ivf_rq_builder = ivf_rq_builder.num_partitions(num_partitions);
}
if let Some(num_bits) = num_bits {
ivf_rq_builder = ivf_rq_builder.num_bits(num_bits);
}
if let Some(max_iterations) = max_iterations {
ivf_rq_builder = ivf_rq_builder.max_iterations(max_iterations);
}
if let Some(sample_rate) = sample_rate {
ivf_rq_builder = ivf_rq_builder.sample_rate(sample_rate);
}
Ok(Self {
inner: Mutex::new(Some(LanceDbIndex::IvfRq(ivf_rq_builder))),
})
}
#[napi(factory)]
pub fn ivf_flat(
distance_type: Option<String>,

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.25.2-beta.0"
current_version = "0.25.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.25.2-beta.0"
version = "0.25.2"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
@@ -14,12 +14,12 @@ name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "55.1", features = ["pyarrow"] }
arrow = { version = "56.2", features = ["pyarrow"] }
async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false }
env_logger.workspace = true
pyo3 = { version = "0.24", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.24", features = [
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.25", features = [
"attributes",
"tokio-runtime",
] }
@@ -28,7 +28,7 @@ futures.workspace = true
tokio = { version = "1.40", features = ["sync"] }
[build-dependencies]
pyo3-build-config = { version = "0.24", features = [
pyo3-build-config = { version = "0.25", features = [
"extension-module",
"abi3-py39",
] }

View File

@@ -5,12 +5,12 @@ dynamic = ["version"]
dependencies = [
"deprecation",
"numpy",
"overrides>=0.7",
"overrides>=0.7; python_version<'3.12'",
"packaging",
"pyarrow>=16",
"pydantic>=1.10",
"tqdm>=4.27.0",
"lance-namespace==0.0.6"
"lance-namespace>=0.0.16"
]
description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]

View File

@@ -133,6 +133,7 @@ class Tags:
async def update(self, tag: str, version: int): ...
class IndexConfig:
name: str
index_type: str
columns: List[str]

View File

@@ -6,10 +6,18 @@ from __future__ import annotations
from abc import abstractmethod
from pathlib import Path
import sys
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
if sys.version_info >= (3, 12):
from typing import override
class EnforceOverrides:
pass
else:
from overrides import EnforceOverrides, override # type: ignore
from lancedb.embeddings.registry import EmbeddingFunctionRegistry
from overrides import EnforceOverrides, override # type: ignore
from lancedb.common import data_to_reader, sanitize_uri, validate_schema
from lancedb.background_loop import LOOP

View File

@@ -605,53 +605,9 @@ class IvfPq:
target_partition_size: Optional[int] = None
@dataclass
class IvfRq:
"""Describes an IVF RQ Index
IVF-RQ (Residual Quantization) stores a compressed copy of each vector using
residual quantization and organizes them into IVF partitions. Parameters
largely mirror IVF-PQ for consistency.
Attributes
----------
distance_type: str, default "l2"
Distance metric used to train the index and for quantization.
The following distance types are available:
"l2" - Euclidean distance.
"cosine" - Cosine distance.
"dot" - Dot product.
num_partitions: int, default sqrt(num_rows)
Number of IVF partitions to create.
num_bits: int, default 1
Number of bits to encode each dimension.
max_iterations: int, default 50
Max iterations to train kmeans when computing IVF partitions.
sample_rate: int, default 256
Controls the number of training vectors: sample_rate * num_partitions.
target_partition_size, default is 8192
Target size of each partition.
"""
distance_type: Literal["l2", "cosine", "dot"] = "l2"
num_partitions: Optional[int] = None
num_bits: int = 1
max_iterations: int = 50
sample_rate: int = 256
target_partition_size: Optional[int] = None
__all__ = [
"BTree",
"IvfPq",
"IvfRq",
"IvfFlat",
"HnswPq",
"HnswSq",

View File

@@ -12,13 +12,18 @@ from __future__ import annotations
from typing import Dict, Iterable, List, Optional, Union
import os
import sys
if sys.version_info >= (3, 12):
from typing import override
else:
from overrides import override
from lancedb.db import DBConnection
from lancedb.table import LanceTable, Table
from lancedb.util import validate_table_name
from lancedb.common import validate_schema
from lancedb.table import sanitize_create_table
from overrides import override
from lance_namespace import LanceNamespace, connect as namespace_connect
from lance_namespace_urllib3_client.models import (

View File

@@ -5,15 +5,20 @@
from datetime import timedelta
import logging
from concurrent.futures import ThreadPoolExecutor
import sys
from typing import Any, Dict, Iterable, List, Optional, Union
from urllib.parse import urlparse
import warnings
if sys.version_info >= (3, 12):
from typing import override
else:
from overrides import override
# Remove this import to fix circular dependency
# from lancedb import connect_async
from lancedb.remote import ClientConfig
import pyarrow as pa
from overrides import override
from ..common import DATA
from ..db import DBConnection, LOOP

View File

@@ -114,7 +114,7 @@ class RemoteTable(Table):
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar",
*,
replace: bool = False,
wait_timeout: timedelta = None,
wait_timeout: Optional[timedelta] = None,
name: Optional[str] = None,
):
"""Creates a scalar index
@@ -153,7 +153,7 @@ class RemoteTable(Table):
column: str,
*,
replace: bool = False,
wait_timeout: timedelta = None,
wait_timeout: Optional[timedelta] = None,
with_position: bool = False,
# tokenizer configs:
base_tokenizer: str = "simple",

View File

@@ -44,7 +44,7 @@ import numpy as np
from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
from .index import BTree, IvfFlat, IvfPq, Bitmap, IvfRq, LabelList, HnswPq, HnswSq, FTS
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from .merge import LanceMergeInsertBuilder
from .pydantic import LanceModel, model_to_dict
from .query import (
@@ -1991,7 +1991,7 @@ class LanceTable(Table):
index_cache_size: Optional[int] = None,
num_bits: int = 8,
index_type: Literal[
"IVF_FLAT", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
"IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
] = "IVF_PQ",
max_iterations: int = 50,
sample_rate: int = 256,
@@ -2039,15 +2039,6 @@ class LanceTable(Table):
sample_rate=sample_rate,
target_partition_size=target_partition_size,
)
elif index_type == "IVF_RQ":
config = IvfRq(
distance_type=metric,
num_partitions=num_partitions,
num_bits=num_bits,
max_iterations=max_iterations,
sample_rate=sample_rate,
target_partition_size=target_partition_size,
)
elif index_type == "IVF_HNSW_PQ":
config = HnswPq(
distance_type=metric,
@@ -3339,7 +3330,7 @@ class AsyncTable:
*,
replace: Optional[bool] = None,
config: Optional[
Union[IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
] = None,
wait_timeout: Optional[timedelta] = None,
name: Optional[str] = None,
@@ -3378,12 +3369,11 @@ class AsyncTable:
"""
if config is not None:
if not isinstance(
config,
(IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS),
config, (IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS)
):
raise TypeError(
"config must be an instance of IvfPq, IvfRq, HnswPq, HnswSq, BTree,"
" Bitmap, LabelList, or FTS, but got " + str(type(config))
"config must be an instance of IvfPq, HnswPq, HnswSq, BTree,"
" Bitmap, LabelList, or FTS"
)
try:
await self._inner.create_index(

View File

@@ -18,17 +18,10 @@ AddMode = Literal["append", "overwrite"]
CreateMode = Literal["create", "overwrite"]
# Index type literals
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ", "IVF_RQ"]
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"]
ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
IndexType = Literal[
"IVF_PQ",
"IVF_HNSW_PQ",
"IVF_HNSW_SQ",
"FTS",
"BTREE",
"BITMAP",
"LABEL_LIST",
"IVF_RQ",
"IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST"
]
# Tokenizer literals

View File

@@ -8,17 +8,7 @@ import pyarrow as pa
import pytest
import pytest_asyncio
from lancedb import AsyncConnection, AsyncTable, connect_async
from lancedb.index import (
BTree,
IvfFlat,
IvfPq,
IvfRq,
Bitmap,
LabelList,
HnswPq,
HnswSq,
FTS,
)
from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
@pytest_asyncio.fixture
@@ -205,16 +195,6 @@ async def test_create_4bit_ivfpq_index(some_table: AsyncTable):
assert stats.loss >= 0.0
@pytest.mark.asyncio
async def test_create_ivfrq_index(some_table: AsyncTable):
await some_table.create_index("vector", config=IvfRq(num_bits=1))
indices = await some_table.list_indices()
assert len(indices) == 1
assert indices[0].index_type == "IvfRq"
assert indices[0].columns == ["vector"]
assert indices[0].name == "vector_idx"
@pytest.mark.asyncio
async def test_create_hnswpq_index(some_table: AsyncTable):
await some_table.create_index("vector", config=HnswPq(num_partitions=10))

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder};
use lancedb::index::vector::IvfFlatIndexBuilder;
use lancedb::index::{
scalar::{BTreeIndexBuilder, FtsIndexBuilder},
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
@@ -87,22 +87,6 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
}
Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
},
"IvfRq" => {
let params = source.extract::<IvfRqParams>()?;
let distance_type = parse_distance_type(params.distance_type)?;
let mut ivf_rq_builder = IvfRqIndexBuilder::default()
.distance_type(distance_type)
.max_iterations(params.max_iterations)
.sample_rate(params.sample_rate)
.num_bits(params.num_bits);
if let Some(num_partitions) = params.num_partitions {
ivf_rq_builder = ivf_rq_builder.num_partitions(num_partitions);
}
if let Some(target_partition_size) = params.target_partition_size {
ivf_rq_builder = ivf_rq_builder.target_partition_size(target_partition_size);
}
Ok(LanceDbIndex::IvfRq(ivf_rq_builder))
},
"HnswPq" => {
let params = source.extract::<IvfHnswPqParams>()?;
let distance_type = parse_distance_type(params.distance_type)?;
@@ -186,16 +170,6 @@ struct IvfPqParams {
target_partition_size: Option<u32>,
}
#[derive(FromPyObject)]
struct IvfRqParams {
distance_type: String,
num_partitions: Option<u32>,
num_bits: u32,
max_iterations: u32,
sample_rate: u32,
target_partition_size: Option<u32>,
}
#[derive(FromPyObject)]
struct IvfHnswPqParams {
distance_type: String,

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.22.2-beta.0"
version = "0.22.2-beta.2"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -52,13 +52,13 @@ pub fn infer_vector_columns(
for field in reader.schema().fields() {
match field.data_type() {
DataType::FixedSizeList(sub_field, _) if sub_field.data_type().is_floating() => {
columns.push(field.name().to_string());
columns.push(field.name().clone());
}
DataType::List(sub_field) if sub_field.data_type().is_floating() && !strict => {
columns_to_infer.insert(field.name().to_string(), None);
columns_to_infer.insert(field.name().clone(), None);
}
DataType::LargeList(sub_field) if sub_field.data_type().is_floating() && !strict => {
columns_to_infer.insert(field.name().to_string(), None);
columns_to_infer.insert(field.name().clone(), None);
}
_ => {}
}

View File

@@ -718,9 +718,9 @@ impl Database for ListingDatabase {
.map_err(|e| Error::Lance { source: e })?;
let version_ref = match (request.source_version, request.source_tag) {
(Some(v), None) => Ok(Ref::Version(v)),
(Some(v), None) => Ok(Ref::Version(None, Some(v))),
(None, Some(tag)) => Ok(Ref::Tag(tag)),
(None, None) => Ok(Ref::Version(source_dataset.version().version)),
(None, None) => Ok(Ref::Version(None, Some(source_dataset.version().version))),
_ => Err(Error::InvalidInput {
message: "Cannot specify both source_version and source_tag".to_string(),
}),

View File

@@ -261,7 +261,7 @@ impl Database for LanceNamespaceDatabase {
return listing_db
.open_table(OpenTableRequest {
name: request.name.clone(),
namespace: request.namespace.clone(),
namespace: vec![],
index_cache_size: None,
lance_read_params: None,
})
@@ -305,7 +305,14 @@ impl Database for LanceNamespaceDatabase {
)
.await?;
listing_db.create_table(request).await
let create_request = DbCreateTableRequest {
name: request.name,
namespace: vec![],
data: request.data,
mode: request.mode,
write_options: request.write_options,
};
listing_db.create_table(create_request).await
}
async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
@@ -332,7 +339,13 @@ impl Database for LanceNamespaceDatabase {
.create_listing_database(&request.name, &location, response.storage_options)
.await?;
listing_db.open_table(request).await
let open_request = OpenTableRequest {
name: request.name.clone(),
namespace: vec![],
index_cache_size: request.index_cache_size,
lance_read_params: request.lance_read_params,
};
listing_db.open_table(open_request).await
}
async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {

View File

@@ -8,7 +8,6 @@ use std::sync::Arc;
use std::time::Duration;
use vector::IvfFlatIndexBuilder;
use crate::index::vector::IvfRqIndexBuilder;
use crate::{table::BaseTable, DistanceType, Error, Result};
use self::{
@@ -54,9 +53,6 @@ pub enum Index {
/// IVF index with Product Quantization
IvfPq(IvfPqIndexBuilder),
/// IVF index with RabitQ Quantization
IvfRq(IvfRqIndexBuilder),
/// IVF-HNSW index with Product Quantization
/// It is a variant of the HNSW algorithm that uses product quantization to compress the vectors.
IvfHnswPq(IvfHnswPqIndexBuilder),
@@ -279,8 +275,6 @@ pub enum IndexType {
IvfFlat,
#[serde(alias = "IVF_PQ")]
IvfPq,
#[serde(alias = "IVF_RQ")]
IvfRq,
#[serde(alias = "IVF_HNSW_PQ")]
IvfHnswPq,
#[serde(alias = "IVF_HNSW_SQ")]
@@ -302,7 +296,6 @@ impl std::fmt::Display for IndexType {
match self {
Self::IvfFlat => write!(f, "IVF_FLAT"),
Self::IvfPq => write!(f, "IVF_PQ"),
Self::IvfRq => write!(f, "IVF_RQ"),
Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
Self::BTree => write!(f, "BTREE"),
@@ -324,7 +317,6 @@ impl std::str::FromStr for IndexType {
"FTS" | "INVERTED" => Ok(Self::FTS),
"IVF_FLAT" => Ok(Self::IvfFlat),
"IVF_PQ" => Ok(Self::IvfPq),
"IVF_RQ" => Ok(Self::IvfRq),
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
"IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
_ => Err(Error::InvalidInput {

View File

@@ -291,52 +291,6 @@ pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
}
}
/// Builder for an IVF RQ index.
///
/// This index stores a compressed (quantized) copy of every vector. Each dimension
/// is quantized into a small number of bits.
/// The parameters `num_bits` control this process, providing a tradeoff
/// between index size (and thus search speed) and index accuracy.
///
/// The partitioning process is called IVF and the `num_partitions` parameter controls how
/// many groups to create.
///
/// Note that training an IVF RQ index on a large dataset is a slow operation and
/// currently is also a memory intensive operation.
#[derive(Debug, Clone)]
pub struct IvfRqIndexBuilder {
// IVF
pub(crate) distance_type: DistanceType,
pub(crate) num_partitions: Option<u32>,
pub(crate) num_bits: Option<u32>,
pub(crate) sample_rate: u32,
pub(crate) max_iterations: u32,
pub(crate) target_partition_size: Option<u32>,
}
impl Default for IvfRqIndexBuilder {
fn default() -> Self {
Self {
distance_type: DistanceType::L2,
num_partitions: None,
num_bits: None,
sample_rate: 256,
max_iterations: 50,
target_partition_size: None,
}
}
}
impl IvfRqIndexBuilder {
impl_distance_type_setter!();
impl_ivf_params_setter!();
pub fn num_bits(mut self, num_bits: u32) -> Self {
self.num_bits = Some(num_bits);
self
}
}
/// Builder for an IVF HNSW PQ index.
///
/// This index is a combination of IVF and HNSW.

View File

@@ -647,7 +647,7 @@ impl From<StorageOptions> for RemoteOptions {
let mut filtered = HashMap::new();
for opt in supported_opts {
if let Some(v) = options.0.get(opt) {
filtered.insert(opt.to_string(), v.to_string());
filtered.insert(opt.to_string(), v.clone());
}
}
Self::new(filtered)

View File

@@ -1383,30 +1383,35 @@ impl Table {
}
pub struct NativeTags {
inner: LanceTags,
dataset: dataset::DatasetConsistencyWrapper,
}
#[async_trait]
impl Tags for NativeTags {
async fn list(&self) -> Result<HashMap<String, TagContents>> {
Ok(self.inner.list().await?)
let dataset = self.dataset.get().await?;
Ok(dataset.tags().list().await?)
}
async fn get_version(&self, tag: &str) -> Result<u64> {
Ok(self.inner.get_version(tag).await?)
let dataset = self.dataset.get().await?;
Ok(dataset.tags().get_version(tag).await?)
}
async fn create(&mut self, tag: &str, version: u64) -> Result<()> {
self.inner.create(tag, version).await?;
let dataset = self.dataset.get().await?;
dataset.tags().create(tag, version).await?;
Ok(())
}
async fn delete(&mut self, tag: &str) -> Result<()> {
self.inner.delete(tag).await?;
let dataset = self.dataset.get().await?;
dataset.tags().delete(tag).await?;
Ok(())
}
async fn update(&mut self, tag: &str, version: u64) -> Result<()> {
self.inner.update(tag, version).await?;
let dataset = self.dataset.get().await?;
dataset.tags().update(tag, version).await?;
Ok(())
}
}
@@ -1780,13 +1785,13 @@ impl NativeTable {
BuiltinIndexType::BTree,
)))
} else {
return Err(Error::InvalidInput {
Err(Error::InvalidInput {
message: format!(
"there are no indices supported for the field `{}` with the data type {}",
field.name(),
field.data_type()
),
});
})?
}
}
Index::BTree(_) => {
@@ -1838,18 +1843,6 @@ impl NativeTable {
);
Ok(Box::new(lance_idx_params))
}
Index::IvfRq(index) => {
Self::validate_index_type(field, "IVF RQ", supported_vector_data_type)?;
let num_partitions = self
.get_num_partitions(index.num_partitions, false, None)
.await?;
let lance_idx_params = VectorIndexParams::ivf_rq(
num_partitions as usize,
index.num_bits.unwrap_or(1) as u8,
index.distance_type.into(),
);
Ok(Box::new(lance_idx_params))
}
Index::IvfHnswPq(index) => {
Self::validate_index_type(field, "IVF HNSW PQ", supported_vector_data_type)?;
let dim = Self::get_vector_dimension(field)?;
@@ -1919,11 +1912,9 @@ impl NativeTable {
Index::Bitmap(_) => IndexType::Bitmap,
Index::LabelList(_) => IndexType::LabelList,
Index::FTS(_) => IndexType::Inverted,
Index::IvfFlat(_)
| Index::IvfPq(_)
| Index::IvfRq(_)
| Index::IvfHnswPq(_)
| Index::IvfHnswSq(_) => IndexType::Vector,
Index::IvfFlat(_) | Index::IvfPq(_) | Index::IvfHnswPq(_) | Index::IvfHnswSq(_) => {
IndexType::Vector
}
}
}
@@ -2454,10 +2445,8 @@ impl BaseTable for NativeTable {
}
async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
let dataset = self.dataset.get().await?;
Ok(Box::new(NativeTags {
inner: dataset.tags.clone(),
dataset: self.dataset.clone(),
}))
}

View File

@@ -172,7 +172,7 @@ impl TableProvider for BaseTableAdapter {
if let Some(projection) = projection {
let field_names = projection
.iter()
.map(|i| self.schema.field(*i).name().to_string())
.map(|i| self.schema.field(*i).name().clone())
.collect();
query.select = Select::Columns(field_names);
}

View File

@@ -98,8 +98,9 @@ impl DatasetRef {
}
Self::TimeTravel { dataset, version } => {
let should_checkout = match &target_ref {
refs::Ref::Version(target_ver) => version != target_ver,
refs::Ref::Tag(_) => true, // Always checkout for tags
refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
refs::Ref::Version(_, None) => true, // No specific version, always checkout
refs::Ref::Tag(_) => true, // Always checkout for tags
};
if should_checkout {

View File

@@ -39,7 +39,7 @@ impl PatchStoreParam for Option<ObjectStoreParams> {
let mut params = self.unwrap_or_default();
if params.object_store_wrapper.is_some() {
return Err(Error::Other {
message: "can not patch param because object store is already set.".into(),
message: "can not patch param because object store is already set".into(),
source: None,
});
}
@@ -174,7 +174,7 @@ pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result
),
})
} else {
Ok(candidates[0].to_string())
Ok(candidates[0].clone())
}
}