Compare commits


6 Commits

Author  SHA1  Message  Date
Colin P. McCabe  d6ea17073c  test  2025-09-30 11:58:20 -07:00
BubbleCal  c123bbf391  Merge branch 'main' of https://github.com/lancedb/lancedb into add-ivfrq  2025-09-30 16:30:58 +08:00
BubbleCal  fb856005a9  update docs (Signed-off-by: BubbleCal <bubble-cal@outlook.com>)  2025-09-29 18:24:58 +08:00
BubbleCal  5c1c2e2dd6  fmt (Signed-off-by: BubbleCal <bubble-cal@outlook.com>)  2025-09-29 17:47:59 +08:00
BubbleCal  1beef5f6e3  fix (Signed-off-by: BubbleCal <bubble-cal@outlook.com>)  2025-09-29 17:08:12 +08:00
BubbleCal  0913632584  feat: support IVF_RQ index type (Signed-off-by: BubbleCal <bubble-cal@outlook.com>)  2025-09-29 16:53:43 +08:00
62 changed files with 2219 additions and 1552 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.2-beta.2"
current_version = "0.22.2-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,45 +0,0 @@
name: Create Failure Issue
description: Creates a GitHub issue if any jobs in the workflow failed
inputs:
job-results:
description: 'JSON string of job results from needs context'
required: true
workflow-name:
description: 'Name of the workflow'
required: true
runs:
using: composite
steps:
- name: Check for failures and create issue
shell: bash
env:
JOB_RESULTS: ${{ inputs.job-results }}
WORKFLOW_NAME: ${{ inputs.workflow-name }}
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
GH_TOKEN: ${{ github.token }}
run: |
# Check if any job failed
if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
echo "Detected job failures, creating issue..."
# Extract failed job names
FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')
# Create issue with workflow name, failed jobs, and run URL
gh issue create \
--title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
--body "The workflow **$WORKFLOW_NAME** failed during execution.
**Failed jobs:** $FAILED_JOBS
**Run URL:** $RUN_URL
Please investigate the failed jobs and address any issues." \
--label "ci"
echo "Issue created successfully"
else
echo "No job failures detected, skipping issue creation"
fi

View File

@@ -38,17 +38,3 @@ jobs:
- name: Publish the package
run: |
cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [build]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -58,7 +58,7 @@ jobs:
cache: 'npm'
cache-dependency-path: docs/package-lock.json
- name: Install node dependencies
working-directory: nodejs
working-directory: node
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev

View File

@@ -43,6 +43,7 @@ jobs:
- uses: Swatinem/rust-cache@v2
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
toolchain: "1.81.0"
cache-workspaces: "./java/core/lancedb-jni"
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
@@ -111,17 +112,3 @@ jobs:
env:
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [linux-arm64, linux-x86, macos-arm64]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -6,7 +6,6 @@ on:
- main
pull_request:
paths:
- Cargo.toml
- nodejs/**
- .github/workflows/nodejs.yml
- docker-compose.yml

View File

@@ -365,17 +365,3 @@ jobs:
ARGS="$ARGS --tag preview"
fi
npm publish $ARGS
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [build-lancedb, test-lancedb, publish]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -173,17 +173,3 @@ jobs:
generate_release_notes: false
name: Python LanceDB v${{ steps.extract_version.outputs.version }}
body: ${{ steps.python_release_notes.outputs.changelog }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [linux, mac, windows]
permissions:
contents: read
issues: write
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -6,7 +6,6 @@ on:
- main
pull_request:
paths:
- Cargo.toml
- python/**
- .github/workflows/python.yml

View File

@@ -125,9 +125,6 @@ jobs:
- name: Run examples
run: cargo run --example simple --locked
- name: Run remote tests
# Running this requires access to secrets, so skip if this is
# a PR from a fork.
if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
run: make -C ./lancedb remote-tests
macos:

Cargo.lock (generated, 2615 changed lines): file diff suppressed because it is too large.

View File

@@ -15,30 +15,31 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.38.2", default-features = false, "features" = ["dynamodb"] }
lance-io = { "version" = "=0.38.2", default-features = false }
lance-index = "=0.38.2"
lance-linalg = "=0.38.2"
lance-table = "=0.38.2"
lance-testing = "=0.38.2"
lance-datafusion = "=0.38.2"
lance-encoding = "=0.38.2"
lance-namespace = "0.0.18"
lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.37.0", default-features = false, "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-namespace = "0.0.15"
# Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false }
arrow-array = "56.2"
arrow-data = "56.2"
arrow-ipc = "56.2"
arrow-ord = "56.2"
arrow-schema = "56.2"
arrow-cast = "56.2"
arrow = { version = "55.1", optional = false }
arrow-array = "55.1"
arrow-data = "55.1"
arrow-ipc = "55.1"
arrow-ord = "55.1"
arrow-schema = "55.1"
arrow-arith = "55.1"
arrow-cast = "55.1"
async-trait = "0"
datafusion = { version = "50.1", default-features = false }
datafusion-catalog = "50.1"
datafusion-common = { version = "50.1", default-features = false }
datafusion-execution = "50.1"
datafusion-expr = "50.1"
datafusion-physical-plan = "50.1"
datafusion = { version = "49.0", default-features = false }
datafusion-catalog = "49.0"
datafusion-common = { version = "49.0", default-features = false }
datafusion-execution = "49.0"
datafusion-expr = "49.0"
datafusion-physical-plan = "49.0"
env_logger = "0.11"
half = { "version" = "2.6.0", default-features = false, features = [
"num-traits",
@@ -51,6 +52,7 @@ pin-project = "1.0.7"
snafu = "0.8"
url = "2"
num-traits = "0.2"
rand = "0.9"
regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
@@ -58,17 +60,7 @@ crunchy = "0.2.4"
# Temporary pins to work around downstream issues
# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
chrono = "=0.4.41"
# https://github.com/RustCrypto/formats/issues/1684
base64ct = "=1.6.0"
# Workaround for: https://github.com/Lokathor/bytemuck/issues/306
bytemuck_derive = ">=1.8.1, <1.9.0"
# This is only needed when we reference preview releases of lance
# [patch.crates-io]
# # Force to use the same lance version as the rest of the project to avoid duplicate dependencies
# lance = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-io = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-index = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-linalg = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-table = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-testing = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-datafusion = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-encoding = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }

View File

@@ -16,47 +16,30 @@ check_command_exists() {
}
if [[ ! -e ./lancedb ]]; then
if [[ -v SOPHON_READ_TOKEN ]]; then
INPUT="lancedb-linux-x64"
gh release \
--repo lancedb/lancedb \
download ci-support-binaries \
--pattern "${INPUT}" \
|| die "failed to fetch cli."
check_command_exists openssl
openssl enc -aes-256-cbc \
-d -pbkdf2 \
-pass "env:SOPHON_READ_TOKEN" \
-in "${INPUT}" \
-out ./lancedb-linux-x64.tar.gz \
|| die "openssl failed"
TARGET="${INPUT}.tar.gz"
else
ARCH="x64"
if [[ $OSTYPE == 'darwin'* ]]; then
UNAME=$(uname -m)
if [[ $UNAME == 'arm64' ]]; then
ARCH='arm64'
fi
OSTYPE="macos"
elif [[ $OSTYPE == 'linux'* ]]; then
if [[ $UNAME == 'aarch64' ]]; then
ARCH='arm64'
fi
OSTYPE="linux"
else
die "unknown OSTYPE: $OSTYPE"
ARCH="x64"
if [[ $OSTYPE == 'darwin'* ]]; then
UNAME=$(uname -m)
if [[ $UNAME == 'arm64' ]]; then
ARCH='arm64'
fi
check_command_exists gh
TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
gh release \
--repo lancedb/sophon \
download lancedb-cli-v0.0.3 \
--pattern "${TARGET}" \
|| die "failed to fetch cli."
OSTYPE="macos"
elif [[ $OSTYPE == 'linux'* ]]; then
if [[ $UNAME == 'aarch64' ]]; then
ARCH='arm64'
fi
OSTYPE="linux"
else
die "unknown OSTYPE: $OSTYPE"
fi
check_command_exists gh
TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
gh release \
--repo lancedb/sophon \
download lancedb-cli-v0.0.3 \
--pattern "${TARGET}" \
|| die "failed to fetch cli."
check_command_exists tar
tar xvf "${TARGET}" || die "tar failed."
[[ -e ./lancedb ]] || die "failed to extract lancedb."

View File

@@ -117,7 +117,7 @@ def update_cargo_toml(line_updater):
lance_line = ""
is_parsing_lance_line = False
for line in lines:
if line.startswith("lance") and not line.startswith("lance-namespace"):
if line.startswith("lance"):
# Check if this is a single-line or multi-line entry
# Single-line entries either:
# 1. End with } (complete inline table)

View File

@@ -84,7 +84,6 @@ plugins:
'examples.md': 'https://lancedb.com/docs/tutorials/'
'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
'guides/storage.md': 'https://lancedb.com/docs/storage/integrations'
@@ -403,4 +402,4 @@ extra:
- icon: fontawesome/brands/x-twitter
link: https://twitter.com/lancedb
- icon: fontawesome/brands/linkedin
link: https://www.linkedin.com/company/lancedb
link: https://www.linkedin.com/company/lancedb

View File

@@ -194,6 +194,37 @@ currently is also a memory intensive operation.
***
### ivfRq()
```ts
static ivfRq(options?): Index
```
Create an IvfRq index
IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
and organizes them into IVF partitions.
Each dimension is quantized into a small number of bits. The `num_bits`
parameter controls this process, providing a tradeoff between index size (and
thus search speed) and index accuracy.
The partitioning process is called IVF and the `num_partitions` parameter controls how
many groups to create.
Note that training an IVF RQ index on a large dataset is a slow operation and
currently is also a memory intensive operation.
#### Parameters
* **options?**: `Partial`&lt;[`IvfRqOptions`](../interfaces/IvfRqOptions.md)&gt;
#### Returns
[`Index`](Index.md)
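#### Example
A minimal usage sketch, assuming a local database opened with `connect` and an existing table with a vector column; the database path, table name, `"vec"` column name, and option values below are all illustrative, not recommendations:
```ts
import { connect, Index } from "@lancedb/lancedb";

const db = await connect("data/sample-lancedb"); // illustrative path
const tbl = await db.openTable("my_table"); // hypothetical table name

// Build an IVF_RQ index on the "vec" vector column. Both options may be
// omitted: numPartitions defaults to ~sqrt(num_rows), numBits to 1.
await tbl.createIndex("vec", {
  config: Index.ivfRq({
    numPartitions: 10,
    numBits: 1,
  }),
});
```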
***
### labelList()
```ts

View File

@@ -68,6 +68,7 @@
- [IndexStatistics](interfaces/IndexStatistics.md)
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
- [IvfPqOptions](interfaces/IvfPqOptions.md)
- [IvfRqOptions](interfaces/IvfRqOptions.md)
- [MergeResult](interfaces/MergeResult.md)
- [OpenTableOptions](interfaces/OpenTableOptions.md)
- [OptimizeOptions](interfaces/OptimizeOptions.md)

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.2-beta.2</version>
<version>0.22.2-beta.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.2-beta.2</version>
<version>0.22.2-beta.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.2-beta.2</version>
<version>0.22.2-beta.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.22.2-beta.2"
version = "0.22.2-beta.0"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,184 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as arrow from "../lancedb/arrow";
import { sanitizeField, sanitizeType } from "../lancedb/sanitize";
describe("sanitize", function () {
describe("sanitizeType function", function () {
it("should handle type objects", function () {
const type = new arrow.Int32();
const result = sanitizeType(type);
expect(result.typeId).toBe(arrow.Type.Int);
expect((result as arrow.Int).bitWidth).toBe(32);
expect((result as arrow.Int).isSigned).toBe(true);
const floatType = {
typeId: 3, // Type.Float = 3
precision: 2,
toString: () => "Float",
isFloat: true,
isFixedWidth: true,
};
const floatResult = sanitizeType(floatType);
expect(floatResult).toBeInstanceOf(arrow.DataType);
expect(floatResult.typeId).toBe(arrow.Type.Float);
const floatResult2 = sanitizeType({ ...floatType, typeId: () => 3 });
expect(floatResult2).toBeInstanceOf(arrow.DataType);
expect(floatResult2.typeId).toBe(arrow.Type.Float);
});
const allTypeNameTestCases = [
["null", new arrow.Null()],
["binary", new arrow.Binary()],
["utf8", new arrow.Utf8()],
["bool", new arrow.Bool()],
["int8", new arrow.Int8()],
["int16", new arrow.Int16()],
["int32", new arrow.Int32()],
["int64", new arrow.Int64()],
["uint8", new arrow.Uint8()],
["uint16", new arrow.Uint16()],
["uint32", new arrow.Uint32()],
["uint64", new arrow.Uint64()],
["float16", new arrow.Float16()],
["float32", new arrow.Float32()],
["float64", new arrow.Float64()],
["datemillisecond", new arrow.DateMillisecond()],
["dateday", new arrow.DateDay()],
["timenanosecond", new arrow.TimeNanosecond()],
["timemicrosecond", new arrow.TimeMicrosecond()],
["timemillisecond", new arrow.TimeMillisecond()],
["timesecond", new arrow.TimeSecond()],
["intervaldaytime", new arrow.IntervalDayTime()],
["intervalyearmonth", new arrow.IntervalYearMonth()],
["durationnanosecond", new arrow.DurationNanosecond()],
["durationmicrosecond", new arrow.DurationMicrosecond()],
["durationmillisecond", new arrow.DurationMillisecond()],
["durationsecond", new arrow.DurationSecond()],
] as const;
it.each(allTypeNameTestCases)(
'should map type name "%s" to %s',
function (name, expected) {
const result = sanitizeType(name);
expect(result).toBeInstanceOf(expected.constructor);
},
);
const caseVariationTestCases = [
["NULL", new arrow.Null()],
["Utf8", new arrow.Utf8()],
["FLOAT32", new arrow.Float32()],
["DaTedAy", new arrow.DateDay()],
] as const;
it.each(caseVariationTestCases)(
'should be case insensitive for type name "%s" mapped to %s',
function (name, expected) {
const result = sanitizeType(name);
expect(result).toBeInstanceOf(expected.constructor);
},
);
it("should throw error for unrecognized type name", function () {
expect(() => sanitizeType("invalid_type")).toThrow(
"Unrecognized type name in schema: invalid_type",
);
});
});
describe("sanitizeField function", function () {
it("should handle field with string type name", function () {
const field = sanitizeField({
name: "string_field",
type: "utf8",
nullable: true,
metadata: new Map([["key", "value"]]),
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("string_field");
expect(field.type).toBeInstanceOf(arrow.Utf8);
expect(field.nullable).toBe(true);
expect(field.metadata?.get("key")).toBe("value");
});
it("should handle field with type object", function () {
const floatType = {
typeId: 3, // Float
precision: 32,
};
const field = sanitizeField({
name: "float_field",
type: floatType,
nullable: false,
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("float_field");
expect(field.type).toBeInstanceOf(arrow.DataType);
expect(field.type.typeId).toBe(arrow.Type.Float);
expect((field.type as arrow.Float64).precision).toBe(32);
expect(field.nullable).toBe(false);
});
it("should handle field with direct Type instance", function () {
const field = sanitizeField({
name: "bool_field",
type: new arrow.Bool(),
nullable: true,
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("bool_field");
expect(field.type).toBeInstanceOf(arrow.Bool);
expect(field.nullable).toBe(true);
});
it("should throw error for invalid field object", function () {
expect(() =>
sanitizeField({
type: "int32",
nullable: true,
}),
).toThrow(
"The field passed in is missing a `type`/`name`/`nullable` property",
);
// Invalid type
expect(() =>
sanitizeField({
name: "invalid",
type: { invalid: true },
nullable: true,
}),
).toThrow("Expected a Type to have a typeId property");
// Invalid nullable
expect(() =>
sanitizeField({
name: "invalid_nullable",
type: "int32",
nullable: "not a boolean",
}),
).toThrow("The field passed in had a non-boolean `nullable` property");
});
it("should report error for invalid type name", function () {
expect(() =>
sanitizeField({
name: "invalid_field",
type: "invalid_type",
nullable: true,
}),
).toThrow(
"Unable to sanitize type for field: invalid_field due to error: Error: Unrecognized type name in schema: invalid_type",
);
});
});
});

View File

@@ -10,13 +10,7 @@ import * as arrow16 from "apache-arrow-16";
import * as arrow17 from "apache-arrow-17";
import * as arrow18 from "apache-arrow-18";
import {
Connection,
MatchQuery,
PhraseQuery,
Table,
connect,
} from "../lancedb";
import { MatchQuery, PhraseQuery, Table, connect } from "../lancedb";
import {
Table as ArrowTable,
Field,
@@ -27,8 +21,6 @@ import {
Int64,
List,
Schema,
SchemaLike,
Type,
Uint8,
Utf8,
makeArrowTable,
@@ -219,7 +211,8 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
},
);
it("should be able to omit nullable fields", async () => {
// TODO: https://github.com/lancedb/lancedb/issues/1832
it.skip("should be able to omit nullable fields", async () => {
const db = await connect(tmpDir.name);
const schema = new arrow.Schema([
new arrow.Field(
@@ -243,36 +236,23 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
await table.add([data3]);
let res = await table.query().limit(10).toArray();
const resVector = res.map((r) =>
r.vector ? Array.from(r.vector) : null,
);
const resVector = res.map((r) => r.get("vector").toArray());
expect(resVector).toEqual([null, data2.vector, data3.vector]);
const resItem = res.map((r) => r.item);
const resItem = res.map((r) => r.get("item").toArray());
expect(resItem).toEqual(["foo", null, "bar"]);
const resPrice = res.map((r) => r.price);
const resPrice = res.map((r) => r.get("price").toArray());
expect(resPrice).toEqual([10.0, 2.0, 3.0]);
const data4 = { item: "foo" };
// We can't omit a column if it's not nullable
await expect(table.add([data4])).rejects.toThrow(
"Append with different schema",
);
await expect(table.add([data4])).rejects.toThrow("Invalid user input");
// But we can alter columns to make them nullable
await table.alterColumns([{ path: "price", nullable: true }]);
await table.add([data4]);
res = (await table.query().limit(10).toArray()).map((r) => ({
...r.toJSON(),
vector: r.vector ? Array.from(r.vector) : null,
}));
// Rust fills missing nullable fields with null
expect(res).toEqual([
{ ...data1, vector: null },
{ ...data2, item: null },
data3,
{ ...data4, price: null, vector: null },
]);
res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
expect(res).toEqual([data1, data2, data3, data4]);
});
it("should be able to insert nullable data for non-nullable fields", async () => {
@@ -350,43 +330,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
const table = await db.createTable("my_table", data);
expect(await table.countRows()).toEqual(2);
});
it("should allow undefined and omitted nullable vector fields", async () => {
// Test for the bug: can't pass undefined or omit vector column
const db = await connect("memory://");
const schema = new arrow.Schema([
new arrow.Field("id", new arrow.Int32(), true),
new arrow.Field(
"vector",
new arrow.FixedSizeList(
32,
new arrow.Field("item", new arrow.Float32(), true),
),
true, // nullable = true
),
]);
const table = await db.createEmptyTable("test_table", schema);
// Should not throw error for undefined value
await table.add([{ id: 0, vector: undefined }]);
// Should not throw error for omitted field
await table.add([{ id: 1 }]);
// Should still work for null
await table.add([{ id: 2, vector: null }]);
// Should still work for actual vector
const testVector = new Array(32).fill(0.5);
await table.add([{ id: 3, vector: testVector }]);
expect(await table.countRows()).toEqual(4);
const res = await table.query().limit(10).toArray();
const resVector = res.map((r) =>
r.vector ? Array.from(r.vector) : null,
);
expect(resVector).toEqual([null, null, null, testVector]);
});
},
);
@@ -861,6 +804,15 @@ describe("When creating an index", () => {
});
});
it("should be able to create IVF_RQ", async () => {
await tbl.createIndex("vec", {
config: Index.ivfRq({
numPartitions: 10,
numBits: 1,
}),
});
});
it("should allow me to replace (or not) an existing index", async () => {
await tbl.createIndex("id");
// Default is replace=true
@@ -1511,9 +1463,7 @@ describe("when optimizing a dataset", () => {
it("delete unverified", async () => {
const version = await table.version();
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
version - 1
}.manifest`;
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${version - 1}.manifest`;
fs.rmSync(versionFile);
let stats = await table.optimize({ deleteUnverified: false });
@@ -2027,52 +1977,3 @@ describe("column name options", () => {
expect(results2.length).toBe(10);
});
});
describe("when creating an empty table", () => {
let con: Connection;
beforeEach(async () => {
const tmpDir = tmp.dirSync({ unsafeCleanup: true });
con = await connect(tmpDir.name);
});
afterEach(() => {
con.close();
});
it("can create an empty table from an arrow Schema", async () => {
const schema = new Schema([
new Field("id", new Int64()),
new Field("vector", new Float64()),
]);
const table = await con.createEmptyTable("test", schema);
const actualSchema = await table.schema();
expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
});
it("can create an empty table from schema that specifies field types by name", async () => {
const schemaLike = {
fields: [
{
name: "id",
type: "int64",
nullable: true,
},
{
name: "vector",
type: "float64",
nullable: true,
},
],
metadata: new Map(),
names: ["id", "vector"],
} satisfies SchemaLike;
const table = await con.createEmptyTable("test", schemaLike);
const actualSchema = await table.schema();
expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
});
});

View File

@@ -73,7 +73,7 @@ export type FieldLike =
| {
type: string;
name: string;
nullable: boolean;
nullable?: boolean;
metadata?: Map<string, string>;
};
@@ -1285,36 +1285,19 @@ function validateSchemaEmbeddings(
if (isFixedSizeList(field.type)) {
field = sanitizeField(field);
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
// Check if there's an embedding function registered for this field
let hasEmbeddingFunction = false;
// Check schema metadata for embedding functions
if (schema.metadata.has("embedding_functions")) {
const embeddings = JSON.parse(
schema.metadata.get("embedding_functions")!,
);
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
if (embeddings.find((f: any) => f["vectorColumn"] === field.name)) {
hasEmbeddingFunction = true;
}
}
// Check passed embedding function parameter
if (embeddings && embeddings.vectorColumn === field.name) {
hasEmbeddingFunction = true;
}
// If the field is nullable AND there's no embedding function, allow undefined/omitted values
if (field.nullable && !hasEmbeddingFunction) {
fields.push(field);
} else {
// Either not nullable OR has embedding function - require explicit values
if (hasEmbeddingFunction) {
// Don't add to missingEmbeddingFields since this is expected to be filled by embedding function
fields.push(field);
} else {
if (
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
embeddings.find((f: any) => f["vectorColumn"] === field.name) ===
undefined
) {
missingEmbeddingFields.push(field);
}
} else {
missingEmbeddingFields.push(field);
}
} else {
fields.push(field);

View File

@@ -85,6 +85,7 @@ export {
Index,
IndexOptions,
IvfPqOptions,
IvfRqOptions,
IvfFlatOptions,
HnswPqOptions,
HnswSqOptions,

View File

@@ -112,6 +112,77 @@ export interface IvfPqOptions {
sampleRate?: number;
}
export interface IvfRqOptions {
/**
* The number of IVF partitions to create.
*
* This value should generally scale with the number of rows in the dataset.
* By default the number of partitions is the square root of the number of
* rows.
*
* If this value is too large then the first part of the search (picking the
* right partition) will be slow. If this value is too small then the second
* part of the search (searching within a partition) will be slow.
*/
numPartitions?: number;
/**
* Number of bits per dimension for residual quantization.
*
* This value controls how much each residual component is compressed. The more
* bits, the more accurate the index will be, but the slower the search. Typical
* values are small integers; the default is 1 bit per dimension.
*/
numBits?: number;
/**
* Distance type to use to build the index.
*
* Default value is "l2".
*
* This is used when training the index to calculate the IVF partitions
* (vectors are grouped in partitions with similar vectors according to this
* distance type) and during quantization.
*
* The distance type used to train an index MUST match the distance type used
* to search the index. Failure to do so will yield inaccurate results.
*
* The following distance types are available:
*
* "l2" - Euclidean distance.
* "cosine" - Cosine distance.
* "dot" - Dot product.
*/
distanceType?: "l2" | "cosine" | "dot";
/**
* Max iterations to train IVF kmeans.
*
* When training an IVF index we use kmeans to calculate the partitions. This parameter
* controls how many iterations of kmeans to run.
*
* The default value is 50.
*/
maxIterations?: number;
/**
* The number of vectors, per partition, to sample when training IVF kmeans.
*
* When an IVF index is trained, we need to calculate partitions. These are groups
* of vectors that are similar to each other. To do this we use an algorithm called kmeans.
*
* Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
* random sample of the data. This parameter controls the size of the sample. The total
* number of vectors used to train the index is `sample_rate * num_partitions`.
*
* Increasing this value might improve the quality of the index but in most cases the
* default should be sufficient.
*
* The default value is 256.
*/
sampleRate?: number;
}
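As a rough sketch of how these options fit together (the values here are illustrative, not recommendations; `Index.ivfRq` is the factory defined later in this module):
```ts
// Hypothetical tuning example. With numPartitions = 1024 and the default
// sampleRate of 256, IVF kmeans trains on 256 * 1024 = 262,144 sampled vectors.
const options: Partial<IvfRqOptions> = {
  numPartitions: 1024, // default would be ~sqrt(num_rows)
  numBits: 1, // 1 bit per dimension (the default)
  distanceType: "cosine", // must match the distance type used when searching
  maxIterations: 50, // kmeans iterations for IVF training (the default)
  sampleRate: 256, // training set size = sampleRate * numPartitions
};
const index = Index.ivfRq(options);
```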
/**
* Options to create an `HNSW_PQ` index
*/
@@ -523,6 +594,35 @@ export class Index {
options?.distanceType,
options?.numPartitions,
options?.numSubVectors,
options?.numBits,
options?.maxIterations,
options?.sampleRate,
),
);
}
/**
* Create an IvfRq index
*
* IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
* and organizes them into IVF partitions.
*
* Each dimension is quantized into a small number of bits. The `num_bits`
* parameter controls this process, providing a tradeoff between index size
* (and thus search speed) and index accuracy.
*
* The partitioning process is called IVF and the `num_partitions` parameter controls how
* many groups to create.
*
* Note that training an IVF RQ index on a large dataset is a slow operation and
* currently is also a memory intensive operation.
*/
static ivfRq(options?: Partial<IvfRqOptions>) {
return new Index(
LanceDbIndex.ivfRq(
options?.distanceType,
options?.numPartitions,
options?.numBits,
options?.maxIterations,
options?.sampleRate,
),

View File

@@ -326,9 +326,6 @@ export function sanitizeDictionary(typeLike: object) {
// biome-ignore lint/suspicious/noExplicitAny: skip
export function sanitizeType(typeLike: unknown): DataType<any> {
if (typeof typeLike === "string") {
return dataTypeFromName(typeLike);
}
if (typeof typeLike !== "object" || typeLike === null) {
throw Error("Expected a Type but object was null/undefined");
}
@@ -450,7 +447,7 @@ export function sanitizeType(typeLike: unknown): DataType<any> {
case Type.DurationSecond:
return new DurationSecond();
default:
throw new Error("Unrecognized type id in schema: " + typeId);
throw new Error("Unrecoginized type id in schema: " + typeId);
}
}
@@ -470,15 +467,7 @@ export function sanitizeField(fieldLike: unknown): Field {
"The field passed in is missing a `type`/`name`/`nullable` property",
);
}
let type: DataType;
try {
type = sanitizeType(fieldLike.type);
} catch (error: unknown) {
throw Error(
`Unable to sanitize type for field: ${fieldLike.name} due to error: ${error}`,
{ cause: error },
);
}
const type = sanitizeType(fieldLike.type);
const name = fieldLike.name;
if (!(typeof name === "string")) {
throw Error("The field passed in had a non-string `name` property");
@@ -592,46 +581,3 @@ function sanitizeData(
},
);
}
const constructorsByTypeName = {
null: () => new Null(),
binary: () => new Binary(),
utf8: () => new Utf8(),
bool: () => new Bool(),
int8: () => new Int8(),
int16: () => new Int16(),
int32: () => new Int32(),
int64: () => new Int64(),
uint8: () => new Uint8(),
uint16: () => new Uint16(),
uint32: () => new Uint32(),
uint64: () => new Uint64(),
float16: () => new Float16(),
float32: () => new Float32(),
float64: () => new Float64(),
datemillisecond: () => new DateMillisecond(),
dateday: () => new DateDay(),
timenanosecond: () => new TimeNanosecond(),
timemicrosecond: () => new TimeMicrosecond(),
timemillisecond: () => new TimeMillisecond(),
timesecond: () => new TimeSecond(),
intervaldaytime: () => new IntervalDayTime(),
intervalyearmonth: () => new IntervalYearMonth(),
durationnanosecond: () => new DurationNanosecond(),
durationmicrosecond: () => new DurationMicrosecond(),
durationmillisecond: () => new DurationMillisecond(),
durationsecond: () => new DurationSecond(),
} as const;
type MappableTypeName = keyof typeof constructorsByTypeName;
export function dataTypeFromName(typeName: string): DataType {
const normalizedTypeName = typeName.toLowerCase() as MappableTypeName;
const _constructor = constructorsByTypeName[normalizedTypeName];
if (!_constructor) {
throw new Error("Unrecognized type name in schema: " + typeName);
}
return _constructor();
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -6,6 +6,7 @@ use std::sync::Mutex;
use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
use lancedb::index::vector::{
IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
IvfRqIndexBuilder,
};
use lancedb::index::Index as LanceDbIndex;
use napi_derive::napi;
@@ -65,6 +66,36 @@ impl Index {
})
}
#[napi(factory)]
pub fn ivf_rq(
distance_type: Option<String>,
num_partitions: Option<u32>,
num_bits: Option<u32>,
max_iterations: Option<u32>,
sample_rate: Option<u32>,
) -> napi::Result<Self> {
let mut ivf_rq_builder = IvfRqIndexBuilder::default();
if let Some(distance_type) = distance_type {
let distance_type = parse_distance_type(distance_type)?;
ivf_rq_builder = ivf_rq_builder.distance_type(distance_type);
}
if let Some(num_partitions) = num_partitions {
ivf_rq_builder = ivf_rq_builder.num_partitions(num_partitions);
}
if let Some(num_bits) = num_bits {
ivf_rq_builder = ivf_rq_builder.num_bits(num_bits);
}
if let Some(max_iterations) = max_iterations {
ivf_rq_builder = ivf_rq_builder.max_iterations(max_iterations);
}
if let Some(sample_rate) = sample_rate {
ivf_rq_builder = ivf_rq_builder.sample_rate(sample_rate);
}
Ok(Self {
inner: Mutex::new(Some(LanceDbIndex::IvfRq(ivf_rq_builder))),
})
}
#[napi(factory)]
pub fn ivf_flat(
distance_type: Option<String>,

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.25.2"
current_version = "0.25.2-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.25.2"
version = "0.25.2-beta.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
@@ -14,12 +14,12 @@ name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "56.2", features = ["pyarrow"] }
arrow = { version = "55.1", features = ["pyarrow"] }
async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false }
env_logger.workspace = true
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.25", features = [
pyo3 = { version = "0.24", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.24", features = [
"attributes",
"tokio-runtime",
] }
@@ -28,7 +28,7 @@ futures.workspace = true
tokio = { version = "1.40", features = ["sync"] }
[build-dependencies]
pyo3-build-config = { version = "0.25", features = [
pyo3-build-config = { version = "0.24", features = [
"extension-module",
"abi3-py39",
] }

View File

@@ -5,12 +5,12 @@ dynamic = ["version"]
dependencies = [
"deprecation",
"numpy",
"overrides>=0.7; python_version<'3.12'",
"overrides>=0.7",
"packaging",
"pyarrow>=16",
"pydantic>=1.10",
"tqdm>=4.27.0",
"lance-namespace>=0.0.16"
"lance-namespace==0.0.6"
]
description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]

View File

@@ -133,7 +133,6 @@ class Tags:
async def update(self, tag: str, version: int): ...
class IndexConfig:
name: str
index_type: str
columns: List[str]

View File

@@ -6,18 +6,10 @@ from __future__ import annotations
from abc import abstractmethod
from pathlib import Path
import sys
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
if sys.version_info >= (3, 12):
from typing import override
class EnforceOverrides:
pass
else:
from overrides import EnforceOverrides, override # type: ignore
from lancedb.embeddings.registry import EmbeddingFunctionRegistry
from overrides import EnforceOverrides, override # type: ignore
from lancedb.common import data_to_reader, sanitize_uri, validate_schema
from lancedb.background_loop import LOOP

View File

@@ -605,9 +605,53 @@ class IvfPq:
target_partition_size: Optional[int] = None
@dataclass
class IvfRq:
"""Describes an IVF RQ Index
IVF-RQ (RabitQ Quantization) stores a compressed copy of each vector using
RabitQ quantization and organizes them into IVF partitions. Parameters
largely mirror IVF-PQ for consistency.
Attributes
----------
distance_type: str, default "l2"
Distance metric used to train the index and for quantization.
The following distance types are available:
"l2" - Euclidean distance.
"cosine" - Cosine distance.
"dot" - Dot product.
num_partitions: int, default sqrt(num_rows)
Number of IVF partitions to create.
num_bits: int, default 1
Number of bits to encode each dimension.
max_iterations: int, default 50
Max iterations to train kmeans when computing IVF partitions.
sample_rate: int, default 256
Controls the number of training vectors: sample_rate * num_partitions.
target_partition_size: int, default 8192
Target size of each partition.
"""
distance_type: Literal["l2", "cosine", "dot"] = "l2"
num_partitions: Optional[int] = None
num_bits: int = 1
max_iterations: int = 50
sample_rate: int = 256
target_partition_size: Optional[int] = None
__all__ = [
"BTree",
"IvfPq",
"IvfRq",
"IvfFlat",
"HnswPq",
"HnswSq",

View File

@@ -12,18 +12,13 @@ from __future__ import annotations
from typing import Dict, Iterable, List, Optional, Union
import os
import sys
if sys.version_info >= (3, 12):
from typing import override
else:
from overrides import override
from lancedb.db import DBConnection
from lancedb.table import LanceTable, Table
from lancedb.util import validate_table_name
from lancedb.common import validate_schema
from lancedb.table import sanitize_create_table
from overrides import override
from lance_namespace import LanceNamespace, connect as namespace_connect
from lance_namespace_urllib3_client.models import (

View File

@@ -5,20 +5,15 @@
from datetime import timedelta
import logging
from concurrent.futures import ThreadPoolExecutor
import sys
from typing import Any, Dict, Iterable, List, Optional, Union
from urllib.parse import urlparse
import warnings
if sys.version_info >= (3, 12):
from typing import override
else:
from overrides import override
# Remove this import to fix circular dependency
# from lancedb import connect_async
from lancedb.remote import ClientConfig
import pyarrow as pa
from overrides import override
from ..common import DATA
from ..db import DBConnection, LOOP

View File

@@ -114,7 +114,7 @@ class RemoteTable(Table):
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar",
*,
replace: bool = False,
wait_timeout: Optional[timedelta] = None,
wait_timeout: timedelta = None,
name: Optional[str] = None,
):
"""Creates a scalar index
@@ -153,7 +153,7 @@ class RemoteTable(Table):
column: str,
*,
replace: bool = False,
wait_timeout: Optional[timedelta] = None,
wait_timeout: timedelta = None,
with_position: bool = False,
# tokenizer configs:
base_tokenizer: str = "simple",

View File

@@ -44,7 +44,7 @@ import numpy as np
from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from .index import BTree, IvfFlat, IvfPq, Bitmap, IvfRq, LabelList, HnswPq, HnswSq, FTS
from .merge import LanceMergeInsertBuilder
from .pydantic import LanceModel, model_to_dict
from .query import (
@@ -1991,7 +1991,7 @@ class LanceTable(Table):
index_cache_size: Optional[int] = None,
num_bits: int = 8,
index_type: Literal[
"IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
"IVF_FLAT", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"
] = "IVF_PQ",
max_iterations: int = 50,
sample_rate: int = 256,
@@ -2039,6 +2039,15 @@ class LanceTable(Table):
sample_rate=sample_rate,
target_partition_size=target_partition_size,
)
elif index_type == "IVF_RQ":
config = IvfRq(
distance_type=metric,
num_partitions=num_partitions,
num_bits=num_bits,
max_iterations=max_iterations,
sample_rate=sample_rate,
target_partition_size=target_partition_size,
)
elif index_type == "IVF_HNSW_PQ":
config = HnswPq(
distance_type=metric,
@@ -3330,7 +3339,7 @@ class AsyncTable:
*,
replace: Optional[bool] = None,
config: Optional[
Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
Union[IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
] = None,
wait_timeout: Optional[timedelta] = None,
name: Optional[str] = None,
@@ -3369,11 +3378,12 @@ class AsyncTable:
"""
if config is not None:
if not isinstance(
config, (IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS)
config,
(IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS),
):
raise TypeError(
"config must be an instance of IvfPq, HnswPq, HnswSq, BTree,"
" Bitmap, LabelList, or FTS"
"config must be an instance of IvfPq, IvfRq, HnswPq, HnswSq, BTree,"
" Bitmap, LabelList, or FTS, but got " + str(type(config))
)
try:
await self._inner.create_index(

View File

@@ -18,10 +18,17 @@ AddMode = Literal["append", "overwrite"]
CreateMode = Literal["create", "overwrite"]
# Index type literals
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"]
VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ", "IVF_RQ"]
ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"]
IndexType = Literal[
"IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST"
"IVF_PQ",
"IVF_HNSW_PQ",
"IVF_HNSW_SQ",
"FTS",
"BTREE",
"BITMAP",
"LABEL_LIST",
"IVF_RQ",
]
# Tokenizer literals

View File

@@ -8,7 +8,17 @@ import pyarrow as pa
import pytest
import pytest_asyncio
from lancedb import AsyncConnection, AsyncTable, connect_async
from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from lancedb.index import (
BTree,
IvfFlat,
IvfPq,
IvfRq,
Bitmap,
LabelList,
HnswPq,
HnswSq,
FTS,
)
@pytest_asyncio.fixture
@@ -195,6 +205,16 @@ async def test_create_4bit_ivfpq_index(some_table: AsyncTable):
assert stats.loss >= 0.0
@pytest.mark.asyncio
async def test_create_ivfrq_index(some_table: AsyncTable):
await some_table.create_index("vector", config=IvfRq(num_bits=1))
indices = await some_table.list_indices()
assert len(indices) == 1
assert indices[0].index_type == "IvfRq"
assert indices[0].columns == ["vector"]
assert indices[0].name == "vector_idx"
@pytest.mark.asyncio
async def test_create_hnswpq_index(some_table: AsyncTable):
await some_table.create_index("vector", config=HnswPq(num_partitions=10))

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use lancedb::index::vector::IvfFlatIndexBuilder;
use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder};
use lancedb::index::{
scalar::{BTreeIndexBuilder, FtsIndexBuilder},
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
@@ -87,6 +87,22 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
}
Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
},
"IvfRq" => {
let params = source.extract::<IvfRqParams>()?;
let distance_type = parse_distance_type(params.distance_type)?;
let mut ivf_rq_builder = IvfRqIndexBuilder::default()
.distance_type(distance_type)
.max_iterations(params.max_iterations)
.sample_rate(params.sample_rate)
.num_bits(params.num_bits);
if let Some(num_partitions) = params.num_partitions {
ivf_rq_builder = ivf_rq_builder.num_partitions(num_partitions);
}
if let Some(target_partition_size) = params.target_partition_size {
ivf_rq_builder = ivf_rq_builder.target_partition_size(target_partition_size);
}
Ok(LanceDbIndex::IvfRq(ivf_rq_builder))
},
"HnswPq" => {
let params = source.extract::<IvfHnswPqParams>()?;
let distance_type = parse_distance_type(params.distance_type)?;
@@ -170,6 +186,16 @@ struct IvfPqParams {
target_partition_size: Option<u32>,
}
#[derive(FromPyObject)]
struct IvfRqParams {
distance_type: String,
num_partitions: Option<u32>,
num_bits: u32,
max_iterations: u32,
sample_rate: u32,
target_partition_size: Option<u32>,
}
#[derive(FromPyObject)]
struct IvfHnswPqParams {
distance_type: String,

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.22.2-beta.2"
version = "0.22.2-beta.0"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -52,13 +52,13 @@ pub fn infer_vector_columns(
for field in reader.schema().fields() {
match field.data_type() {
DataType::FixedSizeList(sub_field, _) if sub_field.data_type().is_floating() => {
columns.push(field.name().clone());
columns.push(field.name().to_string());
}
DataType::List(sub_field) if sub_field.data_type().is_floating() && !strict => {
columns_to_infer.insert(field.name().clone(), None);
columns_to_infer.insert(field.name().to_string(), None);
}
DataType::LargeList(sub_field) if sub_field.data_type().is_floating() && !strict => {
columns_to_infer.insert(field.name().clone(), None);
columns_to_infer.insert(field.name().to_string(), None);
}
_ => {}
}

View File

@@ -718,9 +718,9 @@ impl Database for ListingDatabase {
.map_err(|e| Error::Lance { source: e })?;
let version_ref = match (request.source_version, request.source_tag) {
(Some(v), None) => Ok(Ref::Version(None, Some(v))),
(Some(v), None) => Ok(Ref::Version(v)),
(None, Some(tag)) => Ok(Ref::Tag(tag)),
(None, None) => Ok(Ref::Version(None, Some(source_dataset.version().version))),
(None, None) => Ok(Ref::Version(source_dataset.version().version)),
_ => Err(Error::InvalidInput {
message: "Cannot specify both source_version and source_tag".to_string(),
}),

View File

@@ -261,7 +261,7 @@ impl Database for LanceNamespaceDatabase {
return listing_db
.open_table(OpenTableRequest {
name: request.name.clone(),
namespace: vec![],
namespace: request.namespace.clone(),
index_cache_size: None,
lance_read_params: None,
})
@@ -305,14 +305,7 @@ impl Database for LanceNamespaceDatabase {
)
.await?;
let create_request = DbCreateTableRequest {
name: request.name,
namespace: vec![],
data: request.data,
mode: request.mode,
write_options: request.write_options,
};
listing_db.create_table(create_request).await
listing_db.create_table(request).await
}
async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
@@ -339,13 +332,7 @@ impl Database for LanceNamespaceDatabase {
.create_listing_database(&request.name, &location, response.storage_options)
.await?;
let open_request = OpenTableRequest {
name: request.name.clone(),
namespace: vec![],
index_cache_size: request.index_cache_size,
lance_read_params: request.lance_read_params,
};
listing_db.open_table(open_request).await
listing_db.open_table(request).await
}
async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {

View File

@@ -8,6 +8,7 @@ use std::sync::Arc;
use std::time::Duration;
use vector::IvfFlatIndexBuilder;
use crate::index::vector::IvfRqIndexBuilder;
use crate::{table::BaseTable, DistanceType, Error, Result};
use self::{
@@ -53,6 +54,9 @@ pub enum Index {
/// IVF index with Product Quantization
IvfPq(IvfPqIndexBuilder),
/// IVF index with RabitQ Quantization
IvfRq(IvfRqIndexBuilder),
/// IVF-HNSW index with Product Quantization
/// It is a variant of the HNSW algorithm that uses product quantization to compress the vectors.
IvfHnswPq(IvfHnswPqIndexBuilder),
@@ -275,6 +279,8 @@ pub enum IndexType {
IvfFlat,
#[serde(alias = "IVF_PQ")]
IvfPq,
#[serde(alias = "IVF_RQ")]
IvfRq,
#[serde(alias = "IVF_HNSW_PQ")]
IvfHnswPq,
#[serde(alias = "IVF_HNSW_SQ")]
@@ -296,6 +302,7 @@ impl std::fmt::Display for IndexType {
match self {
Self::IvfFlat => write!(f, "IVF_FLAT"),
Self::IvfPq => write!(f, "IVF_PQ"),
Self::IvfRq => write!(f, "IVF_RQ"),
Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"),
Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"),
Self::BTree => write!(f, "BTREE"),
@@ -317,6 +324,7 @@ impl std::str::FromStr for IndexType {
"FTS" | "INVERTED" => Ok(Self::FTS),
"IVF_FLAT" => Ok(Self::IvfFlat),
"IVF_PQ" => Ok(Self::IvfPq),
"IVF_RQ" => Ok(Self::IvfRq),
"IVF_HNSW_PQ" => Ok(Self::IvfHnswPq),
"IVF_HNSW_SQ" => Ok(Self::IvfHnswSq),
_ => Err(Error::InvalidInput {

View File

@@ -291,6 +291,52 @@ pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
}
}
/// Builder for an IVF RQ index.
///
/// This index stores a compressed (quantized) copy of every vector. Each dimension
/// is quantized into a small number of bits.
/// The `num_bits` parameter controls this process, providing a tradeoff
/// between index size (and thus search speed) and index accuracy.
///
/// The partitioning process is called IVF and the `num_partitions` parameter controls how
/// many groups to create.
///
/// Note that training an IVF RQ index on a large dataset is a slow operation and
/// currently is also a memory intensive operation.
#[derive(Debug, Clone)]
pub struct IvfRqIndexBuilder {
// IVF
pub(crate) distance_type: DistanceType,
pub(crate) num_partitions: Option<u32>,
pub(crate) num_bits: Option<u32>,
pub(crate) sample_rate: u32,
pub(crate) max_iterations: u32,
pub(crate) target_partition_size: Option<u32>,
}
impl Default for IvfRqIndexBuilder {
fn default() -> Self {
Self {
distance_type: DistanceType::L2,
num_partitions: None,
num_bits: None,
sample_rate: 256,
max_iterations: 50,
target_partition_size: None,
}
}
}
impl IvfRqIndexBuilder {
impl_distance_type_setter!();
impl_ivf_params_setter!();
pub fn num_bits(mut self, num_bits: u32) -> Self {
self.num_bits = Some(num_bits);
self
}
}
/// Builder for an IVF HNSW PQ index.
///
/// This index is a combination of IVF and HNSW.

View File

@@ -647,7 +647,7 @@ impl From<StorageOptions> for RemoteOptions {
let mut filtered = HashMap::new();
for opt in supported_opts {
if let Some(v) = options.0.get(opt) {
filtered.insert(opt.to_string(), v.clone());
filtered.insert(opt.to_string(), v.to_string());
}
}
Self::new(filtered)

View File

@@ -1383,35 +1383,30 @@ impl Table {
}
pub struct NativeTags {
dataset: dataset::DatasetConsistencyWrapper,
inner: LanceTags,
}
#[async_trait]
impl Tags for NativeTags {
async fn list(&self) -> Result<HashMap<String, TagContents>> {
let dataset = self.dataset.get().await?;
Ok(dataset.tags().list().await?)
Ok(self.inner.list().await?)
}
async fn get_version(&self, tag: &str) -> Result<u64> {
let dataset = self.dataset.get().await?;
Ok(dataset.tags().get_version(tag).await?)
Ok(self.inner.get_version(tag).await?)
}
async fn create(&mut self, tag: &str, version: u64) -> Result<()> {
let dataset = self.dataset.get().await?;
dataset.tags().create(tag, version).await?;
self.inner.create(tag, version).await?;
Ok(())
}
async fn delete(&mut self, tag: &str) -> Result<()> {
let dataset = self.dataset.get().await?;
dataset.tags().delete(tag).await?;
self.inner.delete(tag).await?;
Ok(())
}
async fn update(&mut self, tag: &str, version: u64) -> Result<()> {
let dataset = self.dataset.get().await?;
dataset.tags().update(tag, version).await?;
self.inner.update(tag, version).await?;
Ok(())
}
}
@@ -1785,13 +1780,13 @@ impl NativeTable {
BuiltinIndexType::BTree,
)))
} else {
Err(Error::InvalidInput {
return Err(Error::InvalidInput {
message: format!(
"there are no indices supported for the field `{}` with the data type {}",
field.name(),
field.data_type()
),
})?
});
}
}
Index::BTree(_) => {
@@ -1843,6 +1838,18 @@ impl NativeTable {
);
Ok(Box::new(lance_idx_params))
}
Index::IvfRq(index) => {
Self::validate_index_type(field, "IVF RQ", supported_vector_data_type)?;
let num_partitions = self
.get_num_partitions(index.num_partitions, false, None)
.await?;
let lance_idx_params = VectorIndexParams::ivf_rq(
num_partitions as usize,
index.num_bits.unwrap_or(1) as u8,
index.distance_type.into(),
);
Ok(Box::new(lance_idx_params))
}
Index::IvfHnswPq(index) => {
Self::validate_index_type(field, "IVF HNSW PQ", supported_vector_data_type)?;
let dim = Self::get_vector_dimension(field)?;
@@ -1912,9 +1919,11 @@ impl NativeTable {
Index::Bitmap(_) => IndexType::Bitmap,
Index::LabelList(_) => IndexType::LabelList,
Index::FTS(_) => IndexType::Inverted,
Index::IvfFlat(_) | Index::IvfPq(_) | Index::IvfHnswPq(_) | Index::IvfHnswSq(_) => {
IndexType::Vector
}
Index::IvfFlat(_)
| Index::IvfPq(_)
| Index::IvfRq(_)
| Index::IvfHnswPq(_)
| Index::IvfHnswSq(_) => IndexType::Vector,
}
}
@@ -2445,8 +2454,10 @@ impl BaseTable for NativeTable {
}
async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
let dataset = self.dataset.get().await?;
Ok(Box::new(NativeTags {
dataset: self.dataset.clone(),
inner: dataset.tags.clone(),
}))
}

View File

@@ -172,7 +172,7 @@ impl TableProvider for BaseTableAdapter {
if let Some(projection) = projection {
let field_names = projection
.iter()
.map(|i| self.schema.field(*i).name().clone())
.map(|i| self.schema.field(*i).name().to_string())
.collect();
query.select = Select::Columns(field_names);
}

View File

@@ -98,9 +98,8 @@ impl DatasetRef {
}
Self::TimeTravel { dataset, version } => {
let should_checkout = match &target_ref {
refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
refs::Ref::Version(_, None) => true, // No specific version, always checkout
refs::Ref::Tag(_) => true, // Always checkout for tags
refs::Ref::Version(target_ver) => version != target_ver,
refs::Ref::Tag(_) => true, // Always checkout for tags
};
if should_checkout {

View File

@@ -39,7 +39,7 @@ impl PatchStoreParam for Option<ObjectStoreParams> {
let mut params = self.unwrap_or_default();
if params.object_store_wrapper.is_some() {
return Err(Error::Other {
message: "can not patch param because object store is already set".into(),
message: "can not patch param because object store is already set.".into(),
source: None,
});
}
@@ -174,7 +174,7 @@ pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result
),
})
} else {
Ok(candidates[0].clone())
Ok(candidates[0].to_string())
}
}