Bump version: 0.25.1 → 0.25.2-beta.0

feat(rust): support namespace backed database (#2664 )
This PR adds support for namespace-backed databases through lance-namespace integration, enabling centralized table management through namespace APIs. --------- Co-authored-by: Claude <noreply@anthropic.com>
2025-12-24 13:59:58 +00:00 · 2025-09-24 22:54:09 +00:00 · 2025-09-24 15:33:31 -07:00 · 2025-09-24 12:50:21 -07:00 · 2025-09-24 09:46:04 -07:00 · 2025-09-23 18:49:42 -07:00
49 changed files with 2861 additions and 335 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.1-beta.2"
+current_version = "0.22.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -116,7 +116,7 @@ jobs:
        set -e
        npm ci
        npm run docs
-        if ! git diff --exit-code -- . ':(exclude)Cargo.lock'; then
+        if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
          echo "Docs need to be updated"
          echo "Run 'npm run docs', fix any warnings, and commit the changes."
          exit 1
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,14 +15,15 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.35.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = { "version" = "=0.35.0", default-features = false, "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"] }
+lance-io = { "version" = "=0.37.0", default-features = false }
+lance-index = "=0.37.0"
+lance-linalg = "=0.37.0"
+lance-table = "=0.37.0"
+lance-testing = "=0.37.0"
+lance-datafusion = "=0.37.0"
+lance-encoding = "=0.37.0"
+lance-namespace = "0.0.15"
 # Note that this one does not include pyarrow
 arrow = { version = "55.1", optional = false }
 arrow-array = "55.1"
--- a/ci/set_lance_version.py
+++ b/ci/set_lance_version.py
@@ -1,4 +1,5 @@
 import argparse
+import re
 import sys
 import json

@@ -18,8 +19,12 @@ def run_command(command: str) -> str:

 def get_latest_stable_version() -> str:
    version_line = run_command("cargo info lance | grep '^version:'")
-    version = version_line.split(" ")[1].strip()
-    return version
+    # Example output: "version: 0.35.0 (latest 0.37.0)"
+    match = re.search(r'\(latest ([0-9.]+)\)', version_line)
+    if match:
+        return match.group(1)
+    # Fallback: use the first version after 'version:'
+    return version_line.split("version:")[1].split()[0].strip()


 def get_latest_preview_version() -> str:
--- a/docs/src/js/classes/Connection.md
+++ b/docs/src/js/classes/Connection.md
@@ -25,6 +25,51 @@ the underlying connection has been closed.

 ## Methods

+### cloneTable()
+
+```ts
+abstract cloneTable(
+   targetTableName,
+   sourceUri,
+   options?): Promise<Table>
+```
+
+Clone a table from a source table.
+
+A shallow clone creates a new table that shares the underlying data files
+with the source table but has its own independent manifest. This allows
+both the source and cloned tables to evolve independently while initially
+sharing the same data, deletion, and index files.
+
+#### Parameters
+
+* **targetTableName**: `string`
+    The name of the target table to create.
+
+* **sourceUri**: `string`
+    The URI of the source table to clone from.
+
+* **options?**
+    Clone options.
+
+* **options.isShallow?**: `boolean`
+    Whether to perform a shallow clone (defaults to true).
+
+* **options.sourceTag?**: `string`
+    The tag of the source table to clone.
+
+* **options.sourceVersion?**: `number`
+    The version of the source table to clone.
+
+* **options.targetNamespace?**: `string`[]
+    The namespace for the target table (defaults to root namespace).
+
+#### Returns
+
+`Promise`&lt;[`Table`](Table.md)&gt;
+
+***
+
 ### close()

 ```ts
--- a/docs/src/js/functions/makeArrowTable.md
+++ b/docs/src/js/functions/makeArrowTable.md
@@ -13,7 +13,7 @@ function makeArrowTable(
   metadata?): ArrowTable
 ```

-An enhanced version of the makeTable function from Apache Arrow
+An enhanced version of the apache-arrow makeTable function from Apache Arrow
 that supports nested fields and embeddings columns.

 (typically you do not need to call this function.  It will be called automatically
--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -78,6 +78,7 @@
 - [TableNamesOptions](interfaces/TableNamesOptions.md)
 - [TableStatistics](interfaces/TableStatistics.md)
 - [TimeoutConfig](interfaces/TimeoutConfig.md)
+- [TlsConfig](interfaces/TlsConfig.md)
 - [TokenResponse](interfaces/TokenResponse.md)
 - [UpdateOptions](interfaces/UpdateOptions.md)
 - [UpdateResult](interfaces/UpdateResult.md)
--- a/docs/src/js/interfaces/ClientConfig.md
+++ b/docs/src/js/interfaces/ClientConfig.md
@@ -40,6 +40,14 @@ optional timeoutConfig: TimeoutConfig;

 ***

+### tlsConfig?
+
+```ts
+optional tlsConfig: TlsConfig;
+```
+
+***
+
 ### userAgent?

 ```ts
--- a/docs/src/js/interfaces/TlsConfig.md
+++ b/docs/src/js/interfaces/TlsConfig.md
@@ -0,0 +1,49 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / TlsConfig
+
+# Interface: TlsConfig
+
+TLS/mTLS configuration for the remote HTTP client.
+
+## Properties
+
+### assertHostname?
+
+```ts
+optional assertHostname: boolean;
+```
+
+Whether to verify the hostname in the server's certificate.
+
+***
+
+### certFile?
+
+```ts
+optional certFile: string;
+```
+
+Path to the client certificate file (PEM format) for mTLS authentication.
+
+***
+
+### keyFile?
+
+```ts
+optional keyFile: string;
+```
+
+Path to the client private key file (PEM format) for mTLS authentication.
+
+***
+
+### sslCaCert?
+
+```ts
+optional sslCaCert: string;
+```
+
+Path to the CA certificate file (PEM format) for server verification.
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.22.1-beta.2</version>
+        <version>0.22.1-final.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/lance-namespace/pom.xml
+++ b/java/lance-namespace/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.22.1-beta.2</version>
+        <version>0.22.1-final.0</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.22.1-beta.2</version>
+    <version>0.22.1-final.0</version>
    <packaging>pom</packaging>
    <name>${project.artifactId}</name>
    <description>LanceDB Java SDK Parent POM</description>
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.22.1-beta.2"
+version = "0.22.1"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
--- a/nodejs/test/arrow.test.ts
+++ b/nodejs/test/arrow.test.ts
@@ -1,17 +1,5 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-import {
-  Bool,
-  Field,
-  Int32,
-  List,
-  Schema,
-  Struct,
-  Uint8,
-  Utf8,
-} from "apache-arrow";
-
 import * as arrow15 from "apache-arrow-15";
 import * as arrow16 from "apache-arrow-16";
 import * as arrow17 from "apache-arrow-17";
@@ -25,11 +13,9 @@ import {
  fromTableToBuffer,
  makeArrowTable,
  makeEmptyTable,
-  tableFromIPC,
 } from "../lancedb/arrow";
 import {
  EmbeddingFunction,
-  FieldOptions,
  FunctionOptions,
 } from "../lancedb/embedding/embedding_function";
 import { EmbeddingFunctionConfig } from "../lancedb/embedding/registry";
@@ -1008,5 +994,64 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(result).toEqual(null);
      });
    });
+
+    describe("boolean null handling", function () {
+      it("should handle null values in nullable boolean fields", () => {
+        const { makeArrowTable } = require("../lancedb/arrow");
+        const schema = new Schema([new Field("test", new arrow.Bool(), true)]);
+
+        // Test with all null values
+        const data = [{ test: null }];
+        const table = makeArrowTable(data, { schema });
+
+        expect(table.numRows).toBe(1);
+        expect(table.schema.names).toEqual(["test"]);
+        expect(table.getChild("test")!.get(0)).toBeNull();
+      });
+
+      it("should handle mixed null and non-null boolean values", () => {
+        const { makeArrowTable } = require("../lancedb/arrow");
+        const schema = new Schema([new Field("test", new Bool(), true)]);
+
+        // Test with mixed values
+        const data = [{ test: true }, { test: null }, { test: false }];
+        const table = makeArrowTable(data, { schema });
+
+        expect(table.numRows).toBe(3);
+        expect(table.getChild("test")!.get(0)).toBe(true);
+        expect(table.getChild("test")!.get(1)).toBeNull();
+        expect(table.getChild("test")!.get(2)).toBe(false);
+      });
+    });
+
+    // Test for the undefined values bug fix
+    describe("undefined values handling", () => {
+      it("should handle mixed undefined and actual values", () => {
+        const schema = new Schema([
+          new Field("text", new Utf8(), true), // nullable
+          new Field("number", new Int32(), true), // nullable
+          new Field("bool", new Bool(), true), // nullable
+        ]);
+
+        const data = [
+          { text: undefined, number: 42, bool: true },
+          { text: "hello", number: undefined, bool: false },
+          { text: "world", number: 123, bool: undefined },
+        ];
+        const table = makeArrowTable(data, { schema });
+
+        const result = table.toArray();
+        expect(result).toHaveLength(3);
+        expect(result[0].text).toBe(null);
+        expect(result[0].number).toBe(42);
+        expect(result[0].bool).toBe(true);
+        expect(result[1].text).toBe("hello");
+        expect(result[1].number).toBe(null);
+        expect(result[1].bool).toBe(false);
+        expect(result[2].text).toBe("world");
+        expect(result[2].number).toBe(123);
+        expect(result[2].bool).toBe(null);
+      });
+    });
  },
 );
--- a/nodejs/test/remote.test.ts
+++ b/nodejs/test/remote.test.ts
@@ -7,7 +7,6 @@ import {
  ClientConfig,
  Connection,
  ConnectionOptions,
-  NativeJsHeaderProvider,
  TlsConfig,
  connect,
 } from "../lancedb";
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -39,7 +39,6 @@ import {
  Operator,
  instanceOfFullTextQuery,
 } from "../lancedb/query";
-import exp = require("constants");

 describe.each([arrow15, arrow16, arrow17, arrow18])(
  "Given a table",
@@ -488,6 +487,32 @@ describe("merge insert", () => {
        .execute(newData, { timeoutMs: 0 }),
    ).rejects.toThrow("merge insert timed out");
  });
+
+  test("useIndex", async () => {
+    const newData = [
+      { a: 2, b: "x" },
+      { a: 4, b: "z" },
+    ];
+
+    // Test with useIndex(true) - should work fine
+    const result1 = await table
+      .mergeInsert("a")
+      .whenNotMatchedInsertAll()
+      .useIndex(true)
+      .execute(newData);
+
+    expect(result1.numInsertedRows).toBe(1); // Only a=4 should be inserted
+
+    // Test with useIndex(false) - should also work fine
+    const newData2 = [{ a: 5, b: "w" }];
+    const result2 = await table
+      .mergeInsert("a")
+      .whenNotMatchedInsertAll()
+      .useIndex(false)
+      .execute(newData2);
+
+    expect(result2.numInsertedRows).toBe(1); // a=5 should be inserted
+  });
 });

 describe("When creating an index", () => {
--- a/nodejs/biome.json
+++ b/nodejs/biome.json
@@ -48,6 +48,7 @@
        "noUnreachableSuper": "error",
        "noUnsafeFinally": "error",
        "noUnsafeOptionalChaining": "error",
+        "noUnusedImports": "error",
        "noUnusedLabels": "error",
        "noUnusedVariables": "warn",
        "useIsNan": "error",
--- a/nodejs/lancedb/arrow.ts
+++ b/nodejs/lancedb/arrow.ts
@@ -41,7 +41,6 @@ import {
  vectorFromArray as badVectorFromArray,
  makeBuilder,
  makeData,
-  makeTable,
 } from "apache-arrow";
 import { Buffers } from "apache-arrow/data";
 import { type EmbeddingFunction } from "./embedding/embedding_function";
@@ -279,7 +278,7 @@ export class MakeArrowTableOptions {
 }

 /**
- * An enhanced version of the {@link makeTable} function from Apache Arrow
+ * An enhanced version of the apache-arrow makeTable function from Apache Arrow
 * that supports nested fields and embeddings columns.
 *
 * (typically you do not need to call this function.  It will be called automatically
@@ -705,7 +704,7 @@ function transposeData(
      }
      return current;
    });
-    return makeVector(values, field.type);
+    return makeVector(values, field.type, undefined, field.nullable);
  }
 }

@@ -752,9 +751,30 @@ function makeVector(
  values: unknown[],
  type?: DataType,
  stringAsDictionary?: boolean,
+  nullable?: boolean,
  // biome-ignore lint/suspicious/noExplicitAny: skip
 ): Vector<any> {
  if (type !== undefined) {
+    // Convert undefined values to null for nullable fields
+    if (nullable) {
+      values = values.map((v) => (v === undefined ? null : v));
+    }
+
+    // workaround for: https://github.com/apache/arrow-js/issues/68
+    if (DataType.isBool(type)) {
+      const hasNonNullValue = values.some((v) => v !== null && v !== undefined);
+      if (!hasNonNullValue) {
+        const nullBitmap = new Uint8Array(Math.ceil(values.length / 8));
+        const data = makeData({
+          type: type,
+          length: values.length,
+          nullCount: values.length,
+          nullBitmap,
+        });
+        return arrowMakeVector(data);
+      }
+    }
+
    // No need for inference, let Arrow create it
    if (type instanceof Int) {
      if (DataType.isInt(type) && type.bitWidth === 64) {
@@ -879,7 +899,12 @@ async function applyEmbeddingsFromMetadata(
  for (const field of schema.fields) {
    if (!(field.name in columns)) {
      const nullValues = new Array(table.numRows).fill(null);
-      columns[field.name] = makeVector(nullValues, field.type);
+      columns[field.name] = makeVector(
+        nullValues,
+        field.type,
+        undefined,
+        field.nullable,
+      );
    }
  }

@@ -943,7 +968,12 @@ async function applyEmbeddings<T>(
    } else if (schema != null) {
      const destField = schema.fields.find((f) => f.name === destColumn);
      if (destField != null) {
-        newColumns[destColumn] = makeVector([], destField.type);
+        newColumns[destColumn] = makeVector(
+          [],
+          destField.type,
+          undefined,
+          destField.nullable,
+        );
      } else {
        throw new Error(
          `Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`,
--- a/nodejs/lancedb/connection.ts
+++ b/nodejs/lancedb/connection.ts
@@ -3,7 +3,6 @@

 import {
  Data,
-  Schema,
  SchemaLike,
  TableLike,
  fromTableToStreamBuffer,
--- a/nodejs/lancedb/merge.ts
+++ b/nodejs/lancedb/merge.ts
@@ -70,6 +70,23 @@ export class MergeInsertBuilder {
      this.#schema,
    );
  }
+
+  /**
+   * Controls whether to use indexes for the merge operation.
+   *
+   * When set to `true` (the default), the operation will use an index if available
+   * on the join key for improved performance. When set to `false`, it forces a full
+   * table scan even if an index exists. This can be useful for benchmarking or when
+   * the query optimizer chooses a suboptimal path.
+   *
+   * @param useIndex - Whether to use indices for the merge operation. Defaults to `true`.
+   */
+  useIndex(useIndex: boolean): MergeInsertBuilder {
+    return new MergeInsertBuilder(
+      this.#native.useIndex(useIndex),
+      this.#schema,
+    );
+  }
  /**
   * Executes the merge insert operation
   *
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.22.1-beta.2",
+	"version": "0.22.1",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.22.1-beta.2",
+	"version": "0.22.1",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.22.1-beta.2",
+	"version": "0.22.1",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.22.1-beta.2",
+	"version": "0.22.1",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.22.1-beta.2",
+	"version": "0.22.1",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.22.1-beta.2",
+	"version": "0.22.1",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.22.1-beta.2",
+  "version": "0.22.1",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.22.1-beta.2",
+	"version": "0.22.1",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.22.1-beta.2",
+  "version": "0.22.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.22.1-beta.2",
+      "version": "0.22.1",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.22.1-beta.2",
+  "version": "0.22.1",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/nodejs/src/merge.rs
+++ b/nodejs/src/merge.rs
@@ -43,6 +43,13 @@ impl NativeMergeInsertBuilder {
        self.inner.timeout(Duration::from_millis(timeout as u64));
    }

+    #[napi]
+    pub fn use_index(&self, use_index: bool) -> Self {
+        let mut this = self.clone();
+        this.inner.use_index(use_index);
+        this
+    }
+
    #[napi(catch_unwind)]
    pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeResult> {
        let data = ipc_file_to_batches(buf.to_vec())
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.25.1-beta.3"
+current_version = "0.25.2-beta.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.25.1-beta.3"
+version = "0.25.2-beta.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
--- a/python/python/lancedb/merge.py
+++ b/python/python/lancedb/merge.py
@@ -33,6 +33,7 @@ class LanceMergeInsertBuilder(object):
        self._when_not_matched_by_source_delete = False
        self._when_not_matched_by_source_condition = None
        self._timeout = None
+        self._use_index = True

    def when_matched_update_all(
        self, *, where: Optional[str] = None
@@ -78,6 +79,23 @@ class LanceMergeInsertBuilder(object):
            self._when_not_matched_by_source_condition = condition
        return self

+    def use_index(self, use_index: bool) -> LanceMergeInsertBuilder:
+        """
+        Controls whether to use indexes for the merge operation.
+
+        When set to `True` (the default), the operation will use an index if available
+        on the join key for improved performance. When set to `False`, it forces a full
+        table scan even if an index exists. This can be useful for benchmarking or when
+        the query optimizer chooses a suboptimal path.
+
+        Parameters
+        ----------
+        use_index: bool
+            Whether to use indices for the merge operation. Defaults to `True`.
+        """
+        self._use_index = use_index
+        return self
+
    def execute(
        self,
        new_data: DATA,
--- a/python/python/lancedb/rerankers/init.py
+++ b/python/python/lancedb/rerankers/init.py
@@ -9,6 +9,7 @@ from .linear_combination import LinearCombinationReranker
 from .openai import OpenaiReranker
 from .jinaai import JinaReranker
 from .rrf import RRFReranker
+from .mrr import MRRReranker
 from .answerdotai import AnswerdotaiRerankers
 from .voyageai import VoyageAIReranker

@@ -23,4 +24,5 @@ __all__ = [
    "RRFReranker",
    "AnswerdotaiRerankers",
    "VoyageAIReranker",
+    "MRRReranker",
 ]
--- a/python/python/lancedb/rerankers/mrr.py
+++ b/python/python/lancedb/rerankers/mrr.py
@@ -0,0 +1,169 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+
+from typing import Union, List, TYPE_CHECKING
+import pyarrow as pa
+import numpy as np
+
+from collections import defaultdict
+from .base import Reranker
+
+if TYPE_CHECKING:
+    from ..table import LanceVectorQueryBuilder
+
+
+class MRRReranker(Reranker):
+    """
+    Reranks the results using Mean Reciprocal Rank (MRR) algorithm based
+    on the scores of vector and FTS search.
+    Algorithm reference - https://en.wikipedia.org/wiki/Mean_reciprocal_rank
+
+    MRR calculates the average of reciprocal ranks across different search results.
+    For each document, it computes the reciprocal of its rank in each system,
+    then takes the mean of these reciprocal ranks as the final score.
+
+    Parameters
+    ----------
+    weight_vector : float, default 0.5
+        Weight for vector search results (0.0 to 1.0)
+    weight_fts : float, default 0.5
+        Weight for FTS search results (0.0 to 1.0)
+        Note: weight_vector + weight_fts should equal 1.0
+    return_score : str, default "relevance"
+        Options are "relevance" or "all"
+        The type of score to return. If "relevance", will return only the relevance
+        score. If "all", will return all scores from the vector and FTS search along
+        with the relevance score.
+    """
+
+    def __init__(
+        self,
+        weight_vector: float = 0.5,
+        weight_fts: float = 0.5,
+        return_score="relevance",
+    ):
+        if not (0.0 <= weight_vector <= 1.0):
+            raise ValueError("weight_vector must be between 0.0 and 1.0")
+        if not (0.0 <= weight_fts <= 1.0):
+            raise ValueError("weight_fts must be between 0.0 and 1.0")
+        if abs(weight_vector + weight_fts - 1.0) > 1e-6:
+            raise ValueError("weight_vector + weight_fts must equal 1.0")
+
+        super().__init__(return_score)
+        self.weight_vector = weight_vector
+        self.weight_fts = weight_fts
+
+    def rerank_hybrid(
+        self,
+        query: str,  # noqa: F821
+        vector_results: pa.Table,
+        fts_results: pa.Table,
+    ):
+        vector_ids = vector_results["_rowid"].to_pylist() if vector_results else []
+        fts_ids = fts_results["_rowid"].to_pylist() if fts_results else []
+
+        # Maps result_id to list of (type, reciprocal_rank)
+        mrr_score_map = defaultdict(list)
+
+        if vector_ids:
+            for rank, result_id in enumerate(vector_ids, 1):
+                reciprocal_rank = 1.0 / rank
+                mrr_score_map[result_id].append(("vector", reciprocal_rank))
+
+        if fts_ids:
+            for rank, result_id in enumerate(fts_ids, 1):
+                reciprocal_rank = 1.0 / rank
+                mrr_score_map[result_id].append(("fts", reciprocal_rank))
+
+        final_mrr_scores = {}
+        for result_id, scores in mrr_score_map.items():
+            vector_rr = 0.0
+            fts_rr = 0.0
+
+            for score_type, reciprocal_rank in scores:
+                if score_type == "vector":
+                    vector_rr = reciprocal_rank
+                elif score_type == "fts":
+                    fts_rr = reciprocal_rank
+
+            # If a document doesn't appear, its reciprocal rank is 0
+            weighted_mrr = self.weight_vector * vector_rr + self.weight_fts * fts_rr
+            final_mrr_scores[result_id] = weighted_mrr
+
+        combined_results = self.merge_results(vector_results, fts_results)
+        combined_row_ids = combined_results["_rowid"].to_pylist()
+        relevance_scores = [final_mrr_scores[row_id] for row_id in combined_row_ids]
+        combined_results = combined_results.append_column(
+            "_relevance_score", pa.array(relevance_scores, type=pa.float32())
+        )
+        combined_results = combined_results.sort_by(
+            [("_relevance_score", "descending")]
+        )
+
+        if self.score == "relevance":
+            combined_results = self._keep_relevance_score(combined_results)
+
+        return combined_results
+
+    def rerank_multivector(
+        self,
+        vector_results: Union[List[pa.Table], List["LanceVectorQueryBuilder"]],
+        query: str = None,
+        deduplicate: bool = True,  # noqa: F821
+    ):
+        """
+        Reranks the results from multiple vector searches using MRR algorithm.
+        Each vector search result is treated as a separate ranking system,
+        and MRR calculates the mean of reciprocal ranks across all systems.
+        This cannot reuse rerank_hybrid because MRR semantics require treating
+        each vector result as a separate ranking system.
+        """
+        if not all(isinstance(v, type(vector_results[0])) for v in vector_results):
+            raise ValueError(
+                "All elements in vector_results should be of the same type"
+            )
+
+        # avoid circular import
+        if type(vector_results[0]).__name__ == "LanceVectorQueryBuilder":
+            vector_results = [result.to_arrow() for result in vector_results]
+        elif not isinstance(vector_results[0], pa.Table):
+            raise ValueError(
+                "vector_results should be a list of pa.Table or LanceVectorQueryBuilder"
+            )
+
+        if not all("_rowid" in result.column_names for result in vector_results):
+            raise ValueError(
+                "'_rowid' is required for deduplication. \
+                    add _rowid to search results like this: \
+                    `search().with_row_id(True)`"
+            )
+
+        mrr_score_map = defaultdict(list)
+
+        for result_table in vector_results:
+            result_ids = result_table["_rowid"].to_pylist()
+            for rank, result_id in enumerate(result_ids, 1):
+                reciprocal_rank = 1.0 / rank
+                mrr_score_map[result_id].append(reciprocal_rank)
+
+        final_mrr_scores = {}
+        for result_id, reciprocal_ranks in mrr_score_map.items():
+            mean_rr = np.mean(reciprocal_ranks)
+            final_mrr_scores[result_id] = mean_rr
+
+        combined = pa.concat_tables(vector_results, **self._concat_tables_args)
+        combined = self._deduplicate(combined)
+
+        combined_row_ids = combined["_rowid"].to_pylist()
+
+        relevance_scores = [final_mrr_scores[row_id] for row_id in combined_row_ids]
+        combined = combined.append_column(
+            "_relevance_score", pa.array(relevance_scores, type=pa.float32())
+        )
+        combined = combined.sort_by([("_relevance_score", "descending")])
+
+        if self.score == "relevance":
+            combined = self._keep_relevance_score(combined)
+
+        return combined
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -1470,10 +1470,7 @@ class Table(ABC):
            be deleted unless they are at least 7 days old. If delete_unverified is True
            then these files will be deleted regardless of their age.
        retrain: bool, default False
-            If True, retrain the vector indices, this would refine the IVF clustering
-            and quantization, which may improve the search accuracy. It's faster than
-            re-creating the index from scratch, so it's recommended to try this first,
-            when the data distribution has changed significantly.
+            This parameter is no longer used and is deprecated.

        Experimental API
        ----------------
@@ -2835,10 +2832,7 @@ class LanceTable(Table):
            be deleted unless they are at least 7 days old. If delete_unverified is True
            then these files will be deleted regardless of their age.
        retrain: bool, default False
-            If True, retrain the vector indices, this would refine the IVF clustering
-            and quantization, which may improve the search accuracy. It's faster than
-            re-creating the index from scratch, so it's recommended to try this first,
-            when the data distribution has changed significantly.
+            This parameter is no longer used and is deprecated.

        Experimental API
        ----------------
@@ -3926,6 +3920,7 @@ class AsyncTable:
                when_not_matched_by_source_delete=merge._when_not_matched_by_source_delete,
                when_not_matched_by_source_condition=merge._when_not_matched_by_source_condition,
                timeout=merge._timeout,
+                use_index=merge._use_index,
            ),
        )

@@ -4298,10 +4293,7 @@ class AsyncTable:
            be deleted unless they are at least 7 days old. If delete_unverified is True
            then these files will be deleted regardless of their age.
        retrain: bool, default False
-            If True, retrain the vector indices, this would refine the IVF clustering
-            and quantization, which may improve the search accuracy. It's faster than
-            re-creating the index from scratch, so it's recommended to try this first,
-            when the data distribution has changed significantly.
+            This parameter is no longer used and is deprecated.

        Experimental API
        ----------------
@@ -4324,10 +4316,19 @@ class AsyncTable:
        cleanup_since_ms: Optional[int] = None
        if cleanup_older_than is not None:
            cleanup_since_ms = round(cleanup_older_than.total_seconds() * 1000)
+
+        if retrain:
+            import warnings
+
+            warnings.warn(
+                "The 'retrain' parameter is deprecated and will be removed in a "
+                "future version.",
+                DeprecationWarning,
+            )
+
        return await self._inner.optimize(
            cleanup_since_ms=cleanup_since_ms,
            delete_unverified=delete_unverified,
-            retrain=retrain,
        )

    async def list_indices(self) -> Iterable[IndexConfig]:
--- a/python/python/tests/test_rerankers.py
+++ b/python/python/tests/test_rerankers.py
@@ -22,6 +22,7 @@ from lancedb.rerankers import (
    JinaReranker,
    AnswerdotaiRerankers,
    VoyageAIReranker,
+    MRRReranker,
 )
 from lancedb.table import LanceTable

@@ -46,6 +47,7 @@ def get_test_table(tmp_path, use_tantivy):
        db,
        "my_table",
        schema=MyTable,
+        mode="overwrite",
    )

    # Need to test with a bunch of phrases to make sure sorting is consistent
@@ -96,7 +98,7 @@ def get_test_table(tmp_path, use_tantivy):
    )

    # Create a fts index
-    table.create_fts_index("text", use_tantivy=use_tantivy)
+    table.create_fts_index("text", use_tantivy=use_tantivy, replace=True)

    return table, MyTable

@@ -320,6 +322,34 @@ def test_rrf_reranker(tmp_path, use_tantivy):
    _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)


+@pytest.mark.parametrize("use_tantivy", [True, False])
+def test_mrr_reranker(tmp_path, use_tantivy):
+    reranker = MRRReranker()
+    _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)
+
+    # Test multi-vector part
+    table, schema = get_test_table(tmp_path, use_tantivy)
+    query = "single player experience"
+    rs1 = table.search(query, vector_column_name="vector").limit(10).with_row_id(True)
+    rs2 = (
+        table.search(query, vector_column_name="meta_vector")
+        .limit(10)
+        .with_row_id(True)
+    )
+    result = reranker.rerank_multivector([rs1, rs2])
+    assert "_relevance_score" in result.column_names
+    assert len(result) <= 20
+
+    if len(result) > 1:
+        assert np.all(np.diff(result.column("_relevance_score").to_numpy()) <= 0), (
+            "The _relevance_score should be descending."
+        )
+
+    # Test with duplicate results
+    result_deduped = reranker.rerank_multivector([rs1, rs2, rs1])
+    assert len(result_deduped) == len(result)
+
+
 def test_rrf_reranker_distance():
    data = pa.table(
        {
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -591,12 +591,11 @@ impl Table {
    }

    /// Optimize the on-disk data by compacting and pruning old data, for better performance.
-    #[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None, retrain=None))]
+    #[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None))]
    pub fn optimize(
        self_: PyRef<'_, Self>,
        cleanup_since_ms: Option<u64>,
        delete_unverified: Option<bool>,
-        retrain: Option<bool>,
    ) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        let older_than = if let Some(ms) = cleanup_since_ms {
@@ -632,10 +631,9 @@ impl Table {
                .prune
                .unwrap();
            inner
-                .optimize(lancedb::table::OptimizeAction::Index(match retrain {
-                    Some(true) => OptimizeOptions::retrain(),
-                    _ => OptimizeOptions::default(),
-                }))
+                .optimize(lancedb::table::OptimizeAction::Index(
+                    OptimizeOptions::default(),
+                ))
                .await
                .infer_error()?;
            Ok(OptimizeStats {
@@ -674,6 +672,9 @@ impl Table {
        if let Some(timeout) = parameters.timeout {
            builder.timeout(timeout);
        }
+        if let Some(use_index) = parameters.use_index {
+            builder.use_index(use_index);
+        }

        future_into_py(self_.py(), async move {
            let res = builder.execute(Box::new(batches)).await.infer_error()?;
@@ -833,6 +834,7 @@ pub struct MergeInsertParams {
    when_not_matched_by_source_delete: bool,
    when_not_matched_by_source_condition: Option<String>,
    timeout: Option<std::time::Duration>,
+    use_index: Option<bool>,
 }

 #[pyclass]
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.22.1-beta.2"
+version = "0.22.1"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -36,6 +36,7 @@ lance-table = { workspace = true }
 lance-linalg = { workspace = true }
 lance-testing = { workspace = true }
 lance-encoding = { workspace = true }
+lance-namespace = { workspace = true }
 moka = { workspace = true }
 pin-project = { workspace = true }
 tokio = { version = "1.23", features = ["rt-multi-thread"] }
--- a/rust/lancedb/src/connection.rs
+++ b/rust/lancedb/src/connection.rs
@@ -1015,6 +1015,117 @@ pub fn connect(uri: &str) -> ConnectBuilder {
    ConnectBuilder::new(uri)
 }

+pub struct ConnectNamespaceBuilder {
+    ns_impl: String,
+    properties: HashMap<String, String>,
+    storage_options: HashMap<String, String>,
+    read_consistency_interval: Option<std::time::Duration>,
+    embedding_registry: Option<Arc<dyn EmbeddingRegistry>>,
+    session: Option<Arc<lance::session::Session>>,
+}
+
+impl ConnectNamespaceBuilder {
+    fn new(ns_impl: &str, properties: HashMap<String, String>) -> Self {
+        Self {
+            ns_impl: ns_impl.to_string(),
+            properties,
+            storage_options: HashMap::new(),
+            read_consistency_interval: None,
+            embedding_registry: None,
+            session: None,
+        }
+    }
+
+    /// Set an option for the storage layer.
+    ///
+    /// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
+    pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
+        self.storage_options.insert(key.into(), value.into());
+        self
+    }
+
+    /// Set multiple options for the storage layer.
+    ///
+    /// See available options at <https://lancedb.github.io/lancedb/guides/storage/>
+    pub fn storage_options(
+        mut self,
+        pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
+    ) -> Self {
+        for (key, value) in pairs {
+            self.storage_options.insert(key.into(), value.into());
+        }
+        self
+    }
+
+    /// The interval at which to check for updates from other processes.
+    ///
+    /// If left unset, consistency is not checked. For maximum read
+    /// performance, this is the default. For strong consistency, set this to
+    /// zero seconds. Then every read will check for updates from other processes.
+    /// As a compromise, set this to a non-zero duration for eventual consistency.
+    pub fn read_consistency_interval(
+        mut self,
+        read_consistency_interval: std::time::Duration,
+    ) -> Self {
+        self.read_consistency_interval = Some(read_consistency_interval);
+        self
+    }
+
+    /// Provide a custom [`EmbeddingRegistry`] to use for this connection.
+    pub fn embedding_registry(mut self, registry: Arc<dyn EmbeddingRegistry>) -> Self {
+        self.embedding_registry = Some(registry);
+        self
+    }
+
+    /// Set a custom session for object stores and caching.
+    ///
+    /// By default, a new session with default configuration will be created.
+    /// This method allows you to provide a custom session with your own
+    /// configuration for object store registries, caching, etc.
+    pub fn session(mut self, session: Arc<lance::session::Session>) -> Self {
+        self.session = Some(session);
+        self
+    }
+
+    /// Execute the connection
+    pub async fn execute(self) -> Result<Connection> {
+        use crate::database::namespace::LanceNamespaceDatabase;
+
+        let internal = Arc::new(
+            LanceNamespaceDatabase::connect(
+                &self.ns_impl,
+                self.properties,
+                self.storage_options,
+                self.read_consistency_interval,
+                self.session,
+            )
+            .await?,
+        );
+
+        Ok(Connection {
+            internal,
+            uri: format!("namespace://{}", self.ns_impl),
+            embedding_registry: self
+                .embedding_registry
+                .unwrap_or_else(|| Arc::new(MemoryRegistry::new())),
+        })
+    }
+}
+
+/// Connect to a LanceDB database through a namespace.
+///
+/// # Arguments
+///
+/// * `ns_impl` - The namespace implementation to use (e.g., "dir" for directory-based, "rest" for REST API)
+/// * `properties` - Configuration properties for the namespace implementation
+/// ```
+pub fn connect_namespace(
+    ns_impl: &str,
+    properties: HashMap<String, String>,
+) -> ConnectNamespaceBuilder {
+    ConnectNamespaceBuilder::new(ns_impl, properties)
+}
+
 #[cfg(all(test, feature = "remote"))]
 mod test_utils {
    use super::*;
--- a/rust/lancedb/src/database.rs
+++ b/rust/lancedb/src/database.rs
@@ -29,6 +29,7 @@ use crate::error::Result;
 use crate::table::{BaseTable, TableDefinition, WriteOptions};

 pub mod listing;
+pub mod namespace;

 pub trait DatabaseOptions {
    fn serialize_into_map(&self, map: &mut HashMap<String, String>);
--- a/rust/lancedb/src/database/namespace.rs
+++ b/rust/lancedb/src/database/namespace.rs
@@ -0,0 +1,840 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+//! Namespace-based database implementation that delegates table management to lance-namespace
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use lance_namespace::{
+    connect as connect_namespace,
+    models::{
+        CreateEmptyTableRequest, CreateNamespaceRequest, DescribeTableRequest,
+        DropNamespaceRequest, DropTableRequest, ListNamespacesRequest, ListTablesRequest,
+    },
+    LanceNamespace,
+};
+
+use crate::connection::ConnectRequest;
+use crate::database::listing::ListingDatabase;
+use crate::error::{Error, Result};
+
+use super::{
+    BaseTable, CloneTableRequest, CreateNamespaceRequest as DbCreateNamespaceRequest,
+    CreateTableMode, CreateTableRequest as DbCreateTableRequest, Database,
+    DropNamespaceRequest as DbDropNamespaceRequest,
+    ListNamespacesRequest as DbListNamespacesRequest, OpenTableRequest, TableNamesRequest,
+};
+
+/// A database implementation that uses lance-namespace for table management
+pub struct LanceNamespaceDatabase {
+    namespace: Arc<dyn LanceNamespace>,
+    // Storage options to be inherited by tables
+    storage_options: HashMap<String, String>,
+    // Read consistency interval for tables
+    read_consistency_interval: Option<std::time::Duration>,
+    // Optional session for object stores and caching
+    session: Option<Arc<lance::session::Session>>,
+}
+
+impl LanceNamespaceDatabase {
+    pub async fn connect(
+        ns_impl: &str,
+        ns_properties: HashMap<String, String>,
+        storage_options: HashMap<String, String>,
+        read_consistency_interval: Option<std::time::Duration>,
+        session: Option<Arc<lance::session::Session>>,
+    ) -> Result<Self> {
+        let namespace = connect_namespace(ns_impl, ns_properties.clone())
+            .await
+            .map_err(|e| Error::InvalidInput {
+                message: format!("Failed to connect to namespace: {:?}", e),
+            })?;
+
+        Ok(Self {
+            namespace,
+            storage_options,
+            read_consistency_interval,
+            session,
+        })
+    }
+
+    /// Helper method to create a ListingDatabase from a table location
+    ///
+    /// This method:
+    /// 1. Validates that the location ends with <table_name>.lance
+    /// 2. Extracts the parent directory from the location
+    /// 3. Creates a ListingDatabase at that parent directory
+    async fn create_listing_database(
+        &self,
+        table_name: &str,
+        location: &str,
+        additional_storage_options: Option<HashMap<String, String>>,
+    ) -> Result<Arc<ListingDatabase>> {
+        let expected_suffix = format!("{}.lance", table_name);
+        if !location.ends_with(&expected_suffix) {
+            return Err(Error::Runtime {
+                message: format!(
+                    "Invalid table location '{}': expected to end with '{}'",
+                    location, expected_suffix
+                ),
+            });
+        }
+
+        let parent_dir = location
+            .rsplit_once('/')
+            .map(|(parent, _)| parent.to_string())
+            .ok_or_else(|| Error::Runtime {
+                message: format!("Invalid table location '{}': no parent directory", location),
+            })?;
+
+        let mut merged_storage_options = self.storage_options.clone();
+        if let Some(opts) = additional_storage_options {
+            merged_storage_options.extend(opts);
+        }
+
+        let connect_request = ConnectRequest {
+            uri: parent_dir,
+            options: merged_storage_options,
+            read_consistency_interval: self.read_consistency_interval,
+            session: self.session.clone(),
+            #[cfg(feature = "remote")]
+            client_config: Default::default(),
+        };
+
+        let listing_db = ListingDatabase::connect_with_options(&connect_request)
+            .await
+            .map_err(|e| Error::Runtime {
+                message: format!("Failed to create listing database: {}", e),
+            })?;
+
+        Ok(Arc::new(listing_db))
+    }
+}
+
+impl std::fmt::Debug for LanceNamespaceDatabase {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("LanceNamespaceDatabase")
+            .field("storage_options", &self.storage_options)
+            .field("read_consistency_interval", &self.read_consistency_interval)
+            .finish()
+    }
+}
+
+impl std::fmt::Display for LanceNamespaceDatabase {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "LanceNamespaceDatabase")
+    }
+}
+
+#[async_trait]
+impl Database for LanceNamespaceDatabase {
+    async fn list_namespaces(&self, request: DbListNamespacesRequest) -> Result<Vec<String>> {
+        let ns_request = ListNamespacesRequest {
+            id: if request.namespace.is_empty() {
+                None
+            } else {
+                Some(request.namespace)
+            },
+            page_token: request.page_token,
+            limit: request.limit.map(|l| l as i32),
+        };
+
+        let response = self
+            .namespace
+            .list_namespaces(ns_request)
+            .await
+            .map_err(|e| Error::Runtime {
+                message: format!("Failed to list namespaces: {}", e),
+            })?;
+
+        Ok(response.namespaces)
+    }
+
+    async fn create_namespace(&self, request: DbCreateNamespaceRequest) -> Result<()> {
+        let ns_request = CreateNamespaceRequest {
+            id: if request.namespace.is_empty() {
+                None
+            } else {
+                Some(request.namespace)
+            },
+            mode: None,
+            properties: None,
+        };
+
+        self.namespace
+            .create_namespace(ns_request)
+            .await
+            .map_err(|e| Error::Runtime {
+                message: format!("Failed to create namespace: {}", e),
+            })?;
+
+        Ok(())
+    }
+
+    async fn drop_namespace(&self, request: DbDropNamespaceRequest) -> Result<()> {
+        let ns_request = DropNamespaceRequest {
+            id: if request.namespace.is_empty() {
+                None
+            } else {
+                Some(request.namespace)
+            },
+            mode: None,
+            behavior: None,
+        };
+
+        self.namespace
+            .drop_namespace(ns_request)
+            .await
+            .map_err(|e| Error::Runtime {
+                message: format!("Failed to drop namespace: {}", e),
+            })?;
+
+        Ok(())
+    }
+
+    async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> {
+        let ns_request = ListTablesRequest {
+            id: if request.namespace.is_empty() {
+                None
+            } else {
+                Some(request.namespace)
+            },
+            page_token: request.start_after,
+            limit: request.limit.map(|l| l as i32),
+        };
+
+        let response =
+            self.namespace
+                .list_tables(ns_request)
+                .await
+                .map_err(|e| Error::Runtime {
+                    message: format!("Failed to list tables: {}", e),
+                })?;
+
+        Ok(response.tables)
+    }
+
+    async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
+        let mut table_id = request.namespace.clone();
+        table_id.push(request.name.clone());
+        let describe_request = DescribeTableRequest {
+            id: Some(table_id.clone()),
+            version: None,
+        };
+
+        let describe_result = self.namespace.describe_table(describe_request).await;
+
+        match request.mode {
+            CreateTableMode::Create => {
+                if describe_result.is_ok() {
+                    return Err(Error::TableAlreadyExists {
+                        name: request.name.clone(),
+                    });
+                }
+            }
+            CreateTableMode::Overwrite => {
+                if describe_result.is_ok() {
+                    // Drop the existing table - must succeed
+                    let drop_request = DropTableRequest {
+                        id: Some(table_id.clone()),
+                    };
+                    self.namespace
+                        .drop_table(drop_request)
+                        .await
+                        .map_err(|e| Error::Runtime {
+                            message: format!("Failed to drop existing table for overwrite: {}", e),
+                        })?;
+                }
+            }
+            CreateTableMode::ExistOk(_) => {
+                if let Ok(response) = describe_result {
+                    let location = response.location.ok_or_else(|| Error::Runtime {
+                        message: "Table location is missing from namespace response".to_string(),
+                    })?;
+
+                    let listing_db = self
+                        .create_listing_database(&request.name, &location, response.storage_options)
+                        .await?;
+
+                    return listing_db
+                        .open_table(OpenTableRequest {
+                            name: request.name.clone(),
+                            namespace: request.namespace.clone(),
+                            index_cache_size: None,
+                            lance_read_params: None,
+                        })
+                        .await;
+                }
+            }
+        }
+
+        let mut table_id = request.namespace.clone();
+        table_id.push(request.name.clone());
+
+        let create_empty_request = CreateEmptyTableRequest {
+            id: Some(table_id),
+            location: None,
+            properties: if self.storage_options.is_empty() {
+                None
+            } else {
+                Some(self.storage_options.clone())
+            },
+        };
+
+        let create_empty_response = self
+            .namespace
+            .create_empty_table(create_empty_request)
+            .await
+            .map_err(|e| Error::Runtime {
+                message: format!("Failed to create empty table: {}", e),
+            })?;
+
+        let location = create_empty_response
+            .location
+            .ok_or_else(|| Error::Runtime {
+                message: "Table location is missing from create_empty_table response".to_string(),
+            })?;
+
+        let listing_db = self
+            .create_listing_database(
+                &request.name,
+                &location,
+                create_empty_response.storage_options,
+            )
+            .await?;
+
+        listing_db.create_table(request).await
+    }
+
+    async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
+        let mut table_id = request.namespace.clone();
+        table_id.push(request.name.clone());
+
+        let describe_request = DescribeTableRequest {
+            id: Some(table_id),
+            version: None,
+        };
+        let response = self
+            .namespace
+            .describe_table(describe_request)
+            .await
+            .map_err(|e| Error::Runtime {
+                message: format!("Failed to describe table: {}", e),
+            })?;
+
+        let location = response.location.ok_or_else(|| Error::Runtime {
+            message: "Table location is missing from namespace response".to_string(),
+        })?;
+
+        let listing_db = self
+            .create_listing_database(&request.name, &location, response.storage_options)
+            .await?;
+
+        listing_db.open_table(request).await
+    }
+
+    async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {
+        Err(Error::NotSupported {
+            message: "clone_table is not supported for namespace connections".to_string(),
+        })
+    }
+
+    async fn rename_table(
+        &self,
+        _cur_name: &str,
+        _new_name: &str,
+        _cur_namespace: &[String],
+        _new_namespace: &[String],
+    ) -> Result<()> {
+        Err(Error::NotSupported {
+            message: "rename_table is not supported for namespace connections".to_string(),
+        })
+    }
+
+    async fn drop_table(&self, name: &str, namespace: &[String]) -> Result<()> {
+        let mut table_id = namespace.to_vec();
+        table_id.push(name.to_string());
+
+        let drop_request = DropTableRequest { id: Some(table_id) };
+        self.namespace
+            .drop_table(drop_request)
+            .await
+            .map_err(|e| Error::Runtime {
+                message: format!("Failed to drop table: {}", e),
+            })?;
+
+        Ok(())
+    }
+
+    async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> {
+        let tables = self
+            .table_names(TableNamesRequest {
+                namespace: namespace.to_vec(),
+                start_after: None,
+                limit: None,
+            })
+            .await?;
+
+        for table in tables {
+            self.drop_table(&table, namespace).await?;
+        }
+
+        Ok(())
+    }
+
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+}
+
+#[cfg(test)]
+#[cfg(not(windows))] // TODO: support windows for lance-namespace
+mod tests {
+    use super::*;
+    use crate::connect_namespace;
+    use crate::query::ExecutableQuery;
+    use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
+    use arrow_schema::{DataType, Field, Schema};
+    use futures::TryStreamExt;
+    use tempfile::tempdir;
+
+    /// Helper function to create test data
+    fn create_test_data() -> RecordBatchIterator<
+        std::vec::IntoIter<std::result::Result<RecordBatch, arrow_schema::ArrowError>>,
+    > {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, false),
+        ]));
+
+        let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+        let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie", "David", "Eve"]);
+
+        let batch = RecordBatch::try_new(
+            schema.clone(),
+            vec![Arc::new(id_array), Arc::new(name_array)],
+        )
+        .unwrap();
+        RecordBatchIterator::new(vec![std::result::Result::Ok(batch)].into_iter(), schema)
+    }
+
+    #[tokio::test]
+    async fn test_namespace_connection_simple() {
+        // Test that namespace connections work with simple connect_namespace(impl_type, properties)
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        // This should succeed with directory-based namespace
+        let result = connect_namespace("dir", properties).execute().await;
+
+        assert!(result.is_ok());
+    }
+
+    #[tokio::test]
+    async fn test_namespace_connection_with_storage_options() {
+        // Test namespace connections with storage options
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        // This should succeed with directory-based namespace and storage options
+        let result = connect_namespace("dir", properties)
+            .storage_option("timeout", "30s")
+            .execute()
+            .await;
+
+        assert!(result.is_ok());
+    }
+
+    #[tokio::test]
+    async fn test_namespace_connection_with_all_options() {
+        use crate::embeddings::MemoryRegistry;
+        use std::time::Duration;
+
+        // Test namespace connections with all configuration options
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let embedding_registry = Arc::new(MemoryRegistry::new());
+        let session = Arc::new(lance::session::Session::default());
+
+        // Test with all options set
+        let result = connect_namespace("dir", properties)
+            .storage_option("timeout", "30s")
+            .storage_options([("cache_size", "1gb"), ("region", "us-east-1")])
+            .read_consistency_interval(Duration::from_secs(5))
+            .embedding_registry(embedding_registry.clone())
+            .session(session.clone())
+            .execute()
+            .await;
+
+        assert!(result.is_ok());
+
+        let conn = result.unwrap();
+
+        // Verify embedding registry is set correctly
+        assert!(std::ptr::eq(
+            conn.embedding_registry() as *const _,
+            embedding_registry.as_ref() as *const _
+        ));
+    }
+
+    #[tokio::test]
+    async fn test_namespace_create_table_basic() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        // Connect to namespace using DirectoryNamespace
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Test: Create a table
+        let test_data = create_test_data();
+        let table = conn
+            .create_table("test_table", test_data)
+            .execute()
+            .await
+            .expect("Failed to create table");
+
+        // Verify: Table was created and can be queried
+        let results = table
+            .query()
+            .execute()
+            .await
+            .expect("Failed to query table")
+            .try_collect::<Vec<_>>()
+            .await
+            .expect("Failed to collect results");
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].num_rows(), 5);
+
+        // Verify: Table appears in table_names
+        let table_names = conn
+            .table_names()
+            .execute()
+            .await
+            .expect("Failed to list tables");
+        assert!(table_names.contains(&"test_table".to_string()));
+    }
+
+    #[tokio::test]
+    async fn test_namespace_describe_table() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        // Connect to namespace
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Create a table first
+        let test_data = create_test_data();
+        let _table = conn
+            .create_table("describe_test", test_data)
+            .execute()
+            .await
+            .expect("Failed to create table");
+
+        // Test: Open the table (which internally uses describe_table)
+        let opened_table = conn
+            .open_table("describe_test")
+            .execute()
+            .await
+            .expect("Failed to open table");
+
+        // Verify: Can query the opened table
+        let results = opened_table
+            .query()
+            .execute()
+            .await
+            .expect("Failed to query table")
+            .try_collect::<Vec<_>>()
+            .await
+            .expect("Failed to collect results");
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].num_rows(), 5);
+
+        // Verify schema matches
+        let schema = opened_table.schema().await.expect("Failed to get schema");
+        assert_eq!(schema.fields.len(), 2);
+        assert_eq!(schema.field(0).name(), "id");
+        assert_eq!(schema.field(1).name(), "name");
+    }
+
+    #[tokio::test]
+    async fn test_namespace_create_table_overwrite_mode() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Create initial table with 5 rows
+        let test_data1 = create_test_data();
+        let _table1 = conn
+            .create_table("overwrite_test", test_data1)
+            .execute()
+            .await
+            .expect("Failed to create table");
+
+        // Create new data with 3 rows
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, false),
+        ]));
+        let id_array = Int32Array::from(vec![10, 20, 30]);
+        let name_array = StringArray::from(vec!["New1", "New2", "New3"]);
+        let test_data2 = RecordBatch::try_new(
+            schema.clone(),
+            vec![Arc::new(id_array), Arc::new(name_array)],
+        )
+        .unwrap();
+
+        // Test: Overwrite the table
+        let table2 = conn
+            .create_table(
+                "overwrite_test",
+                RecordBatchIterator::new(
+                    vec![std::result::Result::Ok(test_data2)].into_iter(),
+                    schema,
+                ),
+            )
+            .mode(CreateTableMode::Overwrite)
+            .execute()
+            .await
+            .expect("Failed to overwrite table");
+
+        // Verify: Table has new data (3 rows instead of 5)
+        let results = table2
+            .query()
+            .execute()
+            .await
+            .expect("Failed to query table")
+            .try_collect::<Vec<_>>()
+            .await
+            .expect("Failed to collect results");
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].num_rows(), 3);
+
+        // Verify the data is actually the new data
+        let id_col = results[0]
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(id_col.value(0), 10);
+        assert_eq!(id_col.value(1), 20);
+        assert_eq!(id_col.value(2), 30);
+    }
+
+    #[tokio::test]
+    async fn test_namespace_create_table_exist_ok_mode() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Create initial table with test data
+        let test_data1 = create_test_data();
+        let _table1 = conn
+            .create_table("exist_ok_test", test_data1)
+            .execute()
+            .await
+            .expect("Failed to create table");
+
+        // Try to create again with exist_ok mode
+        let test_data2 = create_test_data();
+        let table2 = conn
+            .create_table("exist_ok_test", test_data2)
+            .mode(CreateTableMode::exist_ok(|req| req))
+            .execute()
+            .await
+            .expect("Failed with exist_ok mode");
+
+        // Verify: Table still has original data (5 rows)
+        let results = table2
+            .query()
+            .execute()
+            .await
+            .expect("Failed to query table")
+            .try_collect::<Vec<_>>()
+            .await
+            .expect("Failed to collect results");
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].num_rows(), 5);
+    }
+
+    #[tokio::test]
+    async fn test_namespace_create_multiple_tables() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Create first table
+        let test_data1 = create_test_data();
+        let _table1 = conn
+            .create_table("table1", test_data1)
+            .execute()
+            .await
+            .expect("Failed to create first table");
+
+        // Create second table
+        let test_data2 = create_test_data();
+        let _table2 = conn
+            .create_table("table2", test_data2)
+            .execute()
+            .await
+            .expect("Failed to create second table");
+
+        // Verify: Both tables appear in table list
+        let table_names = conn
+            .table_names()
+            .execute()
+            .await
+            .expect("Failed to list tables");
+
+        assert!(table_names.contains(&"table1".to_string()));
+        assert!(table_names.contains(&"table2".to_string()));
+
+        // Verify: Can open both tables
+        let opened_table1 = conn
+            .open_table("table1")
+            .execute()
+            .await
+            .expect("Failed to open table1");
+
+        let opened_table2 = conn
+            .open_table("table2")
+            .execute()
+            .await
+            .expect("Failed to open table2");
+
+        // Verify both tables work
+        let count1 = opened_table1
+            .count_rows(None)
+            .await
+            .expect("Failed to count rows in table1");
+        assert_eq!(count1, 5);
+
+        let count2 = opened_table2
+            .count_rows(None)
+            .await
+            .expect("Failed to count rows in table2");
+        assert_eq!(count2, 5);
+    }
+
+    #[tokio::test]
+    async fn test_namespace_table_not_found() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Test: Try to open a non-existent table
+        let result = conn.open_table("non_existent_table").execute().await;
+
+        // Verify: Should return an error
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_namespace_drop_table() {
+        // Setup: Create a temporary directory for the namespace
+        let tmp_dir = tempdir().unwrap();
+        let root_path = tmp_dir.path().to_str().unwrap().to_string();
+
+        let mut properties = HashMap::new();
+        properties.insert("root".to_string(), root_path);
+
+        let conn = connect_namespace("dir", properties)
+            .execute()
+            .await
+            .expect("Failed to connect to namespace");
+
+        // Create a table first
+        let test_data = create_test_data();
+        let _table = conn
+            .create_table("drop_test", test_data)
+            .execute()
+            .await
+            .expect("Failed to create table");
+
+        // Verify table exists
+        let table_names_before = conn
+            .table_names()
+            .execute()
+            .await
+            .expect("Failed to list tables");
+        assert!(table_names_before.contains(&"drop_test".to_string()));
+
+        // Test: Drop the table
+        conn.drop_table("drop_test", &[])
+            .await
+            .expect("Failed to drop table");
+
+        // Verify: Table no longer exists
+        let table_names_after = conn
+            .table_names()
+            .execute()
+            .await
+            .expect("Failed to list tables");
+        assert!(!table_names_after.contains(&"drop_test".to_string()));
+
+        // Verify: Cannot open dropped table
+        let open_result = conn.open_table("drop_test").execute().await;
+        assert!(open_result.is_err());
+    }
+}
--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -8,7 +8,7 @@
 //! values
 use std::cmp::max;

-use lance::table::format::{Index, Manifest};
+use lance::table::format::{IndexMetadata, Manifest};

 use crate::DistanceType;

@@ -19,7 +19,7 @@ pub struct VectorIndex {
 }

 impl VectorIndex {
-    pub fn new_from_format(manifest: &Manifest, index: &Index) -> Self {
+    pub fn new_from_format(manifest: &Manifest, index: &IndexMetadata) -> Self {
        let fields = index
            .fields
            .iter()
--- a/rust/lancedb/src/lib.rs
+++ b/rust/lancedb/src/lib.rs
@@ -212,7 +212,7 @@ use std::fmt::Display;

 use serde::{Deserialize, Serialize};

-pub use connection::Connection;
+pub use connection::{ConnectNamespaceBuilder, Connection};
 pub use error::{Error, Result};
 use lance_linalg::distance::DistanceType as LanceDistanceType;
 pub use table::Table;
@@ -289,6 +289,8 @@ impl Display for DistanceType {

 /// Connect to a database
 pub use connection::connect;
+/// Connect to a namespace-backed database
+pub use connection::connect_namespace;

 /// Re-export Lance Session and ObjectStoreRegistry for custom session creation
 pub use lance::session::Session;
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -1452,6 +1452,14 @@ struct MergeInsertRequest {
    when_not_matched_insert_all: bool,
    when_not_matched_by_source_delete: bool,
    when_not_matched_by_source_delete_filt: Option<String>,
+    // For backwards compatibility, only serialize use_index when it's false
+    // (the default is true)
+    #[serde(skip_serializing_if = "is_true")]
+    use_index: bool,
+}
+
+fn is_true(b: &bool) -> bool {
+    *b
 }

 impl TryFrom<MergeInsertBuilder> for MergeInsertRequest {
@@ -1476,6 +1484,8 @@ impl TryFrom<MergeInsertBuilder> for MergeInsertRequest {
            when_not_matched_insert_all: value.when_not_matched_insert_all,
            when_not_matched_by_source_delete: value.when_not_matched_by_source_delete,
            when_not_matched_by_source_delete_filt: value.when_not_matched_by_source_delete_filt,
+            // Only serialize use_index when it's false for backwards compatibility
+            use_index: value.use_index,
        })
    }
 }
@@ -1942,6 +1952,7 @@ mod tests {
                assert_eq!(params["when_not_matched_by_source_delete"], "false");
                assert!(!params.contains_key("when_matched_update_all_filt"));
                assert!(!params.contains_key("when_not_matched_by_source_delete_filt"));
+                assert!(!params.contains_key("use_index"));

                if old_server {
                    http::Response::builder().status(200).body("{}").unwrap()
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -1976,6 +1976,8 @@ impl NativeTable {
    /// Delete keys from the config
    pub async fn delete_config_keys(&self, delete_keys: &[&str]) -> Result<()> {
        let mut dataset = self.dataset.get_mut().await?;
+        // TODO: update this when we implement metadata APIs
+        #[allow(deprecated)]
        dataset.delete_config_keys(delete_keys).await?;
        Ok(())
    }
@@ -1986,6 +1988,8 @@ impl NativeTable {
        upsert_values: impl IntoIterator<Item = (String, String)>,
    ) -> Result<()> {
        let mut dataset = self.dataset.get_mut().await?;
+        // TODO: update this when we implement metadata APIs
+        #[allow(deprecated)]
        dataset.replace_schema_metadata(upsert_values).await?;
        Ok(())
    }
@@ -2395,6 +2399,7 @@ impl BaseTable for NativeTable {
        } else {
            builder.when_not_matched_by_source(WhenNotMatchedBySource::Keep);
        }
+        builder.use_index(params.use_index);

        let future = if let Some(timeout) = params.timeout {
            // The default retry timeout is 30s, so we pass the full timeout down
@@ -2902,6 +2907,38 @@ mod tests {
        );
    }

+    #[tokio::test]
+    async fn test_merge_insert_use_index() {
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+        let conn = connect(uri).execute().await.unwrap();
+
+        // Create a dataset with i=0..10
+        let batches = merge_insert_test_batches(0, 0);
+        let table = conn
+            .create_table("my_table", batches)
+            .execute()
+            .await
+            .unwrap();
+        assert_eq!(table.count_rows(None).await.unwrap(), 10);
+
+        // Test use_index=true (default behavior)
+        let new_batches = Box::new(merge_insert_test_batches(5, 1));
+        let mut merge_insert_builder = table.merge_insert(&["i"]);
+        merge_insert_builder.when_not_matched_insert_all();
+        merge_insert_builder.use_index(true);
+        merge_insert_builder.execute(new_batches).await.unwrap();
+        assert_eq!(table.count_rows(None).await.unwrap(), 15);
+
+        // Test use_index=false (force table scan)
+        let new_batches = Box::new(merge_insert_test_batches(15, 2));
+        let mut merge_insert_builder = table.merge_insert(&["i"]);
+        merge_insert_builder.when_not_matched_insert_all();
+        merge_insert_builder.use_index(false);
+        merge_insert_builder.execute(new_batches).await.unwrap();
+        assert_eq!(table.count_rows(None).await.unwrap(), 25);
+    }
+
    #[tokio::test]
    async fn test_add_overwrite() {
        let tmp_dir = tempdir().unwrap();
--- a/rust/lancedb/src/table/merge.rs
+++ b/rust/lancedb/src/table/merge.rs
@@ -22,6 +22,7 @@ pub struct MergeInsertBuilder {
    pub(crate) when_not_matched_by_source_delete: bool,
    pub(crate) when_not_matched_by_source_delete_filt: Option<String>,
    pub(crate) timeout: Option<Duration>,
+    pub(crate) use_index: bool,
 }

 impl MergeInsertBuilder {
@@ -35,6 +36,7 @@ impl MergeInsertBuilder {
            when_not_matched_by_source_delete: false,
            when_not_matched_by_source_delete_filt: None,
            timeout: None,
+            use_index: true,
        }
    }

@@ -101,6 +103,19 @@ impl MergeInsertBuilder {
        self
    }

+    /// Controls whether to use indexes for the merge operation.
+    ///
+    /// When set to `true` (the default), the operation will use an index if available
+    /// on the join key for improved performance. When set to `false`, it forces a full
+    /// table scan even if an index exists. This can be useful for benchmarking or when
+    /// the query optimizer chooses a suboptimal path.
+    ///
+    /// If not set, defaults to `true` (use index if available).
+    pub fn use_index(&mut self, use_index: bool) -> &mut Self {
+        self.use_index = use_index;
+        self
+    }
+
    /// Executes the merge insert operation
    ///
    /// Returns version and statistics about the merge operation including the number of rows
Author	SHA1	Message	Date
Lance Release	247fb58400	Bump version: 0.25.1 → 0.25.2-beta.0	2025-09-24 22:54:09 +00:00
Jack Ye	504bdc471c	feat(rust): support namespace backed database (#2664 ) This PR adds support for namespace-backed databases through lance-namespace integration, enabling centralized table management through namespace APIs. --------- Co-authored-by: Claude <noreply@anthropic.com>	2025-09-24 15:33:31 -07:00
Will Jones	d617cdef4a	feat: add use_index parameter to merge insert operations (#2674 ) ## Summary Exposes `use_index` Merge Insert parameter, which was created upstream in https://github.com/lancedb/lance/pull/4688. ## API Examples ### Python ```python # Force table scan table.merge_insert(["id"]) \ .when_not_matched_insert_all() \ .use_index(False) \ .execute(data) ``` ### Node.js/TypeScript ```typescript // Force table scan await table.mergeInsert("id") .whenNotMatchedInsertAll() .useIndex(false) .execute(data); ``` ### Rust ```rust // Force table scan let mut builder = table.merge_insert(&["id"]); builder.when_not_matched_insert_all() .use_index(false); builder.execute(data).await?; ``` 🤖 Generated with [Claude Code](https://claude.ai/code) Co-authored-by: Claude <noreply@anthropic.com>	2025-09-24 12:50:21 -07:00
Will Jones	356d7046fd	ci: fix test failure on main (#2677 ) Test was in wrong position.	2025-09-24 09:46:04 -07:00
Will Jones	48e5caabda	ci(nodejs): lint for unused imports (#2673 )	2025-09-23 18:49:42 -07:00
Lance Release	d6cc68f671	Bump version: 0.22.1-beta.4 → 0.22.1	2025-09-23 22:07:31 +00:00
Lance Release	55eacfa685	Bump version: 0.22.1-beta.3 → 0.22.1-beta.4	2025-09-23 22:06:45 +00:00
Lance Release	222e3264ab	Bump version: 0.25.1-beta.4 → 0.25.1	2025-09-23 22:06:08 +00:00
Lance Release	13505026cb	Bump version: 0.25.1-beta.3 → 0.25.1-beta.4	2025-09-23 22:06:08 +00:00
Neha Prasad	b0800b4b71	fix: undefined values should become null in nullable fields (#2658 ) ### Bug Fix: Undefined Values in Nullable Fields Issue: When inserting data with `undefined` values into nullable fields, LanceDB was incorrectly coercing them to default values (`false` for booleans, `NaN` for numbers, `""` for strings) instead of `null`. Fix: Modified the `makeVector()` function in `arrow.ts` to properly convert `undefined` values to `null` for nullable fields before passing data to Apache Arrow. fixes: #2645 Result: Now `{ text: undefined, number: undefined, bool: undefined }` correctly becomes `{ text: null, number: null, bool: null }` when fields are marked as nullable in the schema. Files Changed: - `nodejs/lancedb/arrow.ts` (core fix) - `nodejs/__test__/arrow.test.ts` (test coverage) - This ensures proper null handling for nullable fields as expected by users. --------- Co-authored-by: Will Jones <willjones127@gmail.com>	2025-09-23 14:29:52 -07:00
Neha Prasad	1befebf614	fix(node): handle null values in nullable boolean fields (#2657 ) ### Solution Added special handling in `makeVector` function for boolean arrays where all values are null. The fix creates a proper null bitmap using `makeData` and `arrowMakeVector` instead of relying on Apache Arrow's `vectorFromArray` which doesn't handle this edge case correctly. fixes: #2644 ### Changes - Added null value detection for boolean types in `makeVector` function - Creates proper Arrow data structure with null bitmap when all boolean values are null - Preserves existing behavior for non-null boolean values and other data types - Fixes the boolean null value bug while maintaining backward compatibility. --------- Co-authored-by: Will Jones <willjones127@gmail.com>	2025-09-23 14:07:00 -07:00
Will Jones	1ab60fae7f	feat: upgrade Lance to v0.37.0 (#2672 ) Change logs: * https://github.com/lancedb/lance/releases/tag/v0.37.0 * https://github.com/lancedb/lance/releases/tag/v0.36.0	2025-09-23 13:41:47 -07:00
Ayush Chaurasia	e921c90c1b	feat: support mean reciprocal rank reranker (#2671 ) The basic idea of MRR is this - https://www.evidentlyai.com/ranking-metrics/mean-reciprocal-rank-mrr I've implemented a weighted version for allowing user to set weightage between vector and fts. The gist is something like this ### Scenario A: Document at rank 1 in one set, absent from another ``` # Assuming equal weights: weight_vector = 0.5, weight_fts = 0.5 vector_rr = 1.0 # rank 1 → 1/1 = 1.0 fts_rr = 0.0 # absent → 0.0 weighted_mrr = 0.5 × 1.0 + 0.5 × 0.0 = 0.5 ``` ### Scenario B: Document at rank 1 in one set, rank 2 in another ``` # Same weights: weight_vector = 0.5, weight_fts = 0.5 vector_rr = 1.0 # rank 1 → 1/1 = 1.0 fts_rr = 0.5 # rank 2 → 1/2 = 0.5 weighted_mrr = 0.5 × 1.0 + 0.5 × 0.5 = 0.5 + 0.25 = 0.75 ``` And so with `return_score="all"` the result looks something like this (this is from the reranker tests). Because this is a weighted rank based reranker, some results might have the same score ``` text vector _distance _rowid _score _relevance_score 0 I am your father [-0.010703234, 0.069315575, 0.030076642, 0.002... 8.149148e-13 8589934598 10.978719 1.000000 1 the ground beneath my feet [-0.09500901, 0.00092102867, 0.0755851, 0.0372... 1.376896e+00 8589934604 NaN 0.250000 2 I find your lack of faith disturbing [0.07525753, -0.0100010475, 0.09990541, 0.0209... NaN 8589934595 3.483394 0.250000 3 but I don't wanna die [0.033476487, -0.011235877, -0.057625435, -0.0... 1.538222e+00 8589934610 1.130355 0.238095 4 if you strike me down I shall become more powe... [0.00432201, 0.030120496, 5.3317923e-05, 0.033... 1.381086e+00 8589934594 0.715157 0.216667 5 I see a salty message written in the eves [-0.04213107, 0.0016004723, 0.061052393, -0.02... 1.638301e+00 8589934603 1.043785 0.133333 6 but his son was mortal [0.012462767, 0.049041674, -0.057339743, -0.04... 1.421566e+00 8589934620 NaN 0.125000 7 I've got a bad feeling about this [-0.06973199, -0.029960092, 0.02641632, -0.031... NaN 8589934596 1.043785 0.125000 8 now that's a name I haven't heard in a long time [-0.014374257, -0.013588792, -0.07487557, 0.03... 1.597573e+00 8589934593 0.848772 0.118056 9 he was a god [-0.0258895, 0.11925236, -0.029397793, 0.05888... 1.423147e+00 8589934618 NaN 0.100000 10 I wish they would make another one [-0.14737535, -0.015304729, 0.04318139, -0.061... NaN 8589934622 1.043785 0.100000 11 Kratos had a son [-0.057455737, 0.13734367, -0.03537109, -0.000... 1.488075e+00 8589934617 NaN 0.083333 12 I don't wanna live like this [-0.0028891307, 0.015214227, 0.025183653, 0.08... NaN 8589934609 1.043785 0.071429 13 I see a mansard roof through the trees [0.052383978, 0.087759204, 0.014739997, 0.0239... NaN 8589934602 1.043785 0.062500 14 great kid don't get cocky [-0.047043696, 0.054648954, -0.008509666, -0.0... 1.618125e+00 8589934592 NaN 0.055556 ```	2025-09-23 18:25:18 +05:30
Lance Release	05a4ea646a	Bump version: 0.22.1-beta.2 → 0.22.1-beta.3	2025-09-22 04:49:00 +00:00