Compare commits


1 Commit

Author          SHA1         Message                                        Date
Lance Release   1e055158c1   Bump version: 0.20.0-beta.1 → 0.20.0-beta.2    2025-06-04 07:14:06 +00:00
124 changed files with 1476 additions and 4853 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.21.2-beta.1"
current_version = "0.20.0-beta.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -5,8 +5,8 @@ on:
tags-ignore:
# We don't publish pre-releases for Rust. Crates.io is just a source
# distribution, so we don't need to publish pre-releases.
- "v*-beta*"
- "*-v*" # for example, python-vX.Y.Z
- 'v*-beta*'
- '*-v*' # for example, python-vX.Y.Z
env:
# This env var is used by Swatinem/rust-cache@v2 for the cache
@@ -19,8 +19,6 @@ env:
jobs:
build:
runs-on: ubuntu-22.04
permissions:
id-token: write
timeout-minutes: 30
# Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v')
@@ -33,8 +31,6 @@ jobs:
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- uses: rust-lang/crates-io-auth-action@v1
id: auth
- name: Publish the package
run: |
cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}

View File

@@ -84,7 +84,7 @@ jobs:
run: |
pip install bump-my-version PyGithub packaging
bash ci/bump_version.sh ${{ inputs.type }} ${{ inputs.bump-minor }} v $COMMIT_BEFORE_BUMP
bash ci/update_lockfiles.sh --amend
bash ci/update_lockfiles.sh
- name: Push new version tag
if: ${{ !inputs.dry_run }}
uses: ad-m/github-push-action@master
@@ -93,3 +93,11 @@ jobs:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
branch: ${{ github.ref }}
tags: true
- uses: ./.github/workflows/update_package_lock
if: ${{ !inputs.dry_run && inputs.other }}
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: ./.github/workflows/update_package_lock_nodejs
if: ${{ !inputs.dry_run && inputs.other }}
with:
github_token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -505,8 +505,6 @@ jobs:
name: vectordb NPM Publish
needs: [node, node-macos, node-linux-gnu, node-windows]
runs-on: ubuntu-latest
permissions:
contents: write
# Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v')
steps:
@@ -539,20 +537,6 @@ jobs:
# We need to deprecate the old package to avoid confusion.
# Each time we publish a new version, it gets undeprecated.
run: npm deprecate vectordb "Use @lancedb/lancedb instead."
- name: Checkout
uses: actions/checkout@v4
with:
ref: main
- name: Update package-lock.json
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
bash ci/update_lockfiles.sh
- name: Push new commit
uses: ad-m/github-push-action@master
with:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
branch: main
- name: Notify Slack Action
uses: ravsamhq/notify-slack-action@2.3.0
if: ${{ always() }}

View File

@@ -0,0 +1,33 @@
name: update_package_lock
description: "Update node's package.lock"
inputs:
github_token:
required: true
description: "github token for the repo"
runs:
using: "composite"
steps:
- uses: actions/setup-node@v3
with:
node-version: 20
- name: Set git configs
shell: bash
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
- name: Update package-lock.json file
working-directory: ./node
run: |
npm install
git add package-lock.json
git commit -m "Updating package-lock.json"
shell: bash
- name: Push changes
if: ${{ inputs.dry_run == 'false' }}
uses: ad-m/github-push-action@master
with:
github_token: ${{ inputs.github_token }}
branch: main
tags: true

View File

@@ -0,0 +1,33 @@
name: update_package_lock_nodejs
description: "Update nodejs's package.lock"
inputs:
github_token:
required: true
description: "github token for the repo"
runs:
using: "composite"
steps:
- uses: actions/setup-node@v3
with:
node-version: 20
- name: Set git configs
shell: bash
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
- name: Update package-lock.json file
working-directory: ./nodejs
run: |
npm install
git add package-lock.json
git commit -m "Updating package-lock.json"
shell: bash
- name: Push changes
if: ${{ inputs.dry_run == 'false' }}
uses: ad-m/github-push-action@master
with:
github_token: ${{ inputs.github_token }}
branch: main
tags: true

View File

@@ -1,24 +0,0 @@
LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
remote (against LanceDB Cloud).
The core of LanceDB is written in Rust. There are bindings in Python, TypeScript, and Java.
Project layout:
* `rust/lancedb`: The LanceDB core Rust implementation.
* `python`: The Python bindings, using PyO3.
* `nodejs`: The TypeScript bindings, using napi-rs
* `java`: The Java bindings
(`rust/ffi` and `node/` are for a deprecated package. You can ignore them.)
Common commands:
* Check for compiler errors: `cargo check --features remote --tests --examples`
* Run tests: `cargo test --features remote --tests`
* Run specific test: `cargo test --features remote -p <package_name> --test <test_name>`
* Lint: `cargo clippy --features remote --tests --examples`
* Format: `cargo fmt --all`
Before committing changes, run formatting.

Cargo.lock generated (1108 changed lines)

File diff suppressed because it is too large.

View File

@@ -21,14 +21,14 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.32.0", "features" = ["dynamodb"] }
lance-io = "=0.32.0"
lance-index = "=0.32.0"
lance-linalg = "=0.32.0"
lance-table = "=0.32.0"
lance-testing = "=0.32.0"
lance-datafusion = "=0.32.0"
lance-encoding = "=0.32.0"
lance = { "version" = "=0.29.0", "features" = ["dynamodb"], tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-io = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-index = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-linalg = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-table = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-testing = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-datafusion = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-encoding = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
# Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false }
arrow-array = "55.1"
@@ -39,20 +39,20 @@ arrow-schema = "55.1"
arrow-arith = "55.1"
arrow-cast = "55.1"
async-trait = "0"
datafusion = { version = "48.0", default-features = false }
datafusion-catalog = "48.0"
datafusion-common = { version = "48.0", default-features = false }
datafusion-execution = "48.0"
datafusion-expr = "48.0"
datafusion-physical-plan = "48.0"
datafusion = { version = "47.0", default-features = false }
datafusion-catalog = "47.0"
datafusion-common = { version = "47.0", default-features = false }
datafusion-execution = "47.0"
datafusion-expr = "47.0"
datafusion-physical-plan = "47.0"
env_logger = "0.11"
half = { "version" = "2.6.0", default-features = false, features = [
half = { "version" = "=2.5.0", default-features = false, features = [
"num-traits",
] }
futures = "0"
log = "0.4"
moka = { version = "0.12", features = ["future"] }
object_store = "0.12.0"
object_store = "0.11.0"
pin-project = "1.0.7"
snafu = "0.8"
url = "2"

View File

@@ -1,188 +0,0 @@
import argparse
import sys
import json
def run_command(command: str) -> str:
"""
Run a shell command and return stdout as a string.
If exit code is not 0, raise an exception with the stderr output.
"""
import subprocess
result = subprocess.run(command, shell=True, capture_output=True, text=True)
if result.returncode != 0:
raise Exception(f"Command failed with error: {result.stderr.strip()}")
return result.stdout.strip()
def get_latest_stable_version() -> str:
version_line = run_command("cargo info lance | grep '^version:'")
version = version_line.split(" ")[1].strip()
return version
def get_latest_preview_version() -> str:
lance_tags = run_command(
"git ls-remote --tags https://github.com/lancedb/lance.git | grep 'refs/tags/v[0-9beta.-]\\+$'"
).splitlines()
lance_tags = (
tag.split("refs/tags/")[1]
for tag in lance_tags
if "refs/tags/" in tag and "beta" in tag
)
from packaging.version import Version
latest = max(
(tag[1:] for tag in lance_tags if tag.startswith("v")), key=lambda t: Version(t)
)
return str(latest)
def extract_features(line: str) -> list:
"""
Extracts the features from a line in Cargo.toml.
Example: 'lance = { "version" = "=0.29.0", "features" = ["dynamodb"] }'
Returns: ['dynamodb']
"""
import re
match = re.search(r'"features"\s*=\s*\[\s*(.*?)\s*\]', line, re.DOTALL)
if match:
features_str = match.group(1)
return [f.strip('"') for f in features_str.split(",") if len(f) > 0]
return []
def update_cargo_toml(line_updater):
"""
Updates the Cargo.toml file by applying the line_updater function to each line.
The line_updater function should take a line as input and return the updated line.
"""
with open("Cargo.toml", "r") as f:
lines = f.readlines()
new_lines = []
lance_line = ""
is_parsing_lance_line = False
for line in lines:
if line.startswith("lance"):
# Update the line using the provided function
if line.strip().endswith("}"):
new_lines.append(line_updater(line))
else:
lance_line = line
is_parsing_lance_line = True
elif is_parsing_lance_line:
lance_line += line
if line.strip().endswith("}"):
new_lines.append(line_updater(lance_line))
lance_line = ""
is_parsing_lance_line = False
else:
print("doesn't end with }:", line)
else:
# Keep the line unchanged
new_lines.append(line)
with open("Cargo.toml", "w") as f:
f.writelines(new_lines)
def set_stable_version(version: str):
"""
Sets lines to
lance = { "version" = "=0.29.0", "features" = ["dynamodb"] }
lance-io = "=0.29.0"
...
"""
def line_updater(line: str) -> str:
package_name = line.split("=", maxsplit=1)[0].strip()
features = extract_features(line)
if features:
return f'{package_name} = {{ "version" = "={version}", "features" = {json.dumps(features)} }}\n'
else:
return f'{package_name} = "={version}"\n'
update_cargo_toml(line_updater)
def set_preview_version(version: str):
"""
Sets lines to
lance = { "version" = "=0.29.0", "features" = ["dynamodb"], tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-io = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
...
"""
def line_updater(line: str) -> str:
package_name = line.split("=", maxsplit=1)[0].strip()
features = extract_features(line)
base_version = version.split("-")[0] # Get the base version without beta suffix
if features:
return f'{package_name} = {{ "version" = "={base_version}", "features" = {json.dumps(features)}, "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
else:
return f'{package_name} = {{ "version" = "={base_version}", "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
update_cargo_toml(line_updater)
def set_local_version():
"""
Sets lines to
lance = { path = "../lance/rust/lance", features = ["dynamodb"] }
lance-io = { path = "../lance/rust/lance-io" }
...
"""
def line_updater(line: str) -> str:
package_name = line.split("=", maxsplit=1)[0].strip()
features = extract_features(line)
if features:
return f'{package_name} = {{ "path" = "../lance/rust/{package_name}", "features" = {json.dumps(features)} }}\n'
else:
return f'{package_name} = {{ "path" = "../lance/rust/{package_name}" }}\n'
update_cargo_toml(line_updater)
parser = argparse.ArgumentParser(description="Set the version of the Lance package.")
parser.add_argument(
"version",
type=str,
help="The version to set for the Lance package. Use 'stable' for the latest stable version, 'preview' for latest preview version, or a specific version number (e.g., '0.1.0'). You can also specify 'local' to use a local path.",
)
args = parser.parse_args()
if args.version == "stable":
latest_stable_version = get_latest_stable_version()
print(
f"Found latest stable version: \033[1mv{latest_stable_version}\033[0m",
file=sys.stderr,
)
set_stable_version(latest_stable_version)
elif args.version == "preview":
latest_preview_version = get_latest_preview_version()
print(
f"Found latest preview version: \033[1mv{latest_preview_version}\033[0m",
file=sys.stderr,
)
set_preview_version(latest_preview_version)
elif args.version == "local":
set_local_version()
else:
# Parse the version number.
version = args.version
# Ignore initial v if present.
if version.startswith("v"):
version = version[1:]
if "beta" in version:
set_preview_version(version)
else:
set_stable_version(version)
print("Updating lockfiles...", file=sys.stderr, end="")
run_command("cargo metadata > /dev/null")
print(" done.", file=sys.stderr)

View File

@@ -1,30 +1,18 @@
#!/usr/bin/env bash
set -euo pipefail
AMEND=false
for arg in "$@"; do
if [[ "$arg" == "--amend" ]]; then
AMEND=true
fi
done
# This updates the lockfile without building
cargo metadata --quiet > /dev/null
cargo metadata > /dev/null
pushd nodejs || exit 1
npm install --package-lock-only --silent
npm install --package-lock-only
popd
pushd node || exit 1
npm install --package-lock-only --silent
npm install --package-lock-only
popd
if git diff --quiet --exit-code; then
echo "No lockfile changes to commit; skipping amend."
elif $AMEND; then
git add Cargo.lock nodejs/package-lock.json node/package-lock.json
git commit --amend --no-edit
else
git add Cargo.lock nodejs/package-lock.json node/package-lock.json
git commit -m "Update lockfiles"
git commit --amend --no-edit
fi

docs/package-lock.json generated (12 changed lines)
View File

@@ -19,7 +19,7 @@
},
"../node": {
"name": "vectordb",
"version": "0.21.2-beta.0",
"version": "0.12.0",
"cpu": [
"x64",
"arm64"
@@ -65,11 +65,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
"@lancedb/vectordb-darwin-arm64": "0.12.0",
"@lancedb/vectordb-darwin-x64": "0.12.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
"@lancedb/vectordb-linux-x64-gnu": "0.12.0",
"@lancedb/vectordb-win32-x64-msvc": "0.12.0"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",

View File

@@ -1,9 +1,7 @@
# SQL Querying
You can use DuckDB and Apache DataFusion to query your LanceDB tables using SQL.
This guide will show how to query Lance tables using both.
We will re-use the dataset [created previously](./tables.md):
We will re-use the dataset [created previously](./pandas_and_pyarrow.md):
```python
import lancedb
@@ -29,17 +27,21 @@ arrow_table = table.to_lance()
duckdb.query("SELECT * FROM arrow_table")
```
| vector | item | price |
| ----------- | ---- | ----- |
| [3.1, 4.1] | foo | 10.0 |
| [5.9, 26.5] | bar | 20.0 |
```
┌─────────────┬─────────┬────────┐
│ vector │ item │ price │
│ float[] │ varchar │ double │
├─────────────┼─────────┼────────┤
│ [3.1, 4.1] │ foo │ 10.0 │
│ [5.9, 26.5] │ bar │ 20.0 │
└─────────────┴─────────┴────────┘
```
## Querying a LanceDB Table with Apache DataFusion
Add the required imports before doing any querying.
=== "Python"
```python
--8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb"
--8<-- "python/python/tests/docs/test_guide_tables.py:import-session-context"
@@ -49,12 +51,16 @@ Have the required imports before doing any querying.
Register the previously created table with the DataFusion session context.
=== "Python"
```python
--8<-- "python/python/tests/docs/test_guide_tables.py:lance_sql_basic"
```
| vector | item | price |
| ----------- | ---- | ----- |
| [3.1, 4.1] | foo | 10.0 |
| [5.9, 26.5] | bar | 20.0 |
```
┌─────────────┬─────────┬────────┐
│ vector │ item │ price │
│ float[] │ varchar │ double │
├─────────────┼─────────┼────────┤
│ [3.1, 4.1] │ foo │ 10.0 │
│ [5.9, 26.5] │ bar │ 20.0 │
└─────────────┴─────────┴────────┘
```

View File

@@ -1,53 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / BooleanQuery
# Class: BooleanQuery
Represents a full-text query interface.
This interface defines the structure and behavior for full-text queries,
including methods to retrieve the query type and convert the query to a dictionary format.
## Implements
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
## Constructors
### new BooleanQuery()
```ts
new BooleanQuery(queries): BooleanQuery
```
Creates an instance of BooleanQuery.
#### Parameters
* **queries**: [[`Occur`](../enumerations/Occur.md), [`FullTextQuery`](../interfaces/FullTextQuery.md)][]
An array of (Occur, FullTextQuery) pairs to combine.
Occur specifies whether the query must match, should match, or must not match.
#### Returns
[`BooleanQuery`](BooleanQuery.md)
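A minimal usage sketch, assuming `MatchQuery` and `Occur` from this package and the `(query, column)` parameter order shown for the other query classes in these docs:

```ts
import { BooleanQuery, MatchQuery, Occur } from "@lancedb/lancedb";

// Documents must contain "puppy" and score higher if they also contain "train".
const query = new BooleanQuery([
  [Occur.Must, new MatchQuery("puppy", "text")],
  [Occur.Should, new MatchQuery("train", "text")],
]);
```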
## Methods
### queryType()
```ts
queryType(): FullTextQueryType
```
The type of the full-text query.
#### Returns
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
#### Implementation of
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)

View File

@@ -40,8 +40,6 @@ Creates an instance of MatchQuery.
- `boost`: The boost factor for the query (default is 1.0).
- `fuzziness`: The fuzziness level for the query (default is 0).
- `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
- `operator`: The logical operator to use for combining terms in the query (default is "OR").
- `prefixLength`: The number of beginning characters left unchanged during fuzzy matching.
* **options.boost?**: `number`
@@ -49,10 +47,6 @@ Creates an instance of MatchQuery.
* **options.maxExpansions?**: `number`
* **options.operator?**: [`Operator`](../enumerations/Operator.md)
* **options.prefixLength?**: `number`
#### Returns
[`MatchQuery`](MatchQuery.md)
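A hedged sketch of a fuzzy match, assuming the `(query, column, options?)` constructor order used by `PhraseQuery` in these docs:

```ts
import { MatchQuery } from "@lancedb/lancedb";

// Tolerate one edit per term, so "pupy" still matches "puppy".
const query = new MatchQuery("pupy", "text", { fuzziness: 1 });
```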

View File

@@ -38,12 +38,9 @@ Creates an instance of MultiMatchQuery.
* **options?**
Optional parameters for the multi-match query.
- `boosts`: An array of boost factors for each column (default is 1.0 for all).
- `operator`: The logical operator to use for combining terms in the query (default is "OR").
* **options.boosts?**: `number`[]
* **options.operator?**: [`Operator`](../enumerations/Operator.md)
#### Returns
[`MultiMatchQuery`](MultiMatchQuery.md)
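A sketch of searching several columns at once; the `(query, columns, options?)` constructor shape is an assumption based on the option names above:

```ts
import { MultiMatchQuery } from "@lancedb/lancedb";

// Search "title" and "body", weighting title matches twice as heavily.
const query = new MultiMatchQuery("lance", ["title", "body"], { boosts: [2.0, 1.0] });
```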

View File

@@ -19,10 +19,7 @@ including methods to retrieve the query type and convert the query to a dictiona
### new PhraseQuery()
```ts
new PhraseQuery(
query,
column,
options?): PhraseQuery
new PhraseQuery(query, column): PhraseQuery
```
Creates an instance of `PhraseQuery`.
@@ -35,12 +32,6 @@ Creates an instance of `PhraseQuery`.
* **column**: `string`
The name of the column to search within.
* **options?**
Optional parameters for the phrase query.
- `slop`: The maximum number of intervening unmatched positions allowed between words in the phrase (default is 0).
* **options.slop?**: `number`
#### Returns
[`PhraseQuery`](PhraseQuery.md)
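A sketch using the documented `slop` option to allow one intervening position between the phrase words:

```ts
import { PhraseQuery } from "@lancedb/lancedb";

// Matches "old tower" and, with slop 1, also "old stone tower".
const query = new PhraseQuery("old tower", "text", { slop: 1 });
```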

View File

@@ -1,84 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / Session
# Class: Session
A session for managing caches and object stores across LanceDB operations.
Sessions allow you to configure cache sizes for index and metadata caches,
which can significantly impact performance for large datasets.
## Constructors
### new Session()
```ts
new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
```
Create a new session with custom cache sizes.
# Parameters
- `index_cache_size_bytes`: The size of the index cache in bytes.
Defaults to 6GB if not specified.
- `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
Defaults to 1GB if not specified.
#### Parameters
* **indexCacheSizeBytes?**: `null` \| `bigint`
* **metadataCacheSizeBytes?**: `null` \| `bigint`
#### Returns
[`Session`](Session.md)
## Methods
### approxNumItems()
```ts
approxNumItems(): number
```
Get the approximate number of items cached in the session.
#### Returns
`number`
***
### sizeBytes()
```ts
sizeBytes(): bigint
```
Get the current size of the session caches in bytes.
#### Returns
`bigint`
***
### default()
```ts
static default(): Session
```
Create a session with default cache sizes.
This is equivalent to creating a session with 6GB index cache
and 1GB metadata cache.
#### Returns
[`Session`](Session.md)
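A minimal sketch tying the pieces together; the cache sizes and database path are placeholders, and the three-argument `connect(uri, options, session)` overload is the one documented elsewhere in these docs:

```ts
import { connect, Session } from "@lancedb/lancedb";

// 8 GiB index cache, 2 GiB metadata cache.
const session = new Session(8n * 1024n ** 3n, 2n * 1024n ** 3n);
const db = await connect("/path/to/database", {}, session);
console.log(session.sizeBytes(), session.approxNumItems());
```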

View File

@@ -612,7 +612,7 @@ of the given query
#### Parameters
* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
the query, a vector or string
* **queryType?**: `string`
@@ -799,7 +799,7 @@ by `query`.
#### Parameters
* **vector**: [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md)
* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)
#### Returns

View File

@@ -386,53 +386,6 @@ called then every valid row from the table will be returned.
***
### maximumNprobes()
```ts
maximumNprobes(maximumNprobes): VectorQuery
```
Set the maximum number of probes used.
This controls the maximum number of partitions that will be searched. If this
number is greater than minimumNprobes then the excess partitions will _only_ be
searched if we have not found enough results. This can be useful when a narrow
filter is applied: it lets the query spend more time searching and avoid
potential false negatives.
#### Parameters
* **maximumNprobes**: `number`
#### Returns
[`VectorQuery`](VectorQuery.md)
***
### minimumNprobes()
```ts
minimumNprobes(minimumNprobes): VectorQuery
```
Set the minimum number of probes used.
This controls the minimum number of partitions that will be searched. This
parameter will impact every query against a vector index, regardless of the
filter. See `nprobes` for more details. Higher values will increase recall
but will also increase latency.
#### Parameters
* **minimumNprobes**: `number`
#### Returns
[`VectorQuery`](VectorQuery.md)
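A sketch combining the two knobs with a narrow filter; `table` and `queryVector` are assumed to already exist:

```ts
// Probe at least 20 partitions, and up to 100 when the filter
// leaves too few matches in the first partitions searched.
const results = await table
  .search(queryVector)
  .where("category = 'rare'")
  .minimumNprobes(20)
  .maximumNprobes(100)
  .limit(10)
  .toArray();
```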
***
### nprobes()
```ts
@@ -460,10 +413,6 @@ For best results we recommend tuning this parameter with a benchmark against
your actual data to find the smallest possible value that will still give
you the desired recall.
For more fine-grained control over behavior when you have a very narrow filter
you can use `minimumNprobes` and `maximumNprobes`. This method sets both
the minimum and maximum to the same value.
#### Parameters
* **nprobes**: `number`

View File

@@ -15,14 +15,6 @@ Enum representing the types of full-text queries supported.
## Enumeration Members
### Boolean
```ts
Boolean: "boolean";
```
***
### Boost
```ts

View File

@@ -1,37 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / Occur
# Enumeration: Occur
Enum representing the occurrence of terms in full-text queries.
- `Must`: The term must be present in the document.
- `Should`: The term should contribute to the document score, but is not required.
- `MustNot`: The term must not be present in the document.
## Enumeration Members
### Must
```ts
Must: "MUST";
```
***
### MustNot
```ts
MustNot: "MUST_NOT";
```
***
### Should
```ts
Should: "SHOULD";
```

View File

@@ -1,28 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / Operator
# Enumeration: Operator
Enum representing the logical operators used in full-text queries.
- `And`: All terms must match.
- `Or`: At least one term must match.
## Enumeration Members
### And
```ts
And: "AND";
```
***
### Or
```ts
Or: "OR";
```

View File

@@ -6,13 +6,10 @@
# Function: connect()
## connect(uri, options, session)
## connect(uri, options)
```ts
function connect(
uri,
options?,
session?): Promise<Connection>
function connect(uri, options?): Promise<Connection>
```
Connect to a LanceDB instance at the given URI.
@@ -32,8 +29,6 @@ Accepted formats:
* **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
The options to use when connecting to the database
* **session?**: [`Session`](../classes/Session.md)
### Returns
`Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
@@ -82,7 +77,7 @@ Accepted formats:
[ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
### Examples
### Example
```ts
const conn = await connect({
@@ -90,11 +85,3 @@ const conn = await connect({
storageOptions: {timeout: "60s"}
});
```
```ts
const session = Session.default();
const conn = await connect({
uri: "/path/to/database",
session: session
});
```

View File

@@ -12,12 +12,9 @@
## Enumerations
- [FullTextQueryType](enumerations/FullTextQueryType.md)
- [Occur](enumerations/Occur.md)
- [Operator](enumerations/Operator.md)
## Classes
- [BooleanQuery](classes/BooleanQuery.md)
- [BoostQuery](classes/BoostQuery.md)
- [Connection](classes/Connection.md)
- [Index](classes/Index.md)
@@ -29,7 +26,6 @@
- [Query](classes/Query.md)
- [QueryBase](classes/QueryBase.md)
- [RecordBatchIterator](classes/RecordBatchIterator.md)
- [Session](classes/Session.md)
- [Table](classes/Table.md)
- [TagContents](classes/TagContents.md)
- [Tags](classes/Tags.md)
@@ -85,7 +81,6 @@
- [FieldLike](type-aliases/FieldLike.md)
- [IntoSql](type-aliases/IntoSql.md)
- [IntoVector](type-aliases/IntoVector.md)
- [MultiVector](type-aliases/MultiVector.md)
- [RecordBatchLike](type-aliases/RecordBatchLike.md)
- [SchemaLike](type-aliases/SchemaLike.md)
- [TableLike](type-aliases/TableLike.md)

View File

@@ -70,17 +70,6 @@ Defaults to 'us-east-1'.
***
### session?
```ts
optional session: Session;
```
(For LanceDB OSS only): the session to use for this connection. Holds
shared caches and other session-specific state.
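A sketch mirroring the `connect` example elsewhere in these docs:

```ts
const session = Session.default();
const db = await connect({ uri: "/path/to/database", session });
```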
***
### storageOptions?
```ts

View File

@@ -23,7 +23,7 @@ whether to remove punctuation
### baseTokenizer?
```ts
optional baseTokenizer: "raw" | "simple" | "whitespace" | "ngram";
optional baseTokenizer: "raw" | "simple" | "whitespace";
```
The tokenizer to use when building the index.
@@ -71,36 +71,6 @@ tokens longer than this length will be ignored
***
### ngramMaxLength?
```ts
optional ngramMaxLength: number;
```
ngram max length
***
### ngramMinLength?
```ts
optional ngramMinLength: number;
```
ngram min length
***
### prefixOnly?
```ts
optional prefixOnly: boolean;
```
whether to only index the prefix of the token for ngram tokenizer
***
### removeStopWords?
```ts

View File

@@ -8,7 +8,7 @@
## Properties
### ~~indexCacheSize?~~
### indexCacheSize?
```ts
optional indexCacheSize: number;
@@ -16,11 +16,6 @@ optional indexCacheSize: number;
Set the size of the index cache, specified as a number of entries
#### Deprecated
Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to the connect() function.
The exact meaning of an "entry" will depend on the type of index:
- IVF: there is one entry for each IVF partition
- BTREE: there is one entry for the entire index
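A migration sketch for this deprecated option, with placeholder cache sizes:

```ts
// Before (deprecated): await connect(uri, { indexCacheSize: 512 });
// After: size the caches in bytes through a Session.
const session = new Session(6n * 1024n ** 3n, 1n * 1024n ** 3n);
const db = await connect("/path/to/database", {}, session);
```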

View File

@@ -24,10 +24,10 @@ The default is 7 days
// Delete all versions older than 1 day
const olderThan = new Date();
olderThan.setDate(olderThan.getDate() - 1);
tbl.optimize({cleanupOlderThan: olderThan});
tbl.cleanupOlderVersions(olderThan);
// Delete all versions except the current version
tbl.optimize({cleanupOlderThan: new Date()});
tbl.cleanupOlderVersions(new Date());
```
***

View File

@@ -1,11 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / MultiVector
# Type Alias: MultiVector
```ts
type MultiVector: IntoVector[];
```

View File

@@ -428,7 +428,7 @@
"\n",
"**Why?** \n",
"Embedding the UFO dataset and ingesting it into LanceDB takes **~2 hours on a T4 GPU**. To save time: \n",
"- **Use the pre-prepared table with index created** (provided below) to proceed directly to **Step 7**: search. \n",
"- **Use the pre-prepared table with index created ** (provided below) to proceed directly to step7: search. \n",
"- **Step 5a** contains the full ingestion code for reference (run it only if necessary). \n",
"- **Step 6** contains the details on creating the index on the multivector column"
]

View File

@@ -30,8 +30,7 @@ excluded_globs = [
"../src/rag/advanced_techniques/*.md",
"../src/guides/scalar_index.md",
"../src/guides/storage.md",
"../src/search.md",
"../src/guides/sql_querying.md",
"../src/search.md"
]
python_prefix = "py"

View File

@@ -7,4 +7,3 @@ tantivy==0.20.1
--extra-index-url https://download.pytorch.org/whl/cpu
torch
polars>=0.19, <=1.3.0
datafusion

View File

@@ -1,19 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
wrapperVersion=3.3.2
distributionType=only-script
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip

View File

@@ -1,37 +0,0 @@
# LanceDB Java SDK
## Configuration and Initialization
### LanceDB Cloud
For LanceDB Cloud, use the simplified builder API:
```java
import com.lancedb.lance.namespace.LanceRestNamespace;
// If your DB URL is db://example-db, then your database here is example-db
LanceRestNamespace namespace = LanceDbRestNamespaces.builder()
.apiKey("your_lancedb_cloud_api_key")
.database("your_database_name")
.build();
```
### LanceDB Enterprise
For Enterprise deployments, use your VPC endpoint:
```java
LanceRestNamespace namespace = LanceDbRestNamespaces.builder()
.apiKey("your_lancedb_enterprise_api_key")
.database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/
.hostOverride("http://<vpc_endpoint_dns_name>:80")
.build();
```
## Development
Build:
```shell
./mvnw install
```

View File

@@ -19,7 +19,7 @@ lancedb = { path = "../../../rust/lancedb" }
lance = { workspace = true }
arrow = { workspace = true, features = ["ffi"] }
arrow-schema.workspace = true
tokio = "1.46"
tokio = "1.23"
jni = "0.21.1"
snafu.workspace = true
lazy_static.workspace = true

View File

@@ -8,24 +8,18 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-beta.1</version>
<version>0.20.0-beta.2</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>lancedb-core</artifactId>
<name>${project.artifactId}</name>
<description>LanceDB Core</description>
<name>LanceDB Core</name>
<packaging>jar</packaging>
<properties>
<rust.release.build>false</rust.release.build>
</properties>
<dependencies>
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lance-namespace-core</artifactId>
<version>0.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>

View File

@@ -1,26 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>lancedb-lance-namespace</artifactId>
<name>${project.artifactId}</name>
<description>LanceDB Java Integration with Lance Namespace</description>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lance-namespace-core</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -1,146 +0,0 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.lancedb.lancedb;
import com.lancedb.lance.namespace.LanceRestNamespace;
import com.lancedb.lance.namespace.client.apache.ApiClient;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
/** Util class to help construct a {@link LanceRestNamespace} for LanceDB. */
public class LanceDbRestNamespaces {
private static final String DEFAULT_REGION = "us-east-1";
private static final String CLOUD_URL_PATTERN = "https://%s.%s.api.lancedb.com";
private String apiKey;
private String database;
private Optional<String> hostOverride = Optional.empty();
private Optional<String> region = Optional.empty();
private Map<String, String> additionalConfig = new HashMap<>();
private LanceDbRestNamespaces() {}
/**
* Create a new builder instance.
*
* @return A new LanceRestNamespaceBuilder
*/
public static LanceDbRestNamespaces builder() {
return new LanceDbRestNamespaces();
}
/**
* Set the API key (required).
*
* @param apiKey The LanceDB API key
* @return This builder
*/
public LanceDbRestNamespaces apiKey(String apiKey) {
if (apiKey == null || apiKey.trim().isEmpty()) {
throw new IllegalArgumentException("API key cannot be null or empty");
}
this.apiKey = apiKey;
return this;
}
/**
* Set the database name (required).
*
* @param database The database name
* @return This builder
*/
public LanceDbRestNamespaces database(String database) {
if (database == null || database.trim().isEmpty()) {
throw new IllegalArgumentException("Database cannot be null or empty");
}
this.database = database;
return this;
}
/**
* Set a custom host override (optional). When set, this overrides the default LanceDB Cloud URL
* construction. Use this for LanceDB Enterprise deployments.
*
* @param hostOverride The complete base URL (e.g., "http://your-vpc-endpoint:80")
* @return This builder
*/
public LanceDbRestNamespaces hostOverride(String hostOverride) {
this.hostOverride = Optional.ofNullable(hostOverride);
return this;
}
/**
* Set the region for LanceDB Cloud (optional). Defaults to "us-east-1" if not specified. This is
* ignored when hostOverride is set.
*
* @param region The AWS region (e.g., "us-east-1", "eu-west-1")
* @return This builder
*/
public LanceDbRestNamespaces region(String region) {
this.region = Optional.ofNullable(region);
return this;
}
/**
* Add additional configuration parameters.
*
* @param key The configuration key
* @param value The configuration value
* @return This builder
*/
public LanceDbRestNamespaces config(String key, String value) {
this.additionalConfig.put(key, value);
return this;
}
/**
* Build the LanceRestNamespace instance.
*
* @return A configured LanceRestNamespace
* @throws IllegalStateException if required parameters are missing
*/
public LanceRestNamespace build() {
// Validate required fields
if (apiKey == null) {
throw new IllegalStateException("API key is required");
}
if (database == null) {
throw new IllegalStateException("Database is required");
}
// Build configuration map
Map<String, String> config = new HashMap<>(additionalConfig);
config.put("headers.x-lancedb-database", database);
config.put("headers.x-api-key", apiKey);
// Determine base URL
String baseUrl;
if (hostOverride.isPresent()) {
baseUrl = hostOverride.get();
config.put("host_override", hostOverride.get());
} else {
String effectiveRegion = region.orElse(DEFAULT_REGION);
baseUrl = String.format(CLOUD_URL_PATTERN, database, effectiveRegion);
config.put("region", effectiveRegion);
}
// Create and configure ApiClient
ApiClient apiClient = new ApiClient();
apiClient.setBasePath(baseUrl);
return new LanceRestNamespace(apiClient, config);
}
}

java/mvnw vendored (259 changed lines)
View File

@@ -1,259 +0,0 @@
#!/bin/sh
# ----------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# Apache Maven Wrapper startup batch script, version 3.3.2
#
# Optional ENV vars
# -----------------
# JAVA_HOME - location of a JDK home dir, required when download maven via java source
# MVNW_REPOURL - repo url base for downloading maven distribution
# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven
# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output
# ----------------------------------------------------------------------------
set -euf
[ "${MVNW_VERBOSE-}" != debug ] || set -x
# OS specific support.
native_path() { printf %s\\n "$1"; }
case "$(uname)" in
CYGWIN* | MINGW*)
[ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")"
native_path() { cygpath --path --windows "$1"; }
;;
esac
# set JAVACMD and JAVACCMD
set_java_home() {
# For Cygwin and MinGW, ensure paths are in Unix format before anything is touched
if [ -n "${JAVA_HOME-}" ]; then
if [ -x "$JAVA_HOME/jre/sh/java" ]; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
JAVACCMD="$JAVA_HOME/jre/sh/javac"
else
JAVACMD="$JAVA_HOME/bin/java"
JAVACCMD="$JAVA_HOME/bin/javac"
if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then
echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2
echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2
return 1
fi
fi
else
JAVACMD="$(
'set' +e
'unset' -f command 2>/dev/null
'command' -v java
)" || :
JAVACCMD="$(
'set' +e
'unset' -f command 2>/dev/null
'command' -v javac
)" || :
if [ ! -x "${JAVACMD-}" ] || [ ! -x "${JAVACCMD-}" ]; then
echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2
return 1
fi
fi
}
# hash string like Java String::hashCode
hash_string() {
str="${1:-}" h=0
while [ -n "$str" ]; do
char="${str%"${str#?}"}"
h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296))
str="${str#?}"
done
printf %x\\n $h
}
verbose() { :; }
[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; }
die() {
printf %s\\n "$1" >&2
exit 1
}
trim() {
# MWRAPPER-139:
# Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds.
# Needed for removing poorly interpreted newline sequences when running in more
# exotic environments such as mingw bash on Windows.
printf "%s" "${1}" | tr -d '[:space:]'
}
# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties
while IFS="=" read -r key value; do
case "${key-}" in
distributionUrl) distributionUrl=$(trim "${value-}") ;;
distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;;
esac
done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties"
[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties"
case "${distributionUrl##*/}" in
maven-mvnd-*bin.*)
MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/
case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in
*AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;;
:Darwin*x86_64) distributionPlatform=darwin-amd64 ;;
:Darwin*arm64) distributionPlatform=darwin-aarch64 ;;
:Linux*x86_64*) distributionPlatform=linux-amd64 ;;
*)
echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2
distributionPlatform=linux-amd64
;;
esac
distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip"
;;
maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;;
*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;;
esac
# apply MVNW_REPOURL and calculate MAVEN_HOME
# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}"
distributionUrlName="${distributionUrl##*/}"
distributionUrlNameMain="${distributionUrlName%.*}"
distributionUrlNameMain="${distributionUrlNameMain%-bin}"
MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}"
MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")"
exec_maven() {
unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || :
exec "$MAVEN_HOME/bin/$MVN_CMD" "$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD"
}
if [ -d "$MAVEN_HOME" ]; then
verbose "found existing MAVEN_HOME at $MAVEN_HOME"
exec_maven "$@"
fi
case "${distributionUrl-}" in
*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;;
*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;;
esac
# prepare tmp dir
if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then
clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; }
trap clean HUP INT TERM EXIT
else
die "cannot create temp dir"
fi
mkdir -p -- "${MAVEN_HOME%/*}"
# Download and Install Apache Maven
verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
verbose "Downloading from: $distributionUrl"
verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName"
# select .zip or .tar.gz
if ! command -v unzip >/dev/null; then
distributionUrl="${distributionUrl%.zip}.tar.gz"
distributionUrlName="${distributionUrl##*/}"
fi
# verbose opt
__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR=''
[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v
# normalize http auth
case "${MVNW_PASSWORD:+has-password}" in
'') MVNW_USERNAME='' MVNW_PASSWORD='' ;;
has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;;
esac
if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then
verbose "Found wget ... using wget"
wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl"
elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then
verbose "Found curl ... using curl"
curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl"
elif set_java_home; then
verbose "Falling back to use Java to download"
javaSource="$TMP_DOWNLOAD_DIR/Downloader.java"
targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName"
cat >"$javaSource" <<-END
public class Downloader extends java.net.Authenticator
{
protected java.net.PasswordAuthentication getPasswordAuthentication()
{
return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() );
}
public static void main( String[] args ) throws Exception
{
setDefault( new Downloader() );
java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() );
}
}
END
# For Cygwin/MinGW, switch paths to Windows format before running javac and java
verbose " - Compiling Downloader.java ..."
"$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java"
verbose " - Running Downloader.java ..."
"$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")"
fi
# If specified, validate the SHA-256 sum of the Maven distribution zip file
if [ -n "${distributionSha256Sum-}" ]; then
distributionSha256Result=false
if [ "$MVN_CMD" = mvnd.sh ]; then
echo "Checksum validation is not supported for maven-mvnd." >&2
echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
exit 1
elif command -v sha256sum >/dev/null; then
if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then
distributionSha256Result=true
fi
elif command -v shasum >/dev/null; then
if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then
distributionSha256Result=true
fi
else
echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2
echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2
exit 1
fi
if [ $distributionSha256Result = false ]; then
echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2
echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." >&2
exit 1
fi
fi
# unzip and move
if command -v unzip >/dev/null; then
unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip"
else
tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar"
fi
printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url"
mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME"
clean || :
exec_maven "$@"

View File

@@ -6,10 +6,11 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-beta.1</version>
<version>0.20.0-beta.2</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>
<name>LanceDB Parent</name>
<description>LanceDB vector database Java API</description>
<url>http://lancedb.com/</url>
<developers>
@@ -28,7 +29,6 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-namespace.verison>0.0.1</lance-namespace.verison>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
@@ -52,7 +52,6 @@
<modules>
<module>core</module>
<module>lance-namespace</module>
</modules>
<scm>
@@ -63,11 +62,6 @@
<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lance-namespace-core</artifactId>
<version>${lance-namespace.verison}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>

node/package-lock.json generated (49 changed lines)
View File

@@ -1,12 +1,12 @@
{
"name": "vectordb",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "vectordb",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.1",
"cpu": [
"x64",
"arm64"
@@ -52,11 +52,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.1",
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.1"
"@lancedb/vectordb-darwin-arm64": "0.20.0-beta.1",
"@lancedb/vectordb-darwin-x64": "0.20.0-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.20.0-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.20.0-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.20.0-beta.1"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",
@@ -327,60 +327,65 @@
}
},
"node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.21.2-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.2-beta.1.tgz",
"integrity": "sha512-7QXVJNTei7PMuXRyyc+F3WGiudRNq9HfeOaMmMOJJpuCAO0zLq1pM9DCl5aPF5MddrodPHJxi+IWV+iAFH7zcg==",
"version": "0.20.0-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.20.0-beta.1.tgz",
"integrity": "sha512-yds8wFjni68RfA+KziTz/8v4YKku1i6q4JF8I2EhpzDI8tT0fk1YqGlVhtdn9fHDWq/9m1M05kGVuyzLypZ2Yw==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.21.2-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.2-beta.1.tgz",
"integrity": "sha512-M/TWcJ3WVc6DNFgG/lWI7L5tQ05IF3WoWuZfRfbbimGhRvY7xf1O3uOt+jMcNJCa5mHFGCg2SZDA8mebd/mL7g==",
"version": "0.20.0-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.20.0-beta.1.tgz",
"integrity": "sha512-oF2MNtkWaJQWyUSIKU/zrbgygK94MzomUKc/Z9CYs7Ar3PI4CIfG72e5o/Zbhjpl318BkR4AbQQYX8BZaNIPVw==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.21.2-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.2-beta.1.tgz",
"integrity": "sha512-OEsM9znf9DDmdwGuTg2EVu+ebwuWQ1lCx0cYy4+hNy3ntolwMC39ePg2H9WD9SsEnQ2vcGJgBJTQLPKgXww+iQ==",
"version": "0.20.0-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.20.0-beta.1.tgz",
"integrity": "sha512-3Si0+K5T4awMiUVu0dD9NizcqIiGnEdsTu4YxbKKq1aI4xoaHrYGERkz58mtIFoBQHfre42ujPDoahTkAQ1j/Q==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.21.2-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.2-beta.1.tgz",
"integrity": "sha512-7FTq/O1zNzD71rgX2PEVmkct4jk2wc+ADU3rss+0VqoBSO9XeMqZEVD2WgZWuSTg6bYai//FHGDHSaknHBNsdw==",
"version": "0.20.0-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.20.0-beta.1.tgz",
"integrity": "sha512-5umO9XaDIxmqUiFnWaHxJtgkCO7oFWtEvLtzM4hG1mkEnwnE3bmXEO+cm+jPro7zwdKEzsnXh0GoCSUvuHk0tA==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.21.2-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.2-beta.1.tgz",
"integrity": "sha512-mN1p/J0kdqy6MrlKtmA8set/PibqFPyytQJFAuxSLXC/rwD7vgqUCt0SI0zVWPGG7J5Y65kvdc99l7Yl7lJtwQ==",
"version": "0.20.0-beta.1",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.20.0-beta.1.tgz",
"integrity": "sha512-EKyDamAi3RmDTu+BFYxr41eGLggZ3FVGu289gCprzljk38d8uxdgKhvDtYN9FWoMew4VvVk/EJQJx6L8sJJRng==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"win32"

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.2",
"description": " Serverless, low-latency vector database for AI applications",
"private": false,
"main": "dist/index.js",
@@ -89,10 +89,10 @@
}
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.1",
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.1",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.1",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.1",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.1"
"@lancedb/vectordb-darwin-x64": "0.20.0-beta.2",
"@lancedb/vectordb-darwin-arm64": "0.20.0-beta.2",
"@lancedb/vectordb-linux-x64-gnu": "0.20.0-beta.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.20.0-beta.2",
"@lancedb/vectordb-win32-x64-msvc": "0.20.0-beta.2"
}
}

View File

@@ -49,7 +49,7 @@ describe('LanceDB Mirrored Store Integration test', function () {
it('s3://...?mirroredStore=... param is processed correctly', async function () {
this.timeout(600000)
const dir = await fs.promises.mkdtemp(path.join(tmpdir(), 'lancedb-mirror-'))
const dir = tmpdir()
console.log(dir)
const conn = await lancedb.connect({ uri: `s3://lancedb-integtest?mirroredStore=${dir}`, storageOptions: { allowHttp: 'true' } })
const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 })
@@ -63,93 +63,118 @@ describe('LanceDB Mirrored Store Integration test', function () {
const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite })
const mirroredPath = path.join(dir, `${tableName}.lance`)
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be three dirs
assert.equal(files.length, 3)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
const files = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
// there should be three dirs
assert.equal(files.length, 3, 'files after table creation')
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.txn'))
})
const transactionFiles = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
assert.equal(transactionFiles.length, 1, 'transactionFiles after table creation')
assert.isTrue(transactionFiles[0].name.endsWith('.txn'))
fs.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.manifest'))
})
const versionFiles = await fs.promises.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true })
assert.equal(versionFiles.length, 1, 'versionFiles after table creation')
assert.isTrue(versionFiles[0].name.endsWith('.manifest'))
const dataFiles = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
assert.equal(dataFiles.length, 1, 'dataFiles after table creation')
assert.isTrue(dataFiles[0].name.endsWith('.lance'))
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
})
// try create index and check if it's mirrored
await t.createIndex({ column: 'vector', type: 'ivf_pq' })
const filesAfterIndex = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
// there should be four dirs
assert.equal(filesAfterIndex.length, 4, 'filesAfterIndex')
assert.isTrue(filesAfterIndex[0].isDirectory())
assert.isTrue(filesAfterIndex[1].isDirectory())
assert.isTrue(filesAfterIndex[2].isDirectory())
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be four dirs
assert.equal(files.length, 4)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
assert.isTrue(files[2].isDirectory())
// Two TXs now
const transactionFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
assert.equal(transactionFilesAfterIndex.length, 2, 'transactionFilesAfterIndex')
assert.isTrue(transactionFilesAfterIndex[0].name.endsWith('.txn'))
assert.isTrue(transactionFilesAfterIndex[1].name.endsWith('.txn'))
// Two TXs now
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 2)
assert.isTrue(files[0].name.endsWith('.txn'))
assert.isTrue(files[1].name.endsWith('.txn'))
})
const dataFilesAfterIndex = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
assert.equal(dataFilesAfterIndex.length, 1, 'dataFilesAfterIndex')
assert.isTrue(dataFilesAfterIndex[0].name.endsWith('.lance'))
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
const indicesFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
assert.equal(indicesFiles.length, 1, 'indicesFiles')
assert.isTrue(indicesFiles[0].isDirectory())
fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].isDirectory())
const indexFiles = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFiles[0].name), { withFileTypes: true })
console.log(`DEBUG indexFiles in ${indicesFiles[0].name}:`, indexFiles.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
assert.equal(indexFiles.length, 2, 'indexFiles')
const fileNames = indexFiles.map(f => f.name).sort()
assert.isTrue(fileNames.includes('auxiliary.idx'), 'auxiliary.idx should be present')
assert.isTrue(fileNames.includes('index.idx'), 'index.idx should be present')
assert.isTrue(indexFiles.every(f => f.isFile()), 'all index files should be files')
fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].isFile())
assert.isTrue(files[0].name.endsWith('.idx'))
})
})
})
// try delete and check if it's mirrored
await t.delete('id = 0')
const filesAfterDelete = await fs.promises.readdir(mirroredPath, { withFileTypes: true })
// there should be five dirs
assert.equal(filesAfterDelete.length, 5, 'filesAfterDelete')
assert.isTrue(filesAfterDelete[0].isDirectory())
assert.isTrue(filesAfterDelete[1].isDirectory())
assert.isTrue(filesAfterDelete[2].isDirectory())
assert.isTrue(filesAfterDelete[3].isDirectory())
assert.isTrue(filesAfterDelete[4].isDirectory())
fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => {
if (err != null) throw err
// there should be five dirs
assert.equal(files.length, 5)
assert.isTrue(files[0].isDirectory())
assert.isTrue(files[1].isDirectory())
assert.isTrue(files[2].isDirectory())
assert.isTrue(files[3].isDirectory())
assert.isTrue(files[4].isDirectory())
// Three TXs now
const transactionFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true })
assert.equal(transactionFilesAfterDelete.length, 3, 'transactionFilesAfterDelete')
assert.isTrue(transactionFilesAfterDelete[0].name.endsWith('.txn'))
assert.isTrue(transactionFilesAfterDelete[1].name.endsWith('.txn'))
// Three TXs now
fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 3)
assert.isTrue(files[0].name.endsWith('.txn'))
assert.isTrue(files[1].name.endsWith('.txn'))
})
const dataFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true })
assert.equal(dataFilesAfterDelete.length, 1, 'dataFilesAfterDelete')
assert.isTrue(dataFilesAfterDelete[0].name.endsWith('.lance'))
fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.lance'))
})
const indicesFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true })
assert.equal(indicesFilesAfterDelete.length, 1, 'indicesFilesAfterDelete')
assert.isTrue(indicesFilesAfterDelete[0].isDirectory())
fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].isDirectory())
const indexFilesAfterDelete = await fs.promises.readdir(path.join(mirroredPath, '_indices', indicesFilesAfterDelete[0].name), { withFileTypes: true })
console.log(`DEBUG indexFilesAfterDelete in ${indicesFilesAfterDelete[0].name}:`, indexFilesAfterDelete.map(f => `${f.name} (${f.isFile() ? 'file' : 'dir'})`))
assert.equal(indexFilesAfterDelete.length, 2, 'indexFilesAfterDelete')
const fileNamesAfterDelete = indexFilesAfterDelete.map(f => f.name).sort()
assert.isTrue(fileNamesAfterDelete.includes('auxiliary.idx'), 'auxiliary.idx should be present after delete')
assert.isTrue(fileNamesAfterDelete.includes('index.idx'), 'index.idx should be present after delete')
assert.isTrue(indexFilesAfterDelete.every(f => f.isFile()), 'all index files should be files after delete')
fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
const deletionFiles = await fs.promises.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true })
assert.equal(deletionFiles.length, 1, 'deletionFiles')
assert.isTrue(deletionFiles[0].name.endsWith('.arrow'))
assert.equal(files.length, 1)
assert.isTrue(files[0].isFile())
assert.isTrue(files[0].name.endsWith('.idx'))
})
})
fs.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true }, (err, files) => {
if (err != null) throw err
assert.equal(files.length, 1)
assert.isTrue(files[0].name.endsWith('.arrow'))
})
})
})
})

View File

@@ -1,13 +0,0 @@
These are the TypeScript bindings of LanceDB.
The core Rust library is in the `../rust/lancedb` directory, the Rust binding
code is in the `src/` directory, and the TypeScript bindings are in
the `lancedb/` directory.
Whenever you change the Rust code, you will need to recompile: `npm run build`.
Common commands:
* Build: `npm run build`
* Lint: `npm run lint`
* Fix lints: `npm run lint-fix`
* Test: `npm test`
* Run single test file: `npm test __test__/arrow.test.ts`

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.21.2-beta.1"
version = "0.20.0-beta.2"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import { Bool, Field, Int32, List, Schema, Struct, Utf8 } from "apache-arrow";
import { Schema } from "apache-arrow";
import * as arrow15 from "apache-arrow-15";
import * as arrow16 from "apache-arrow-16";
@@ -11,12 +11,10 @@ import * as arrow18 from "apache-arrow-18";
import {
convertToTable,
fromBufferToRecordBatch,
fromDataToBuffer,
fromRecordBatchToBuffer,
fromTableToBuffer,
makeArrowTable,
makeEmptyTable,
tableFromIPC,
} from "../lancedb/arrow";
import {
EmbeddingFunction,
@@ -377,221 +375,8 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
expect(table2.schema).toEqual(schema);
});
it("will handle missing columns in schema alignment when using embeddings", async function () {
const schema = new Schema(
[
new Field("domain", new Utf8(), true),
new Field("name", new Utf8(), true),
new Field("description", new Utf8(), true),
],
new Map([["embedding_functions", JSON.stringify([])]]),
);
const data = [
{ domain: "google.com", name: "Google" },
{ domain: "facebook.com", name: "Facebook" },
];
const table = await convertToTable(data, undefined, { schema });
expect(table.numCols).toBe(3);
expect(table.numRows).toBe(2);
const descriptionColumn = table.getChild("description");
expect(descriptionColumn).toBeDefined();
expect(descriptionColumn?.nullCount).toBe(2);
expect(descriptionColumn?.toArray()).toEqual([null, null]);
expect(table.getChild("domain")?.toArray()).toEqual([
"google.com",
"facebook.com",
]);
expect(table.getChild("name")?.toArray()).toEqual([
"Google",
"Facebook",
]);
});
it("will handle completely missing nested struct columns", async function () {
const schema = new Schema(
[
new Field("id", new Utf8(), true),
new Field("name", new Utf8(), true),
new Field(
"metadata",
new Struct([
new Field("version", new Int32(), true),
new Field("author", new Utf8(), true),
new Field(
"tags",
new List(new Field("item", new Utf8(), true)),
true,
),
]),
true,
),
],
new Map([["embedding_functions", JSON.stringify([])]]),
);
const data = [
{ id: "doc1", name: "Document 1" },
{ id: "doc2", name: "Document 2" },
];
const table = await convertToTable(data, undefined, { schema });
expect(table.numCols).toBe(3);
expect(table.numRows).toBe(2);
const buf = await fromTableToBuffer(table);
const retrievedTable = tableFromIPC(buf);
const rows = [];
for (let i = 0; i < retrievedTable.numRows; i++) {
rows.push(retrievedTable.get(i));
}
expect(rows[0].metadata.version).toBe(null);
expect(rows[0].metadata.author).toBe(null);
expect(rows[0].metadata.tags).toBe(null);
expect(rows[0].id).toBe("doc1");
expect(rows[0].name).toBe("Document 1");
});
it("will handle partially missing nested struct fields", async function () {
const schema = new Schema(
[
new Field("id", new Utf8(), true),
new Field(
"metadata",
new Struct([
new Field("version", new Int32(), true),
new Field("author", new Utf8(), true),
new Field("created_at", new Utf8(), true),
]),
true,
),
],
new Map([["embedding_functions", JSON.stringify([])]]),
);
const data = [
{ id: "doc1", metadata: { version: 1, author: "Alice" } },
{ id: "doc2", metadata: { version: 2 } },
];
const table = await convertToTable(data, undefined, { schema });
expect(table.numCols).toBe(2);
expect(table.numRows).toBe(2);
const metadataColumn = table.getChild("metadata");
expect(metadataColumn).toBeDefined();
expect(metadataColumn?.type.toString()).toBe(
"Struct<{version:Int32, author:Utf8, created_at:Utf8}>",
);
});
it("will handle multiple levels of nested structures", async function () {
const schema = new Schema(
[
new Field("id", new Utf8(), true),
new Field(
"config",
new Struct([
new Field("database", new Utf8(), true),
new Field(
"connection",
new Struct([
new Field("host", new Utf8(), true),
new Field("port", new Int32(), true),
new Field(
"ssl",
new Struct([
new Field("enabled", new Bool(), true),
new Field("cert_path", new Utf8(), true),
]),
true,
),
]),
true,
),
]),
true,
),
],
new Map([["embedding_functions", JSON.stringify([])]]),
);
const data = [
{
id: "config1",
config: {
database: "postgres",
connection: { host: "localhost" },
},
},
{
id: "config2",
config: { database: "mysql" },
},
{
id: "config3",
},
];
const table = await convertToTable(data, undefined, { schema });
expect(table.numCols).toBe(2);
expect(table.numRows).toBe(3);
const configColumn = table.getChild("config");
expect(configColumn).toBeDefined();
expect(configColumn?.type.toString()).toBe(
"Struct<{database:Utf8, connection:Struct<{host:Utf8, port:Int32, ssl:Struct<{enabled:Bool, cert_path:Utf8}>}>}>",
);
});
it("will handle missing columns in Arrow table input when using embeddings", async function () {
const incompleteTable = makeArrowTable([
{ domain: "google.com", name: "Google" },
{ domain: "facebook.com", name: "Facebook" },
]);
const schema = new Schema(
[
new Field("domain", new Utf8(), true),
new Field("name", new Utf8(), true),
new Field("description", new Utf8(), true),
],
new Map([["embedding_functions", JSON.stringify([])]]),
);
const buf = await fromDataToBuffer(incompleteTable, undefined, schema);
expect(buf.byteLength).toBeGreaterThan(0);
const retrievedTable = tableFromIPC(buf);
expect(retrievedTable.numCols).toBe(3);
expect(retrievedTable.numRows).toBe(2);
const descriptionColumn = retrievedTable.getChild("description");
expect(descriptionColumn).toBeDefined();
expect(descriptionColumn?.nullCount).toBe(2);
expect(descriptionColumn?.toArray()).toEqual([null, null]);
expect(retrievedTable.getChild("domain")?.toArray()).toEqual([
"google.com",
"facebook.com",
]);
expect(retrievedTable.getChild("name")?.toArray()).toEqual([
"Google",
"Facebook",
]);
});
it("should correctly retain values in nested struct fields", async function () {
// Define test data with nested struct
const testData = [
{
id: "doc1",
@@ -615,8 +400,10 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
},
];
// Create Arrow table from the data
const table = makeArrowTable(testData);
// Verify schema has the nested struct fields
const metadataField = table.schema.fields.find(
(f) => f.name === "metadata",
);
@@ -630,17 +417,23 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
"text",
]);
// Convert to buffer and back (simulating storage and retrieval)
const buf = await fromTableToBuffer(table);
const retrievedTable = tableFromIPC(buf);
// Verify the retrieved table has the same structure
const rows = [];
for (let i = 0; i < retrievedTable.numRows; i++) {
rows.push(retrievedTable.get(i));
}
// Check values in the first row
const firstRow = rows[0];
expect(firstRow.id).toBe("doc1");
expect(firstRow.vector.toJSON()).toEqual([1, 2, 3]);
// Verify metadata values are preserved (this is where the bug is)
expect(firstRow.metadata).toBeDefined();
expect(firstRow.metadata.filePath).toBe("/path/to/file1.ts");
expect(firstRow.metadata.startLine).toBe(10);
expect(firstRow.metadata.endLine).toBe(20);
@@ -799,14 +592,14 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
).rejects.toThrow("column vector was missing");
});
it("will skip embedding application if already applied", async function () {
it("will provide a nice error if run twice", async function () {
const records = sampleRecords();
const table = await convertToTable(records, dummyEmbeddingConfig);
// fromTableToBuffer will try to apply the embeddings again
// but should skip since the column already has non-null values
const result = await fromTableToBuffer(table, dummyEmbeddingConfig);
expect(result.byteLength).toBeGreaterThan(0);
await expect(
fromTableToBuffer(table, dummyEmbeddingConfig),
).rejects.toThrow("already existed");
});
});

View File

@@ -1,46 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as tmp from "tmp";
import { Session, connect } from "../lancedb";
describe("Session", () => {
let tmpDir: tmp.DirResult;
beforeEach(() => {
tmpDir = tmp.dirSync({ unsafeCleanup: true });
});
afterEach(() => tmpDir.removeCallback());
it("should configure cache sizes and work with database operations", async () => {
// Create session with small cache limits for testing
const indexCacheSize = BigInt(1024 * 1024); // 1MB
const metadataCacheSize = BigInt(512 * 1024); // 512KB
const session = new Session(indexCacheSize, metadataCacheSize);
// Record initial cache state
const initialCacheSize = session.sizeBytes();
const initialCacheItems = session.approxNumItems();
// Test session works with database connection
const db = await connect({ uri: tmpDir.name, session: session });
// Create and use a table to exercise the session
const data = Array.from({ length: 100 }, (_, i) => ({
id: i,
text: `item ${i}`,
}));
const table = await db.createTable("test", data);
const results = await table.query().limit(5).toArray();
expect(results).toHaveLength(5);
// Verify cache usage increased after operations
const finalCacheSize = session.sizeBytes();
const finalCacheItems = session.approxNumItems();
expect(finalCacheSize).toBeGreaterThan(initialCacheSize); // Cache should have grown
expect(finalCacheItems).toBeGreaterThanOrEqual(initialCacheItems); // Items should not decrease
expect(initialCacheSize).toBeLessThan(indexCacheSize + metadataCacheSize); // Within limits
});
});

View File

@@ -33,12 +33,7 @@ import {
register,
} from "../lancedb/embedding";
import { Index } from "../lancedb/indices";
import {
BooleanQuery,
Occur,
Operator,
instanceOfFullTextQuery,
} from "../lancedb/query";
import { instanceOfFullTextQuery } from "../lancedb/query";
import exp = require("constants");
describe.each([arrow15, arrow16, arrow17, arrow18])(
@@ -368,9 +363,9 @@ describe("merge insert", () => {
{ a: 4, b: "z" },
];
const result = (await table.toArrow()).toArray().sort((a, b) => a.a - b.a);
expect(result.map((row) => ({ ...row }))).toEqual(expected);
expect(
JSON.parse(JSON.stringify((await table.toArrow()).toArray())),
).toEqual(expected);
});
test("conditional update", async () => {
const newData = [
@@ -559,32 +554,6 @@ describe("When creating an index", () => {
rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
expect(rst.numRows).toBe(1);
// test nprobes
rst = await tbl.query().nearestTo(queryVec).limit(2).nprobes(50).toArrow();
expect(rst.numRows).toBe(2);
rst = await tbl
.query()
.nearestTo(queryVec)
.limit(2)
.minimumNprobes(15)
.toArrow();
expect(rst.numRows).toBe(2);
rst = await tbl
.query()
.nearestTo(queryVec)
.limit(2)
.minimumNprobes(10)
.maximumNprobes(20)
.toArrow();
expect(rst.numRows).toBe(2);
expect(() => tbl.query().nearestTo(queryVec).minimumNprobes(0)).toThrow(
"Invalid input, minimum_nprobes must be greater than 0",
);
expect(() => tbl.query().nearestTo(queryVec).maximumNprobes(5)).toThrow(
"Invalid input, maximum_nprobes must be greater than minimum_nprobes",
);
await tbl.dropIndex("vec_idx");
const indices2 = await tbl.listIndices();
expect(indices2.length).toBe(0);
@@ -1562,18 +1531,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
const results = await table.search("hello").toArray();
expect(results[0].text).toBe(data[0].text);
const results2 = await table
.search(new MatchQuery("hello world", "text"))
.toArray();
expect(results2.length).toBe(2);
const results3 = await table
.search(
new MatchQuery("hello world", "text", { operator: Operator.And }),
)
.toArray();
expect(results3.length).toBe(1);
});
test("full text search without lowercase", async () => {
@@ -1650,114 +1607,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
expect(resultSet.has("fob")).toBe(true);
expect(resultSet.has("fo")).toBe(true);
expect(resultSet.has("food")).toBe(true);
const prefixResults = await table
.search(
new MatchQuery("foo", "text", { fuzziness: 3, prefixLength: 3 }),
)
.toArray();
expect(prefixResults.length).toBe(2);
const resultSet2 = new Set(prefixResults.map((r) => r.text));
expect(resultSet2.has("foo")).toBe(true);
expect(resultSet2.has("food")).toBe(true);
});
test("full text search boolean query", async () => {
const db = await connect(tmpDir.name);
const data = [
{ text: "The cat and dog are playing" },
{ text: "The cat is sleeping" },
{ text: "The dog is barking" },
{ text: "The dog chases the cat" },
];
const table = await db.createTable("test", data);
await table.createIndex("text", {
config: Index.fts({ withPosition: false }),
});
const shouldResults = await table
.search(
new BooleanQuery([
[Occur.Should, new MatchQuery("cat", "text")],
[Occur.Should, new MatchQuery("dog", "text")],
]),
)
.toArray();
expect(shouldResults.length).toBe(4);
const mustResults = await table
.search(
new BooleanQuery([
[Occur.Must, new MatchQuery("cat", "text")],
[Occur.Must, new MatchQuery("dog", "text")],
]),
)
.toArray();
expect(mustResults.length).toBe(2);
const mustNotResults = await table
.search(
new BooleanQuery([
[Occur.Must, new MatchQuery("cat", "text")],
[Occur.MustNot, new MatchQuery("dog", "text")],
]),
)
.toArray();
expect(mustNotResults.length).toBe(1);
});
test("full text search ngram", async () => {
const db = await connect(tmpDir.name);
const data = [
{ text: "hello world", vector: [0.1, 0.2, 0.3] },
{ text: "lance database", vector: [0.4, 0.5, 0.6] },
{ text: "lance is cool", vector: [0.7, 0.8, 0.9] },
];
const table = await db.createTable("test", data);
await table.createIndex("text", {
config: Index.fts({ baseTokenizer: "ngram" }),
});
const results = await table.search("lan").toArray();
expect(results.length).toBe(2);
const resultSet = new Set(results.map((r) => r.text));
expect(resultSet.has("lance database")).toBe(true);
expect(resultSet.has("lance is cool")).toBe(true);
const results2 = await table.search("nce").toArray(); // spellchecker:disable-line
expect(results2.length).toBe(2);
const resultSet2 = new Set(results2.map((r) => r.text));
expect(resultSet2.has("lance database")).toBe(true);
expect(resultSet2.has("lance is cool")).toBe(true);
// the default min_ngram_length is 3, so "la" should not match
const results3 = await table.search("la").toArray();
expect(results3.length).toBe(0);
// test setting min_ngram_length and prefix_only
await table.createIndex("text", {
config: Index.fts({
baseTokenizer: "ngram",
ngramMinLength: 2,
prefixOnly: true,
}),
replace: true,
});
const results4 = await table.search("lan").toArray();
expect(results4.length).toBe(2);
const resultSet4 = new Set(results4.map((r) => r.text));
expect(resultSet4.has("lance database")).toBe(true);
expect(resultSet4.has("lance is cool")).toBe(true);
const results5 = await table.search("nce").toArray(); // spellchecker:disable-line
expect(results5.length).toBe(0);
const results6 = await table.search("la").toArray();
expect(results6.length).toBe(2);
const resultSet6 = new Set(results6.map((r) => r.text));
expect(resultSet6.has("lance database")).toBe(true);
expect(resultSet6.has("lance is cool")).toBe(true);
});
test.each([
@@ -1863,43 +1712,4 @@ describe("column name options", () => {
expect(results[0].query_index).toBe(0);
expect(results[1].query_index).toBe(1);
});
test("index and search multivectors", async () => {
const db = await connect(tmpDir.name);
const data = [];
// generate 256 random multivectors
for (let i = 0; i < 256; i++) {
data.push({
multivector: Array.from({ length: 10 }, () =>
Array(2).fill(Math.random()),
),
});
}
const table = await db.createTable("multivectors", data, {
schema: new Schema([
new Field(
"multivector",
new List(
new Field(
"item",
new FixedSizeList(2, new Field("item", new Float32())),
),
),
),
]),
});
const results = await table.search(data[0].multivector).limit(10).toArray();
expect(results.length).toBe(10);
await table.createIndex("multivector", {
config: Index.ivfPq({ numPartitions: 2, distanceType: "cosine" }),
});
const results2 = await table
.search(data[0].multivector)
.limit(10)
.toArray();
expect(results2.length).toBe(10);
});
});

View File

@@ -107,20 +107,6 @@ export type IntoVector =
| number[]
| Promise<Float32Array | Float64Array | number[]>;
export type MultiVector = IntoVector[];
export function isMultiVector(value: unknown): value is MultiVector {
return Array.isArray(value) && isIntoVector(value[0]);
}
export function isIntoVector(value: unknown): value is IntoVector {
return (
value instanceof Float32Array ||
value instanceof Float64Array ||
(Array.isArray(value) && !Array.isArray(value[0]))
);
}
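// Editor's note: an illustrative sketch, not part of this diff, showing how
// the two guards above classify query inputs; a flat numeric array is a
// single vector, while an array of vectors is a multivector.
const sketchSingle: unknown = [0.1, 0.2, 0.3];
const sketchMulti: unknown = [
  [0.1, 0.2],
  [0.3, 0.4],
];
console.assert(isIntoVector(sketchSingle)); // flat array of numbers
console.assert(!isMultiVector(sketchSingle)); // first element is not a vector
console.assert(isMultiVector(sketchMulti)); // first element is itself a vector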
export function isArrowTable(value: object): value is TableLike {
if (value instanceof ArrowTable) return true;
return "schema" in value && "batches" in value;
@@ -431,9 +417,7 @@ function inferSchema(
} else {
const inferredType = inferType(value, path, opts);
if (inferredType === undefined) {
throw new Error(`Failed to infer data type for field ${path.join(
".",
)} at row ${rowI}. \
throw new Error(`Failed to infer data type for field ${path.join(".")} at row ${rowI}. \
Consider providing an explicit schema.`);
}
pathTree.set(path, inferredType);
@@ -815,17 +799,11 @@ async function applyEmbeddingsFromMetadata(
`Cannot apply embedding function because the source column '${functionEntry.sourceColumn}' was not present in the data`,
);
}
// Check if destination column exists and handle accordingly
if (columns[destColumn] !== undefined) {
const existingColumn = columns[destColumn];
// If the column exists but is all null, we can fill it with embeddings
if (existingColumn.nullCount !== existingColumn.length) {
// Column has non-null values, skip embedding application
continue;
}
throw new Error(
`Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
);
}
if (table.batches.length > 1) {
throw new Error(
"Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
@@ -853,15 +831,6 @@ async function applyEmbeddingsFromMetadata(
const vector = makeVector(vectors, destType);
columns[destColumn] = vector;
}
// Add any missing columns from the schema as null vectors
for (const field of schema.fields) {
if (!(field.name in columns)) {
const nullValues = new Array(table.numRows).fill(null);
columns[field.name] = makeVector(nullValues, field.type);
}
}
const newTable = new ArrowTable(columns);
return alignTable(newTable, schema);
}
@@ -934,23 +903,11 @@ async function applyEmbeddings<T>(
);
}
} else {
// Check if destination column exists and handle accordingly
if (Object.prototype.hasOwnProperty.call(newColumns, destColumn)) {
const existingColumn = newColumns[destColumn];
// If the column exists but is all null, we can fill it with embeddings
if (existingColumn.nullCount !== existingColumn.length) {
// Column has non-null values, skip embedding application and return table as-is
let newTable = new ArrowTable(newColumns);
if (schema != null) {
newTable = alignTable(newTable, schema as Schema);
}
return new ArrowTable(
new Schema(newTable.schema.fields, schemaMetadata),
newTable.batches,
);
}
throw new Error(
`Attempt to apply embeddings to table failed because column ${destColumn} already existed`,
);
}
if (table.batches.length > 1) {
throw new Error(
"Internal error: `makeArrowTable` unexpectedly created a table with more than one batch",
@@ -1010,21 +967,7 @@ export async function convertToTable(
embeddings?: EmbeddingFunctionConfig,
makeTableOptions?: Partial<MakeArrowTableOptions>,
): Promise<ArrowTable> {
let processedData = data;
// If we have a schema with embedding metadata, we need to preprocess the data
// to ensure all nested fields are present
if (
makeTableOptions?.schema &&
makeTableOptions.schema.metadata?.has("embedding_functions")
) {
processedData = ensureNestedFieldsExist(
data,
makeTableOptions.schema as Schema,
);
}
const table = makeArrowTable(processedData, makeTableOptions);
const table = makeArrowTable(data, makeTableOptions);
return await applyEmbeddings(table, embeddings, makeTableOptions?.schema);
}
@@ -1117,16 +1060,7 @@ export async function fromDataToBuffer(
schema = sanitizeSchema(schema);
}
if (isArrowTable(data)) {
const table = sanitizeTable(data);
// If we have a schema with embedding functions, we need to ensure all columns exist
// before applying embeddings, since applyEmbeddingsFromMetadata expects all columns
// to be present in the table
if (schema && schema.metadata?.has("embedding_functions")) {
const alignedTable = alignTableToSchema(table, schema);
return fromTableToBuffer(alignedTable, embeddings, schema);
} else {
return fromTableToBuffer(table, embeddings, schema);
}
return fromTableToBuffer(sanitizeTable(data), embeddings, schema);
} else {
const table = await convertToTable(data, embeddings, { schema });
return fromTableToBuffer(table);
@@ -1195,7 +1129,7 @@ function alignBatch(batch: RecordBatch, schema: Schema): RecordBatch {
type: new Struct(schema.fields),
length: batch.numRows,
nullCount: batch.nullCount,
children: alignedChildren as unknown as ArrowData<DataType>[],
children: alignedChildren,
});
return new RecordBatch(schema, newData);
}
@@ -1267,79 +1201,6 @@ function validateSchemaEmbeddings(
return new Schema(fields, schema.metadata);
}
/**
* Ensures that all nested fields defined in the schema exist in the data,
* filling missing fields with null values.
*/
export function ensureNestedFieldsExist(
data: Array<Record<string, unknown>>,
schema: Schema,
): Array<Record<string, unknown>> {
return data.map((row) => {
const completeRow: Record<string, unknown> = {};
for (const field of schema.fields) {
if (field.name in row) {
if (
field.type.constructor.name === "Struct" &&
row[field.name] !== null &&
row[field.name] !== undefined
) {
// Handle nested struct
const nestedValue = row[field.name] as Record<string, unknown>;
completeRow[field.name] = ensureStructFieldsExist(
nestedValue,
field.type,
);
} else {
// Non-struct field or null struct value
completeRow[field.name] = row[field.name];
}
} else {
// Field is missing from the data - set to null
completeRow[field.name] = null;
}
}
return completeRow;
});
}
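// Editor's note: an illustrative sketch, not part of this diff, assuming
// `Field`, `Struct`, and `Utf8` from apache-arrow are in scope: rows that
// omit a schema field come back null-filled, including nested structs.
const sketchSchema = new Schema([
  new Field("id", new Utf8(), true),
  new Field(
    "metadata",
    new Struct([new Field("author", new Utf8(), true)]),
    true,
  ),
]);
const sketchRows = ensureNestedFieldsExist([{ id: "doc1" }], sketchSchema);
// sketchRows[0] => { id: "doc1", metadata: null }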
/**
* Recursively ensures that all fields in a struct type exist in the data,
* filling missing fields with null values.
*/
function ensureStructFieldsExist(
data: Record<string, unknown>,
structType: Struct,
): Record<string, unknown> {
const completeStruct: Record<string, unknown> = {};
for (const childField of structType.children) {
if (childField.name in data) {
if (
childField.type.constructor.name === "Struct" &&
data[childField.name] !== null &&
data[childField.name] !== undefined
) {
// Recursively handle nested struct
completeStruct[childField.name] = ensureStructFieldsExist(
data[childField.name] as Record<string, unknown>,
childField.type,
);
} else {
// Non-struct field or null struct value
completeStruct[childField.name] = data[childField.name];
}
} else {
// Field is missing - set to null
completeStruct[childField.name] = null;
}
}
return completeStruct;
}
interface JsonDataType {
type: string;
fields?: JsonField[];
@@ -1473,64 +1334,3 @@ function fieldToJson(field: Field): JsonField {
metadata: field.metadata,
};
}
function alignTableToSchema(
table: ArrowTable,
targetSchema: Schema,
): ArrowTable {
const existingColumns = new Map<string, Vector>();
// Map existing columns
for (const field of table.schema.fields) {
existingColumns.set(field.name, table.getChild(field.name)!);
}
// Create vectors for all fields in target schema
const alignedColumns: Record<string, Vector> = {};
for (const field of targetSchema.fields) {
if (existingColumns.has(field.name)) {
// Column exists, use it
alignedColumns[field.name] = existingColumns.get(field.name)!;
} else {
// Column missing, create null vector
alignedColumns[field.name] = createNullVector(field, table.numRows);
}
}
// Create new table with aligned schema and columns
return new ArrowTable(targetSchema, alignedColumns);
}
function createNullVector(field: Field, numRows: number): Vector {
if (field.type.constructor.name === "Struct") {
// For struct types, create a struct with null fields
const structType = field.type as Struct;
const childVectors = structType.children.map((childField) =>
createNullVector(childField, numRows),
);
// Create struct data
const structData = makeData({
type: structType,
length: numRows,
nullCount: 0,
children: childVectors.map((v) => v.data[0]),
});
return arrowMakeVector(structData);
} else {
// For other types, create a vector of nulls
const nullBitmap = new Uint8Array(Math.ceil(numRows / 8));
// All bits are 0, meaning all values are null
const data = makeData({
type: field.type,
length: numRows,
nullCount: numRows,
nullBitmap,
});
return arrowMakeVector(data);
}
}

View File

@@ -85,9 +85,6 @@ export interface OpenTableOptions {
/**
* Set the size of the index cache, specified as a number of entries
*
* @deprecated Use session-level cache configuration instead.
* Create a Session with custom cache sizes and pass it to the connect() function.
*
* The exact meaning of an "entry" will depend on the type of index:
* - IVF: there is one entry for each IVF partition
* - BTREE: there is one entry for the entire index
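*
* Editor's note: an illustrative sketch, not part of this diff, of the
* session-based replacement the deprecation note above points to, assuming
* the 0.21.x `Session` API shown elsewhere in this diff:
*
* ```ts
* const session = new Session(BigInt(1024 * 1024 * 1024)); // 1 GiB index cache
* const db = await connect({ uri: "/path/to/db", session });
* const tbl = await db.openTable("my_table");
* ```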

View File

@@ -10,7 +10,6 @@ import {
import {
ConnectionOptions,
Connection as LanceDbConnection,
Session,
} from "./native.js";
export {
@@ -52,8 +51,6 @@ export {
OpenTableOptions,
} from "./connection";
export { Session } from "./native.js";
export {
ExecutableQuery,
Query,
@@ -67,10 +64,7 @@ export {
PhraseQuery,
BoostQuery,
MultiMatchQuery,
BooleanQuery,
FullTextQueryType,
Operator,
Occur,
} from "./query";
export {
@@ -103,7 +97,6 @@ export {
RecordBatchLike,
DataLike,
IntoVector,
MultiVector,
} from "./arrow";
export { IntoSql, packBits } from "./util";
@@ -134,7 +127,6 @@ export { IntoSql, packBits } from "./util";
export async function connect(
uri: string,
options?: Partial<ConnectionOptions>,
session?: Session,
): Promise<Connection>;
/**
* Connect to a LanceDB instance at the given URI.
@@ -153,43 +145,31 @@ export async function connect(
* storageOptions: {timeout: "60s"}
* });
* ```
*
* @example
* ```ts
* const session = Session.default();
* const conn = await connect({
* uri: "/path/to/database",
* session: session
* });
* ```
*/
export async function connect(
options: Partial<ConnectionOptions> & { uri: string },
): Promise<Connection>;
export async function connect(
uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
options?: Partial<ConnectionOptions>,
options: Partial<ConnectionOptions> = {},
): Promise<Connection> {
let uri: string | undefined;
let finalOptions: Partial<ConnectionOptions> = {};
if (typeof uriOrOptions !== "string") {
const { uri: uri_, ...opts } = uriOrOptions;
uri = uri_;
finalOptions = opts;
options = opts;
} else {
uri = uriOrOptions;
finalOptions = options || {};
}
if (!uri) {
throw new Error("uri is required");
}
finalOptions = (finalOptions as ConnectionOptions) ?? {};
(<ConnectionOptions>finalOptions).storageOptions = cleanseStorageOptions(
(<ConnectionOptions>finalOptions).storageOptions,
options = (options as ConnectionOptions) ?? {};
(<ConnectionOptions>options).storageOptions = cleanseStorageOptions(
(<ConnectionOptions>options).storageOptions,
);
const nativeConn = await LanceDbConnection.new(uri, finalOptions);
const nativeConn = await LanceDbConnection.new(uri, options);
return new LocalConnection(nativeConn);
}

View File

@@ -439,7 +439,7 @@ export interface FtsOptions {
*
* "raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token.
*/
baseTokenizer?: "simple" | "whitespace" | "raw" | "ngram";
baseTokenizer?: "simple" | "whitespace" | "raw";
/**
* language for stemming and stop words
@@ -472,21 +472,6 @@ export interface FtsOptions {
* whether to fold non-ASCII characters into their ASCII equivalents
*/
asciiFolding?: boolean;
/**
* ngram min length
*/
ngramMinLength?: number;
/**
* ngram max length
*/
ngramMaxLength?: number;
/**
* whether to only index the prefix of the token for ngram tokenizer
*/
prefixOnly?: boolean;
}
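// Editor's note: an illustrative sketch, not part of this diff, of an FTS
// index using the ngram options above (the 0.21.x side of this diff),
// mirroring the removed ngram test; assumes `connect` is imported from the
// package root and the table has a `text` column.
//
//   const db = await connect("/tmp/lancedb-ngram-sketch");
//   const table = await db.createTable("sketch", [{ text: "lance database" }]);
//   await table.createIndex("text", {
//     config: Index.fts({
//       baseTokenizer: "ngram",
//       ngramMinLength: 2,
//       prefixOnly: true,
//     }),
//   });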
export class Index {
@@ -623,9 +608,6 @@ export class Index {
options?.stem,
options?.removeStopWords,
options?.asciiFolding,
options?.ngramMinLength,
options?.ngramMaxLength,
options?.prefixOnly,
),
);
}

View File

@@ -448,10 +448,6 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
* For best results we recommend tuning this parameter with a benchmark against
* your actual data to find the smallest possible value that will still give
* you the desired recall.
*
* For more fine-grained control over behavior when you have a very narrow filter
* you can use `minimumNprobes` and `maximumNprobes`. This method sets both
* the minimum and maximum to the same value.
*/
nprobes(nprobes: number): VectorQuery {
super.doCall((inner) => inner.nprobes(nprobes));
@@ -459,33 +455,6 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
return this;
}
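// Editor's note: an illustrative sketch, not part of this diff, of typical
// nprobes tuning against an IVF-indexed table, as in the removed tests
// (`tbl` and `queryVec` assumed from context):
//
//   const rst = await tbl
//     .query()
//     .nearestTo(queryVec)
//     .nprobes(50)
//     .limit(10)
//     .toArray();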
/**
* Set the minimum number of probes used.
*
* This controls the minimum number of partitions that will be searched. This
* parameter will impact every query against a vector index, regardless of the
* filter. See `nprobes` for more details. Higher values will increase recall
* but will also increase latency.
*/
minimumNprobes(minimumNprobes: number): VectorQuery {
super.doCall((inner) => inner.minimumNprobes(minimumNprobes));
return this;
}
/**
* Set the maximum number of probes used.
*
* This controls the maximum number of partitions that will be searched. If this
* number is greater than minimumNprobes then the excess partitions will _only_ be
* searched if we have not found enough results. This can be useful when there is
* a narrow filter to allow these queries to spend more time searching and avoid
* potential false negatives.
*/
maximumNprobes(maximumNprobes: number): VectorQuery {
super.doCall((inner) => inner.maximumNprobes(maximumNprobes));
return this;
}
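// Editor's note: an illustrative sketch, not part of this diff, splitting
// the probe budget so a narrowly filtered query may search extra partitions
// only when too few results are found (`tbl`, `queryVec` assumed):
//
//   const rst = await tbl
//     .query()
//     .nearestTo(queryVec)
//     .minimumNprobes(10)
//     .maximumNprobes(20)
//     .limit(10)
//     .toArray();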
/**
* Set the distance range to use
*
@@ -793,31 +762,6 @@ export enum FullTextQueryType {
MatchPhrase = "match_phrase",
Boost = "boost",
MultiMatch = "multi_match",
Boolean = "boolean",
}
/**
* Enum representing the logical operators used in full-text queries.
*
* - `And`: All terms must match.
* - `Or`: At least one term must match.
*/
export enum Operator {
And = "AND",
Or = "OR",
}
/**
* Enum representing the occurrence of terms in full-text queries.
*
* - `Must`: The term must be present in the document.
* - `Should`: The term should contribute to the document score, but is not required.
* - `MustNot`: The term must not be present in the document.
*/
export enum Occur {
Should = "SHOULD",
Must = "MUST",
MustNot = "MUST_NOT",
}
/**
@@ -847,7 +791,6 @@ export function instanceOfFullTextQuery(obj: any): obj is FullTextQuery {
export class MatchQuery implements FullTextQuery {
/** @ignore */
public readonly inner: JsFullTextQuery;
/**
* Creates an instance of MatchQuery.
*
@@ -857,8 +800,6 @@ export class MatchQuery implements FullTextQuery {
* - `boost`: The boost factor for the query (default is 1.0).
* - `fuzziness`: The fuzziness level for the query (default is 0).
* - `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
* - `operator`: The logical operator to use for combining terms in the query (default is "OR").
* - `prefixLength`: The number of beginning characters being unchanged for fuzzy matching.
*/
constructor(
query: string,
@@ -867,8 +808,6 @@ export class MatchQuery implements FullTextQuery {
boost?: number;
fuzziness?: number;
maxExpansions?: number;
operator?: Operator;
prefixLength?: number;
},
) {
let fuzziness = options?.fuzziness;
@@ -881,8 +820,6 @@ export class MatchQuery implements FullTextQuery {
options?.boost ?? 1.0,
fuzziness,
options?.maxExpansions ?? 50,
options?.operator ?? Operator.Or,
options?.prefixLength ?? 0,
);
}
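// Editor's note: an illustrative sketch, not part of this diff, combining
// the options documented above, as the removed tests do (`table` assumed):
//
//   const hits = await table
//     .search(new MatchQuery("hello world", "text", {
//       operator: Operator.And,
//       fuzziness: 1,
//       prefixLength: 1,
//     }))
//     .toArray();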
@@ -899,11 +836,9 @@ export class PhraseQuery implements FullTextQuery {
*
* @param query - The phrase to search for in the specified column.
* @param column - The name of the column to search within.
* @param options - Optional parameters for the phrase query.
* - `slop`: The maximum number of intervening unmatched positions allowed between words in the phrase (default is 0).
*/
constructor(query: string, column: string, options?: { slop?: number }) {
this.inner = JsFullTextQuery.phraseQuery(query, column, options?.slop ?? 0);
constructor(query: string, column: string) {
this.inner = JsFullTextQuery.phraseQuery(query, column);
}
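// Editor's note: an illustrative sketch, not part of this diff; `slop`
// relaxes adjacency, so with slop >= 1 the phrase below would also match
// "hello brave world" (`table` assumed):
//
//   const hits = await table
//     .search(new PhraseQuery("hello world", "text", { slop: 1 }))
//     .toArray();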
queryType(): FullTextQueryType {
@@ -954,21 +889,18 @@ export class MultiMatchQuery implements FullTextQuery {
* @param columns - An array of column names to search within.
* @param options - Optional parameters for the multi-match query.
* - `boosts`: An array of boost factors for each column (default is 1.0 for all).
* - `operator`: The logical operator to use for combining terms in the query (default is "OR").
*/
constructor(
query: string,
columns: string[],
options?: {
boosts?: number[];
operator?: Operator;
},
) {
this.inner = JsFullTextQuery.multiMatchQuery(
query,
columns,
options?.boosts,
options?.operator ?? Operator.Or,
);
}
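// Editor's note: an illustrative sketch, not part of this diff, searching
// two columns with per-column boosts and AND semantics (`table` and its
// columns are assumed):
//
//   const hits = await table
//     .search(new MultiMatchQuery("lance", ["title", "body"], {
//       boosts: [2.0, 1.0],
//       operator: Operator.And,
//     }))
//     .toArray();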
@@ -976,23 +908,3 @@ export class MultiMatchQuery implements FullTextQuery {
return FullTextQueryType.MultiMatch;
}
}
export class BooleanQuery implements FullTextQuery {
/** @ignore */
public readonly inner: JsFullTextQuery;
/**
* Creates an instance of BooleanQuery.
*
* @param queries - An array of `[Occur, FullTextQuery]` pairs to combine.
* Occur specifies whether each sub-query must match, should match, or must not match.
*/
constructor(queries: [Occur, FullTextQuery][]) {
this.inner = JsFullTextQuery.booleanQuery(
queries.map(([occur, query]) => [occur, query.inner]),
);
}
queryType(): FullTextQueryType {
return FullTextQueryType.Boolean;
}
}
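// Editor's note: an illustrative sketch, not part of this diff, composing
// sub-queries with Occur as the removed boolean-query test does ("must
// contain cat, must not contain dog"); `table` is an assumed open table.
const sketchQuery = new BooleanQuery([
  [Occur.Must, new MatchQuery("cat", "text")],
  [Occur.MustNot, new MatchQuery("dog", "text")],
]);
// const hits = await table.search(sketchQuery).toArray();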

View File

@@ -6,11 +6,9 @@ import {
Data,
DataType,
IntoVector,
MultiVector,
Schema,
dataTypeToJson,
fromDataToBuffer,
isMultiVector,
tableFromIPC,
} from "./arrow";
@@ -77,10 +75,10 @@ export interface OptimizeOptions {
* // Delete all versions older than 1 day
* const olderThan = new Date();
* olderThan.setDate(olderThan.getDate() - 1);
* tbl.optimize({cleanupOlderThan: olderThan});
* tbl.cleanupOlderVersions(olderThan);
*
* // Delete all versions except the current version
* tbl.optimize({cleanupOlderThan: new Date()});
* tbl.cleanupOlderVersions(new Date());
*/
cleanupOlderThan: Date;
deleteUnverified: boolean;
@@ -348,7 +346,7 @@ export abstract class Table {
* if the query is a string and no embedding function is defined, it will be treated as a full text search query
*/
abstract search(
query: string | IntoVector | MultiVector | FullTextQuery,
query: string | IntoVector | FullTextQuery,
queryType?: string,
ftsColumns?: string | string[],
): VectorQuery | Query;
@@ -359,7 +357,7 @@ export abstract class Table {
* is the same thing as calling `nearestTo` on the builder returned
* by `query`. @see {@link Query#nearestTo} for more details.
*/
abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
abstract vectorSearch(vector: IntoVector): VectorQuery;
/**
* Add new columns with defined values.
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
@@ -670,7 +668,7 @@ export class LocalTable extends Table {
}
search(
query: string | IntoVector | MultiVector | FullTextQuery,
query: string | IntoVector | FullTextQuery,
queryType: string = "auto",
ftsColumns?: string | string[],
): VectorQuery | Query {
@@ -717,15 +715,7 @@ export class LocalTable extends Table {
return this.query().nearestTo(queryPromise);
}
vectorSearch(vector: IntoVector | MultiVector): VectorQuery {
if (isMultiVector(vector)) {
const query = this.query().nearestTo(vector[0]);
for (const v of vector.slice(1)) {
query.addQueryVector(v);
}
return query;
}
vectorSearch(vector: IntoVector): VectorQuery {
return this.query().nearestTo(vector);
}
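// Editor's note: an illustrative sketch, not part of this diff; the branch
// above fans extra vectors out via addQueryVector, so callers can pass an
// array of vectors directly (`tbl` assumed):
//
//   const hits = await tbl
//     .vectorSearch([[0.1, 0.2], [0.3, 0.4]])
//     .limit(10)
//     .toArray();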

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.2",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.2",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.2",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.2",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.2",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.2",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.21.2-beta.1",
"version": "0.20.0-beta.2",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -74,10 +74,6 @@ impl Connection {
builder = builder.host_override(&host_override);
}
if let Some(session) = options.session {
builder = builder.session(session.inner.clone());
}
Ok(Self::inner_new(builder.execute().await.default_error()?))
}

View File

@@ -123,9 +123,6 @@ impl Index {
stem: Option<bool>,
remove_stop_words: Option<bool>,
ascii_folding: Option<bool>,
ngram_min_length: Option<u32>,
ngram_max_length: Option<u32>,
prefix_only: Option<bool>,
) -> Self {
let mut opts = FtsIndexBuilder::default();
if let Some(with_position) = with_position {
@@ -152,15 +149,6 @@ impl Index {
if let Some(ascii_folding) = ascii_folding {
opts = opts.ascii_folding(ascii_folding);
}
if let Some(ngram_min_length) = ngram_min_length {
opts = opts.ngram_min_length(ngram_min_length);
}
if let Some(ngram_max_length) = ngram_max_length {
opts = opts.ngram_max_length(ngram_max_length);
}
if let Some(prefix_only) = prefix_only {
opts = opts.ngram_prefix_only(prefix_only);
}
Self {
inner: Mutex::new(Some(LanceDbIndex::FTS(opts))),

View File

@@ -14,7 +14,6 @@ pub mod merge;
mod query;
pub mod remote;
mod rerankers;
mod session;
mod table;
mod util;
@@ -35,9 +34,6 @@ pub struct ConnectionOptions {
///
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
pub storage_options: Option<HashMap<String, String>>,
/// (For LanceDB OSS only): the session to use for this connection. Holds
/// shared caches and other session-specific state.
pub session: Option<session::Session>,
/// (For LanceDB cloud only): configuration for the remote HTTP client.
pub client_config: Option<remote::ClientConfig>,

View File

@@ -4,8 +4,7 @@
use std::sync::Arc;
use lancedb::index::scalar::{
BooleanQuery, BoostQuery, FtsQuery, FullTextSearchQuery, MatchQuery, MultiMatchQuery, Occur,
Operator, PhraseQuery,
BoostQuery, FtsQuery, FullTextSearchQuery, MatchQuery, MultiMatchQuery, PhraseQuery,
};
use lancedb::query::ExecutableQuery;
use lancedb::query::Query as LanceDbQuery;
@@ -178,31 +177,6 @@ impl VectorQuery {
self.inner = self.inner.clone().nprobes(nprobe as usize);
}
#[napi]
pub fn minimum_nprobes(&mut self, minimum_nprobe: u32) -> napi::Result<()> {
self.inner = self
.inner
.clone()
.minimum_nprobes(minimum_nprobe as usize)
.default_error()?;
Ok(())
}
#[napi]
pub fn maximum_nprobes(&mut self, maximum_nprobes: u32) -> napi::Result<()> {
let maximum_nprobes = if maximum_nprobes == 0 {
None
} else {
Some(maximum_nprobes as usize)
};
self.inner = self
.inner
.clone()
.maximum_nprobes(maximum_nprobes)
.default_error()?;
Ok(())
}
#[napi]
pub fn distance_range(&mut self, lower_bound: Option<f64>, upper_bound: Option<f64>) {
// napi doesn't support f32, so we have to convert to f32
@@ -334,8 +308,6 @@ impl JsFullTextQuery {
boost: f64,
fuzziness: Option<u32>,
max_expansions: u32,
operator: String,
prefix_length: u32,
) -> napi::Result<Self> {
Ok(Self {
inner: MatchQuery::new(query)
@@ -343,23 +315,14 @@ impl JsFullTextQuery {
.with_boost(boost as f32)
.with_fuzziness(fuzziness)
.with_max_expansions(max_expansions as usize)
.with_operator(
Operator::try_from(operator.as_str()).map_err(|e| {
napi::Error::from_reason(format!("Invalid operator: {}", e))
})?,
)
.with_prefix_length(prefix_length)
.into(),
})
}
#[napi(factory)]
pub fn phrase_query(query: String, column: String, slop: u32) -> napi::Result<Self> {
pub fn phrase_query(query: String, column: String) -> napi::Result<Self> {
Ok(Self {
inner: PhraseQuery::new(query)
.with_column(Some(column))
.with_slop(slop)
.into(),
inner: PhraseQuery::new(query).with_column(Some(column)).into(),
})
}
@@ -385,7 +348,6 @@ impl JsFullTextQuery {
query: String,
columns: Vec<String>,
boosts: Option<Vec<f64>>,
operator: String,
) -> napi::Result<Self> {
let q = match boosts {
Some(boosts) => MultiMatchQuery::try_new(query, columns)
@@ -396,37 +358,7 @@ impl JsFullTextQuery {
napi::Error::from_reason(format!("Failed to create multi match query: {}", e))
})?;
let operator = Operator::try_from(operator.as_str()).map_err(|e| {
napi::Error::from_reason(format!("Invalid operator for multi match query: {}", e))
})?;
Ok(Self {
inner: q.with_operator(operator).into(),
})
}
#[napi(factory)]
pub fn boolean_query(queries: Vec<(String, &JsFullTextQuery)>) -> napi::Result<Self> {
let mut sub_queries = Vec::with_capacity(queries.len());
for (occur, q) in queries {
let occur = Occur::try_from(occur.as_str())
.map_err(|e| napi::Error::from_reason(e.to_string()))?;
sub_queries.push((occur, q.inner.clone()));
}
Ok(Self {
inner: BooleanQuery::new(sub_queries).into(),
})
}
#[napi(getter)]
pub fn query_type(&self) -> String {
match self.inner {
FtsQuery::Match(_) => "match".to_string(),
FtsQuery::Phrase(_) => "phrase".to_string(),
FtsQuery::Boost(_) => "boost".to_string(),
FtsQuery::MultiMatch(_) => "multi_match".to_string(),
FtsQuery::Boolean(_) => "boolean".to_string(),
}
Ok(Self { inner: q.into() })
}
}

View File

@@ -1,102 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::sync::Arc;
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
use napi::bindgen_prelude::*;
use napi_derive::*;
/// A session for managing caches and object stores across LanceDB operations.
///
/// Sessions allow you to configure cache sizes for index and metadata caches,
/// which can significantly impact memory use and performance. They can
/// also be re-used across multiple connections to share the same cache state.
#[napi]
#[derive(Clone)]
pub struct Session {
pub(crate) inner: Arc<LanceSession>,
}
impl std::fmt::Debug for Session {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Session")
.field("size_bytes", &self.inner.size_bytes())
.field("approx_num_items", &self.inner.approx_num_items())
.finish()
}
}
#[napi]
impl Session {
/// Create a new session with custom cache sizes.
///
/// # Parameters
///
/// - `index_cache_size_bytes`: The size of the index cache in bytes.
/// Index data is stored in memory in this cache to speed up queries.
/// Defaults to 6GB if not specified.
/// - `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
/// The metadata cache stores file metadata and schema information in memory.
/// This cache improves scan and write performance.
/// Defaults to 1GB if not specified.
#[napi(constructor)]
pub fn new(
index_cache_size_bytes: Option<BigInt>,
metadata_cache_size_bytes: Option<BigInt>,
) -> napi::Result<Self> {
let index_cache_size = index_cache_size_bytes
.map(|size| size.get_u64().1 as usize)
.unwrap_or(6 * 1024 * 1024 * 1024); // 6GB default
let metadata_cache_size = metadata_cache_size_bytes
.map(|size| size.get_u64().1 as usize)
.unwrap_or(1024 * 1024 * 1024); // 1GB default
let session = LanceSession::new(
index_cache_size,
metadata_cache_size,
Arc::new(ObjectStoreRegistry::default()),
);
Ok(Self {
inner: Arc::new(session),
})
}
/// Create a session with default cache sizes.
///
/// This is equivalent to creating a session with 6GB index cache
/// and 1GB metadata cache.
#[napi(factory)]
pub fn default() -> Self {
Self {
inner: Arc::new(LanceSession::default()),
}
}
/// Get the current size of the session caches in bytes.
#[napi]
pub fn size_bytes(&self) -> BigInt {
BigInt::from(self.inner.size_bytes())
}
/// Get the approximate number of items cached in the session.
#[napi]
pub fn approx_num_items(&self) -> u32 {
self.inner.approx_num_items() as u32
}
}
// Implement FromNapiValue for Session to work with napi(object)
impl napi::bindgen_prelude::FromNapiValue for Session {
unsafe fn from_napi_value(
env: napi::sys::napi_env,
napi_val: napi::sys::napi_value,
) -> napi::Result<Self> {
let object: napi::bindgen_prelude::ClassInstance<Session> =
napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
let copy = object.clone();
Ok(copy)
}
}

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.24.2"
current_version = "0.23.0-beta.2"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,19 +0,0 @@
These are the Python bindings of LanceDB.
The core Rust library is in the `../rust/lancedb` directory, the Rust binding
code is in the `src/` directory, and the Python bindings are in the `lancedb/` directory.
Common commands:
* Build: `make develop`
* Format: `make format`
* Lint: `make check`
* Fix lints: `make fix`
* Test: `make test`
* Doc test: `make doctest`
Before committing changes, run lints and then formatting.
When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.24.2"
version = "0.23.0-beta.2"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -85,8 +85,8 @@ embeddings = [
"boto3>=1.28.57",
"awscli>=1.29.57",
"botocore>=1.31.57",
'ibm-watsonx-ai>=1.1.2; python_version >= "3.10"',
"ollama>=0.3.0",
"ollama",
"ibm-watsonx-ai>=1.1.2",
]
azure = ["adlfs>=2024.2.0"]

View File

@@ -18,7 +18,6 @@ from .remote import ClientConfig
from .remote.db import RemoteDBConnection
from .schema import vector
from .table import AsyncTable
from ._lancedb import Session
def connect(
@@ -31,7 +30,6 @@ def connect(
request_thread_pool: Optional[Union[int, ThreadPoolExecutor]] = None,
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
**kwargs: Any,
) -> DBConnection:
"""Connect to a LanceDB database.
@@ -66,12 +64,6 @@ def connect(
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://lancedb.github.io/lancedb/guides/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
Examples
--------
@@ -100,7 +92,7 @@ def connect(
if api_key is None:
api_key = os.environ.get("LANCEDB_API_KEY")
if api_key is None:
raise ValueError(f"api_key is required to connect to LanceDB cloud: {uri}")
raise ValueError(f"api_key is required to connected LanceDB cloud: {uri}")
if isinstance(request_thread_pool, int):
request_thread_pool = ThreadPoolExecutor(request_thread_pool)
return RemoteDBConnection(
@@ -121,7 +113,6 @@ def connect(
uri,
read_consistency_interval=read_consistency_interval,
storage_options=storage_options,
session=session,
)
@@ -134,7 +125,6 @@ async def connect_async(
read_consistency_interval: Optional[timedelta] = None,
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
) -> AsyncConnection:
"""Connect to a LanceDB database.
@@ -168,12 +158,6 @@ async def connect_async(
storage_options: dict, optional
Additional options for the storage backend. See available options at
<https://lancedb.github.io/lancedb/guides/storage/>
session: Session, optional
(For LanceDB OSS only)
A session to use for this connection. Sessions allow you to configure
cache sizes for index and metadata caches, which can significantly
impact memory use and performance. They can also be re-used across
multiple connections to share the same cache state.
Examples
--------
@@ -213,7 +197,6 @@ async def connect_async(
read_consistency_interval_secs,
client_config,
storage_options,
session,
)
)
@@ -229,7 +212,6 @@ __all__ = [
"DBConnection",
"LanceDBConnection",
"RemoteDBConnection",
"Session",
"__version__",
]

View File

@@ -6,19 +6,6 @@ import pyarrow as pa
from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS
from .remote import ClientConfig
class Session:
def __init__(
self,
index_cache_size_bytes: Optional[int] = None,
metadata_cache_size_bytes: Optional[int] = None,
): ...
@staticmethod
def default() -> "Session": ...
@property
def size_bytes(self) -> int: ...
@property
def approx_num_items(self) -> int: ...
class Connection(object):
uri: str
async def table_names(
@@ -102,7 +89,6 @@ async def connect(
read_consistency_interval: Optional[float],
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
storage_options: Optional[Dict[str, str]],
session: Optional[Session],
) -> Connection: ...
class RecordBatchStream:
@@ -157,8 +143,6 @@ class VectorQuery:
def postfilter(self): ...
def refine_factor(self, refine_factor: int): ...
def nprobes(self, nprobes: int): ...
def minimum_nprobes(self, minimum_nprobes: int): ...
def maximum_nprobes(self, maximum_nprobes: int): ...
def bypass_vector_index(self): ...
def nearest_to_text(self, query: dict) -> HybridQuery: ...
def to_query_request(self) -> PyQueryRequest: ...
@@ -174,8 +158,6 @@ class HybridQuery:
def distance_type(self, distance_type: str): ...
def refine_factor(self, refine_factor: int): ...
def nprobes(self, nprobes: int): ...
def minimum_nprobes(self, minimum_nprobes: int): ...
def maximum_nprobes(self, maximum_nprobes: int): ...
def bypass_vector_index(self): ...
def to_vector_query(self) -> VectorQuery: ...
def to_fts_query(self) -> FTSQuery: ...
@@ -183,21 +165,23 @@ class HybridQuery:
def get_with_row_id(self) -> bool: ...
def to_query_request(self) -> PyQueryRequest: ...
class FullTextQuery:
pass
class PyFullTextSearchQuery:
columns: Optional[List[str]]
query: str
limit: Optional[int]
wand_factor: Optional[float]
class PyQueryRequest:
limit: Optional[int]
offset: Optional[int]
filter: Optional[Union[str, bytes]]
full_text_search: Optional[FullTextQuery]
full_text_search: Optional[PyFullTextSearchQuery]
select: Optional[Union[str, List[str]]]
fast_search: Optional[bool]
with_row_id: Optional[bool]
column: Optional[str]
query_vector: Optional[List[pa.Array]]
minimum_nprobes: Optional[int]
maximum_nprobes: Optional[int]
nprobes: Optional[int]
lower_bound: Optional[float]
upper_bound: Optional[float]
ef: Optional[int]

View File

@@ -94,9 +94,9 @@ def data_to_reader(
else:
raise TypeError(
f"Unknown data type {type(data)}. "
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
"pyarrow Table/RecordBatch, or Pydantic models. "
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
"Please check "
"https://lancedb.github.io/lance/read_and_write.html "
"to see supported types."
)
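For context on the error message above, a minimal sketch of inputs that the reader accepts (table names and data are hypothetical):

import lancedb
import pandas as pd
import pyarrow as pa

db = lancedb.connect("data/example")
# a list of dicts, a pandas DataFrame, and a pyarrow Table are all supported
db.create_table("t1", data=[{"vector": [1.0, 2.0], "text": "hello"}])
db.create_table("t2", data=pd.DataFrame({"vector": [[1.0, 2.0]], "text": ["hello"]}))
db.create_table("t3", data=pa.table({"vector": [[1.0, 2.0]], "text": ["hello"]}))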

View File

@@ -37,7 +37,6 @@ if TYPE_CHECKING:
from ._lancedb import Connection as LanceDbConnection
from .common import DATA, URI
from .embeddings import EmbeddingFunctionConfig
from ._lancedb import Session
class DBConnection(EnforceOverrides):
@@ -248,9 +247,6 @@ class DBConnection(EnforceOverrides):
name: str
The name of the table.
index_cache_size: int, default 256
**Deprecated**: Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to lancedb.connect().
Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index:
@@ -358,7 +354,6 @@ class LanceDBConnection(DBConnection):
*,
read_consistency_interval: Optional[timedelta] = None,
storage_options: Optional[Dict[str, str]] = None,
session: Optional[Session] = None,
):
if not isinstance(uri, Path):
scheme = get_uri_scheme(uri)
@@ -372,7 +367,6 @@ class LanceDBConnection(DBConnection):
self._entered = False
self.read_consistency_interval = read_consistency_interval
self.storage_options = storage_options
self.session = session
if read_consistency_interval is not None:
read_consistency_interval_secs = read_consistency_interval.total_seconds()
@@ -388,7 +382,6 @@ class LanceDBConnection(DBConnection):
read_consistency_interval_secs,
None,
storage_options,
session,
)
self._conn = AsyncConnection(LOOP.run(do_connect()))
@@ -482,17 +475,6 @@ class LanceDBConnection(DBConnection):
-------
A LanceTable object representing the table.
"""
if index_cache_size is not None:
import warnings
warnings.warn(
"index_cache_size is deprecated. Use session-level cache "
"configuration instead. Create a Session with custom cache sizes "
"and pass it to lancedb.connect().",
DeprecationWarning,
stacklevel=2,
)
return LanceTable.open(
self,
name,
@@ -838,9 +820,6 @@ class AsyncConnection(object):
See available options at
<https://lancedb.github.io/lancedb/guides/storage/>
index_cache_size: int, default 256
**Deprecated**: Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to lancedb.connect().
Set the size of the index cache, specified as a number of entries
The exact meaning of an "entry" will depend on the type of index:

View File

@@ -11,7 +11,7 @@ from .instructor import InstructorEmbeddingFunction
from .ollama import OllamaEmbeddings
from .open_clip import OpenClipEmbeddings
from .openai import OpenAIEmbeddings
from .registry import EmbeddingFunctionRegistry, get_registry, register
from .registry import EmbeddingFunctionRegistry, get_registry
from .sentence_transformers import SentenceTransformerEmbeddings
from .gte import GteEmbeddings
from .transformers import TransformersEmbeddingFunction, ColbertEmbeddings

View File

@@ -9,14 +9,11 @@ from huggingface_hub import snapshot_download
from pydantic import BaseModel
from transformers import BertTokenizer
from .utils import create_import_stub
try:
import mlx.core as mx
import mlx.nn as nn
except ImportError:
mx = create_import_stub("mlx.core", "mlx")
nn = create_import_stub("mlx.nn", "mlx")
raise ImportError("You need to install MLX to use this model use - pip install mlx")
def average_pool(last_hidden_state: mx.array, attention_mask: mx.array) -> mx.array:
@@ -75,7 +72,7 @@ class TransformerEncoder(nn.Module):
super().__init__()
self.layers = [
TransformerEncoderLayer(dims, num_heads, mlp_dims)
for _ in range(num_layers)
for i in range(num_layers)
]
def __call__(self, x, mask):

View File

@@ -2,15 +2,14 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from functools import cached_property
from typing import TYPE_CHECKING, List, Optional, Sequence, Union
import numpy as np
from typing import TYPE_CHECKING, List, Optional, Union
from ..util import attempt_import_or_raise
from .base import TextEmbeddingFunction
from .registry import register
if TYPE_CHECKING:
import numpy as np
import ollama
@@ -29,21 +28,23 @@ class OllamaEmbeddings(TextEmbeddingFunction):
keep_alive: Optional[Union[float, str]] = None
ollama_client_kwargs: Optional[dict] = {}
def ndims(self) -> int:
def ndims(self):
return len(self.generate_embeddings(["foo"])[0])
def _compute_embedding(self, text: Sequence[str]) -> Sequence[Sequence[float]]:
response = self._ollama_client.embed(
model=self.name,
input=text,
options=self.options,
keep_alive=self.keep_alive,
def _compute_embedding(self, text) -> Union["np.array", None]:
return (
self._ollama_client.embeddings(
model=self.name,
prompt=text,
options=self.options,
keep_alive=self.keep_alive,
)["embedding"]
or None
)
return response.embeddings
def generate_embeddings(
self, texts: Union[List[str], np.ndarray]
) -> list[Union[np.array, None]]:
self, texts: Union[List[str], "np.ndarray"]
) -> list[Union["np.array", None]]:
"""
Get the embeddings for the given texts
@@ -53,8 +54,8 @@ class OllamaEmbeddings(TextEmbeddingFunction):
The texts to embed
"""
# TODO retry, rate limit, token limit
embeddings = self._compute_embedding(texts)
return list(embeddings)
embeddings = [self._compute_embedding(text) for text in texts]
return embeddings
@cached_property
def _ollama_client(self) -> "ollama.Client":
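The hunk above trades one batched embed call for a per-text loop; a rough sketch of both client patterns, using only the call shapes visible in this diff (the model name is hypothetical):

import ollama

client = ollama.Client()
texts = ["foo", "bar"]
# batched variant (removed): one request embeds all texts
vectors = client.embed(model="nomic-embed-text", input=texts).embeddings
# per-text variant (restored): one request per text
vectors = [
    client.embeddings(model="nomic-embed-text", prompt=t)["embedding"]
    for t in texts
]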

View File

@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import json
from typing import Dict, Optional, Type
from typing import Dict, Optional
from .base import EmbeddingFunction, EmbeddingFunctionConfig
@@ -43,7 +43,7 @@ class EmbeddingFunctionRegistry:
self._functions = {}
self._variables = {}
def register(self, alias: Optional[str] = None):
def register(self, alias: str = None):
"""
This creates a decorator that can be used to register
an EmbeddingFunction.
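A minimal sketch of the decorator usage this docstring describes (the class and alias are hypothetical):

from lancedb.embeddings import TextEmbeddingFunction, get_registry

registry = get_registry()

@registry.register("my-embedder")
class MyEmbedder(TextEmbeddingFunction):
    def ndims(self):
        return 2

    def generate_embeddings(self, texts):
        # toy embeddings; a real implementation would call a model
        return [[0.0, 0.0] for _ in texts]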
@@ -75,7 +75,7 @@ class EmbeddingFunctionRegistry:
"""
self._functions = {}
def get(self, name: str) -> Type[EmbeddingFunction]:
def get(self, name: str):
"""
Fetch an embedding function class by name

View File

@@ -21,36 +21,6 @@ from ..dependencies import pandas as pd
from ..util import attempt_import_or_raise
def create_import_stub(module_name: str, package_name: str = None):
"""
Create a stub module that allows class definition but fails when used.
This allows modules to be imported for doctest collection even when
optional dependencies are not available.
Parameters
----------
module_name : str
The name of the module to create a stub for
package_name : str, optional
The package name to suggest in the error message
Returns
-------
object
A stub object that can be used in place of the module
"""
class _ImportStub:
def __getattr__(self, name):
return _ImportStub # Return stub for chained access like nn.Module
def __call__(self, *args, **kwargs):
pkg = package_name or module_name
raise ImportError(f"You need to install {pkg} to use this functionality")
return _ImportStub()
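A short sketch of how the removed helper above behaves, reusing the mlx names from gte.py earlier in this diff:

# importing the module "succeeds" even when mlx is absent
mx = create_import_stub("mlx.core", "mlx")
base = mx.array  # attribute access returns the stub, so chained names like nn.Module still resolve
try:
    mx(1, 2)  # but calling the stub raises
except ImportError as err:
    print(err)  # You need to install mlx to use this functionality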
# ruff: noqa: PERF203
def retry(tries=10, delay=1, max_delay=30, backoff=3, jitter=1):
def wrapper(fn):

View File

@@ -137,9 +137,6 @@ class FTS:
stem: bool = True
remove_stop_words: bool = True
ascii_folding: bool = True
ngram_min_length: int = 3
ngram_max_length: int = 3
prefix_only: bool = False
@dataclass

View File

@@ -4,6 +4,7 @@
from __future__ import annotations
from abc import ABC, abstractmethod
import abc
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
from datetime import timedelta
@@ -14,7 +15,7 @@ from typing import (
Literal,
Optional,
Tuple,
TypeVar,
Type,
Union,
Any,
)
@@ -58,8 +59,6 @@ if TYPE_CHECKING:
else:
from typing_extensions import Self
T = TypeVar("T", bound="LanceModel")
# Pydantic validation function for vector queries
def ensure_vector_query(
@@ -89,28 +88,15 @@ def ensure_vector_query(
return val
class FullTextQueryType(str, Enum):
class FullTextQueryType(Enum):
MATCH = "match"
MATCH_PHRASE = "match_phrase"
BOOST = "boost"
MULTI_MATCH = "multi_match"
BOOLEAN = "boolean"
class FullTextOperator(str, Enum):
AND = "AND"
OR = "OR"
class Occur(str, Enum):
SHOULD = "SHOULD"
MUST = "MUST"
MUST_NOT = "MUST_NOT"
@pydantic.dataclasses.dataclass
class FullTextQuery(ABC):
@abstractmethod
class FullTextQuery(abc.ABC, pydantic.BaseModel):
@abc.abstractmethod
def query_type(self) -> FullTextQueryType:
"""
Get the query type of the query.
@@ -120,178 +106,193 @@ class FullTextQuery(ABC):
str
The type of the query.
"""
pass
def __and__(self, other: "FullTextQuery") -> "FullTextQuery":
@abc.abstractmethod
def to_dict(self) -> dict:
"""
Combine two queries with a logical AND operation.
Parameters
----------
other : FullTextQuery
The other query to combine with.
Convert the query to a dictionary.
Returns
-------
FullTextQuery
A new query that combines both queries with AND.
dict
The query as a dictionary.
"""
return BooleanQuery([(Occur.MUST, self), (Occur.MUST, other)])
def __or__(self, other: "FullTextQuery") -> "FullTextQuery":
"""
Combine two queries with a logical OR operation.
Parameters
----------
other : FullTextQuery
The other query to combine with.
Returns
-------
FullTextQuery
A new query that combines both queries with OR.
"""
return BooleanQuery([(Occur.SHOULD, self), (Occur.SHOULD, other)])
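A minimal sketch of the operator sugar defined above, grounded in the boolean-query test removed later in this diff (`table` is assumed to exist):

both = MatchQuery("cat", "text") & MatchQuery("dog", "text")
# equivalent to BooleanQuery([(Occur.MUST, ...), (Occur.MUST, ...)])
either = MatchQuery("cat", "text") | MatchQuery("dog", "text")
# equivalent to BooleanQuery([(Occur.SHOULD, ...), (Occur.SHOULD, ...)])
results = table.search(both).to_pandas()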
@pydantic.dataclasses.dataclass
class MatchQuery(FullTextQuery):
"""
Match query for full-text search.
Parameters
----------
query : str
The query string to match against.
column : str
The name of the column to match against.
boost : float, default 1.0
The boost factor for the query.
The score of each matching document is multiplied by this value.
fuzziness : int, optional
The maximum edit distance for each term in the match query.
Defaults to 0 (exact match).
If None, fuzziness is applied automatically by the rules:
- 0 for terms with length <= 2
- 1 for terms with length <= 5
- 2 for terms with length > 5
max_expansions : int, optional
The maximum number of terms to consider for fuzzy matching.
Defaults to 50.
operator : FullTextOperator, default OR
The operator to use for combining the query results.
Can be either `AND` or `OR`.
If `AND`, all terms in the query must match.
If `OR`, at least one term in the query must match.
prefix_length : int, optional
The number of beginning characters left unchanged for fuzzy matching.
This is useful for achieving prefix matching.
"""
query: str
column: str
boost: float = pydantic.Field(1.0, kw_only=True)
fuzziness: int = pydantic.Field(0, kw_only=True)
max_expansions: int = pydantic.Field(50, kw_only=True)
operator: FullTextOperator = pydantic.Field(FullTextOperator.OR, kw_only=True)
prefix_length: int = pydantic.Field(0, kw_only=True)
boost: float = 1.0
fuzziness: int = 0
max_expansions: int = 50
def __init__(
self,
query: str,
column: str,
*,
boost: float = 1.0,
fuzziness: int = 0,
max_expansions: int = 50,
):
"""
Match query for full-text search.
Parameters
----------
query : str
The query string to match against.
column : str
The name of the column to match against.
boost : float, default 1.0
The boost factor for the query.
The score of each matching document is multiplied by this value.
fuzziness : int, optional
The maximum edit distance for each term in the match query.
Defaults to 0 (exact match).
If None, fuzziness is applied automatically by the rules:
- 0 for terms with length <= 2
- 1 for terms with length <= 5
- 2 for terms with length > 5
max_expansions : int, optional
The maximum number of terms to consider for fuzzy matching.
Defaults to 50.
"""
super().__init__(
query=query,
column=column,
boost=boost,
fuzziness=fuzziness,
max_expansions=max_expansions,
)
def query_type(self) -> FullTextQueryType:
return FullTextQueryType.MATCH
def to_dict(self) -> dict:
return {
"match": {
self.column: {
"query": self.query,
"boost": self.boost,
"fuzziness": self.fuzziness,
"max_expansions": self.max_expansions,
}
}
}
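A brief sketch of the match options documented in the removed docstring above (column names and data are hypothetical):

# exact match with a per-query boost
q = MatchQuery("puppy", "text", boost=2.0)
# fuzzy match tolerating one edit, with the first 3 characters fixed
q_fuzzy = MatchQuery("pupy", "text", fuzziness=1, prefix_length=3)
# require every term to match instead of any
q_all = MatchQuery("brown fox", "text", operator=FullTextOperator.AND)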
@pydantic.dataclasses.dataclass
class PhraseQuery(FullTextQuery):
"""
Phrase query for full-text search.
Parameters
----------
query : str
The query string to match against.
column : str
The name of the column to match against.
"""
query: str
column: str
slop: int = pydantic.Field(0, kw_only=True)
def __init__(self, query: str, column: str):
"""
Phrase query for full-text search.
Parameters
----------
query : str
The query string to match against.
column : str
The name of the column to match against.
"""
super().__init__(query=query, column=column)
def query_type(self) -> FullTextQueryType:
return FullTextQueryType.MATCH_PHRASE
def to_dict(self) -> dict:
return {
"match_phrase": {
self.column: self.query,
}
}
@pydantic.dataclasses.dataclass
class BoostQuery(FullTextQuery):
"""
Boost query for full-text search.
Parameters
----------
positive : FullTextQuery
The positive query object.
negative : FullTextQuery
The negative query object.
negative_boost : float, default 0.5
The boost factor for the negative query.
"""
positive: FullTextQuery
negative: FullTextQuery
negative_boost: float = pydantic.Field(0.5, kw_only=True)
negative_boost: float = 0.5
def __init__(
self,
positive: FullTextQuery,
negative: FullTextQuery,
*,
negative_boost: float = 0.5,
):
"""
Boost query for full-text search.
Parameters
----------
positive : FullTextQuery
The positive query object.
negative : FullTextQuery
The negative query object.
negative_boost : float
The boost factor for the negative query.
"""
super().__init__(
positive=positive, negative=negative, negative_boost=negative_boost
)
def query_type(self) -> FullTextQueryType:
return FullTextQueryType.BOOST
def to_dict(self) -> dict:
return {
"boost": {
"positive": self.positive.to_dict(),
"negative": self.negative.to_dict(),
"negative_boost": self.negative_boost,
}
}
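A short sketch of a boost query built from the class above (the sub-queries are hypothetical):

q = BoostQuery(
    positive=MatchQuery("puppy", "text"),
    negative=MatchQuery("car", "text"),
    negative_boost=0.3,
)
# rows matching the negative query are down-weighted, not filtered out
results = table.search(q).limit(10).to_pandas()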
@pydantic.dataclasses.dataclass
class MultiMatchQuery(FullTextQuery):
"""
Multi-match query for full-text search.
Parameters
----------
query : str
The query string to match against.
columns : list[str]
The list of columns to match against.
boosts : list[float], optional
The list of boost factors for each column. If not provided,
all columns will have the same boost factor.
operator : FullTextOperator, default OR
The operator to use for combining the query results.
Can be either `AND` or `OR`.
It is applied to all columns individually.
For example, if the operator is `AND`,
then the query "hello world" is equivalent to
`match("hello AND world", column1) OR match("hello AND world", column2)`.
"""
query: str
columns: list[str]
boosts: Optional[list[float]] = pydantic.Field(None, kw_only=True)
operator: FullTextOperator = pydantic.Field(FullTextOperator.OR, kw_only=True)
boosts: list[float]
def __init__(
self,
query: str,
columns: list[str],
*,
boosts: Optional[list[float]] = None,
):
"""
Multi-match query for full-text search.
Parameters
----------
query : str
The query string to match against.
columns : list[str]
The list of columns to match against.
boosts : list[float], optional
The list of boost factors for each column. If not provided,
all columns will have the same boost factor.
"""
if boosts is None:
boosts = [1.0] * len(columns)
super().__init__(query=query, columns=columns, boosts=boosts)
def query_type(self) -> FullTextQueryType:
return FullTextQueryType.MULTI_MATCH
@pydantic.dataclasses.dataclass
class BooleanQuery(FullTextQuery):
"""
Boolean query for full-text search.
Parameters
----------
queries : list[tuple(Occur, FullTextQuery)]
The list of queries with their occurrence requirements.
"""
queries: list[tuple[Occur, FullTextQuery]]
def query_type(self) -> FullTextQueryType:
return FullTextQueryType.BOOLEAN
def to_dict(self) -> dict:
return {
"multi_match": {
"query": self.query,
"columns": self.columns,
"boost": self.boosts,
}
}
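A minimal sketch of a multi-column match using the class above (column names are hypothetical):

q = MultiMatchQuery("hello", ["text", "text2"], boosts=[1.0, 2.0])
# each column is matched individually; boosts weight the per-column scores
results = table.search(q).limit(10).to_pandas()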
class FullTextSearchQuery(pydantic.BaseModel):
@@ -444,18 +445,8 @@ class Query(pydantic.BaseModel):
# which columns to return in the results
columns: Optional[Union[List[str], Dict[str, str]]] = None
# minimum number of IVF partitions to search
#
# If None then a default value (20) will be used.
minimum_nprobes: Optional[int] = None
# maximum number of IVF partitions to search
#
# If None then a default value (20) will be used.
#
# If 0 then no limit will be applied and all partitions could be searched
# if needed to satisfy the limit.
maximum_nprobes: Optional[int] = None
# number of IVF partitions to search
nprobes: Optional[int] = None
# lower bound for distance search
lower_bound: Optional[float] = None
@@ -493,8 +484,7 @@ class Query(pydantic.BaseModel):
query.vector_column = req.column
query.vector = req.query_vector
query.distance_type = req.distance_type
query.minimum_nprobes = req.minimum_nprobes
query.maximum_nprobes = req.maximum_nprobes
query.nprobes = req.nprobes
query.lower_bound = req.lower_bound
query.upper_bound = req.upper_bound
query.ef = req.ef
@@ -503,8 +493,10 @@ class Query(pydantic.BaseModel):
query.postfilter = req.postfilter
if req.full_text_search is not None:
query.full_text_query = FullTextSearchQuery(
columns=None,
query=req.full_text_search,
columns=req.full_text_search.columns,
query=req.full_text_search.query,
limit=req.full_text_search.limit,
wand_factor=req.full_text_search.wand_factor,
)
return query
@@ -748,8 +740,8 @@ class LanceQueryBuilder(ABC):
return self.to_arrow(timeout=timeout).to_pylist()
def to_pydantic(
self, model: type[T], *, timeout: Optional[timedelta] = None
) -> list[T]:
self, model: Type[LanceModel], *, timeout: Optional[timedelta] = None
) -> List[LanceModel]:
"""Return the table as a list of pydantic models.
Parameters
@@ -908,11 +900,11 @@ class LanceQueryBuilder(ABC):
>>> plan = table.search(query).explain_plan(True)
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------
@@ -942,19 +934,19 @@ class LanceQueryBuilder(ABC):
>>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, metrics=[]
TracedExec, metrics=[]
ProjectionExec: expr=[...], metrics=[...]
GlobalLimitExec: skip=0, fetch=10, metrics=[...]
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...]
SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
LanceRead: uri=..., projection=[vector], ...
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...]
ProjectionExec: expr=[...], metrics=[...]
GlobalLimitExec: skip=0, fetch=10, metrics=[...]
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...]
SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
LanceScan: uri=..., projection=[vector], row_id=true,
row_addr=false, ordered=false,
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...]
Returns
-------
@@ -1055,8 +1047,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
super().__init__(table)
self._query = query
self._distance_type = None
self._minimum_nprobes = None
self._maximum_nprobes = None
self._nprobes = None
self._lower_bound = None
self._upper_bound = None
self._refine_factor = None
@@ -1119,10 +1110,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
See discussion in [Querying an ANN Index][querying-an-ann-index] for
tuning advice.
This method sets both the minimum and maximum number of probes to the same
value. See `minimum_nprobes` and `maximum_nprobes` for more fine-grained
control.
Parameters
----------
nprobes: int
@@ -1133,36 +1120,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
LanceVectorQueryBuilder
The LanceQueryBuilder object.
"""
self._minimum_nprobes = nprobes
self._maximum_nprobes = nprobes
return self
def minimum_nprobes(self, minimum_nprobes: int) -> LanceVectorQueryBuilder:
"""Set the minimum number of probes to use.
See `nprobes` for more details.
These partitions will be searched on every vector query and will increase recall
at the expense of latency.
"""
self._minimum_nprobes = minimum_nprobes
return self
def maximum_nprobes(self, maximum_nprobes: int) -> LanceVectorQueryBuilder:
"""Set the maximum number of probes to use.
See `nprobes` for more details.
If this value is greater than `minimum_nprobes` then the excess partitions
will be searched only if we have not found enough results.
This can be useful when a narrow filter is applied, since it allows these
queries to spend more time searching and avoid potential false negatives.
If this value is 0 then no limit will be applied and all partitions could be
searched if needed to satisfy the limit.
"""
self._maximum_nprobes = maximum_nprobes
self._nprobes = nprobes
return self
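A rough sketch of the probe controls described above (vector and thresholds are illustrative):

# fixed probe count: minimum and maximum are set together
table.search([0.1, 0.2]).nprobes(20).to_list()

# adaptive: always search 20 partitions, expanding up to 40 when a
# narrow filter leaves too few results
(table.search([0.1, 0.2])
    .minimum_nprobes(20)
    .maximum_nprobes(40)
    .where("id > 100")
    .to_list())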
def distance_range(
@@ -1266,8 +1224,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
limit=self._limit,
distance_type=self._distance_type,
columns=self._columns,
minimum_nprobes=self._minimum_nprobes,
maximum_nprobes=self._maximum_nprobes,
nprobes=self._nprobes,
lower_bound=self._lower_bound,
upper_bound=self._upper_bound,
refine_factor=self._refine_factor,
@@ -1376,8 +1333,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
if query_string is not None and not isinstance(query_string, str):
raise ValueError("Reranking currently only supports string queries")
self._str_query = query_string if query_string is not None else self._str_query
if reranker.score == "all":
self.with_row_id(True)
return self
def bypass_vector_index(self) -> LanceVectorQueryBuilder:
@@ -1455,13 +1410,10 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
query = self._query
if self._phrase_query:
if isinstance(query, str):
if not query.startswith('"') or not query.endswith('"'):
query = f'"{query}"'
elif isinstance(query, FullTextQuery) and not isinstance(
query, PhraseQuery
):
raise TypeError("Please use PhraseQuery for phrase queries.")
raise NotImplementedError(
"Phrase query is not yet supported in Lance FTS. "
"Use tantivy-based index instead for now."
)
query = self.to_query_object()
results = self._table._execute_query(query, timeout=timeout)
results = results.read_all()
@@ -1573,8 +1525,6 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
The LanceQueryBuilder object.
"""
self._reranker = reranker
if reranker.score == "all":
self.with_row_id(True)
return self
@@ -1638,8 +1588,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
self._fts_columns = fts_columns
self._norm = None
self._reranker = None
self._minimum_nprobes = None
self._maximum_nprobes = None
self._nprobes = None
self._refine_factor = None
self._distance_type = None
self._phrase_query = None
@@ -1851,8 +1800,6 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
self._norm = normalize
self._reranker = reranker
if reranker.score == "all":
self.with_row_id(True)
return self
@@ -1873,24 +1820,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
LanceHybridQueryBuilder
The LanceHybridQueryBuilder object.
"""
self._minimum_nprobes = nprobes
self._maximum_nprobes = nprobes
return self
def minimum_nprobes(self, minimum_nprobes: int) -> LanceHybridQueryBuilder:
"""Set the minimum number of probes to use.
See `nprobes` for more details.
"""
self._minimum_nprobes = minimum_nprobes
return self
def maximum_nprobes(self, maximum_nprobes: int) -> LanceHybridQueryBuilder:
"""Set the maximum number of probes to use.
See `nprobes` for more details.
"""
self._maximum_nprobes = maximum_nprobes
self._nprobes = nprobes
return self
def distance_range(
@@ -2045,7 +1975,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------
@@ -2119,10 +2049,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
self._fts_query.phrase_query(True)
if self._distance_type:
self._vector_query.metric(self._distance_type)
if self._minimum_nprobes:
self._vector_query.minimum_nprobes(self._minimum_nprobes)
if self._maximum_nprobes is not None:
self._vector_query.maximum_nprobes(self._maximum_nprobes)
if self._nprobes:
self._vector_query.nprobes(self._nprobes)
if self._refine_factor:
self._vector_query.refine_factor(self._refine_factor)
if self._ef:
@@ -2431,7 +2359,7 @@ class AsyncQueryBase(object):
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------
@@ -2585,7 +2513,7 @@ class AsyncQuery(AsyncQueryBase):
self._inner.nearest_to_text({"query": query, "columns": columns})
)
# FullTextQuery object
return AsyncFTSQuery(self._inner.nearest_to_text({"query": query}))
return AsyncFTSQuery(self._inner.nearest_to_text({"query": query.to_dict()}))
class AsyncFTSQuery(AsyncQueryBase):
@@ -2733,34 +2661,6 @@ class AsyncVectorQueryBase:
self._inner.nprobes(nprobes)
return self
def minimum_nprobes(self, minimum_nprobes: int) -> Self:
"""Set the minimum number of probes to use.
See `nprobes` for more details.
These partitions will be searched on every indexed vector query and will
increase recall at the expense of latency.
"""
self._inner.minimum_nprobes(minimum_nprobes)
return self
def maximum_nprobes(self, maximum_nprobes: int) -> Self:
"""Set the maximum number of probes to use.
See `nprobes` for more details.
If this value is greater than `minimum_nprobes` then the excess partitions
will be searched only if we have not found enough results.
This can be useful when a narrow filter is applied, since it allows these
queries to spend more time searching and avoid potential false negatives.
If this value is 0 then no limit will be applied and all partitions could be
searched if needed to satisfy the limit.
"""
self._inner.maximum_nprobes(maximum_nprobes)
return self
def distance_range(
self, lower_bound: Optional[float] = None, upper_bound: Optional[float] = None
) -> Self:
@@ -2935,7 +2835,7 @@ class AsyncVectorQuery(AsyncQueryBase, AsyncVectorQueryBase):
self._inner.nearest_to_text({"query": query, "columns": columns})
)
# FullTextQuery object
return AsyncHybridQuery(self._inner.nearest_to_text({"query": query}))
return AsyncHybridQuery(self._inner.nearest_to_text({"query": query.to_dict()}))
async def to_batches(
self,
@@ -3050,21 +2950,15 @@ class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Vector Search Plan:
ProjectionExec: expr=[vector@0 as vector, text@3 as text, _distance@2 as _distance]
Take: columns="vector, _rowid, _distance, (text)"
CoalesceBatchesExec: target_batch_size=1024
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
<BLANKLINE>
Take: columns="vector, _rowid, _distance, (text)"
CoalesceBatchesExec: target_batch_size=1024
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
FTS Search Plan:
ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
Take: columns="_rowid, _score, (vector), (text)"
CoalesceBatchesExec: target_batch_size=1024
GlobalLimitExec: skip=0, fetch=10
MatchQuery: query=hello
<BLANKLINE>
LanceScan: uri=..., projection=[vector, text], row_id=false, row_addr=false, ordered=true
Parameters
----------

View File

@@ -18,7 +18,7 @@ from lancedb._lancedb import (
UpdateResult,
)
from lancedb.embeddings.base import EmbeddingFunctionConfig
from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, LabelList
from lancedb.index import FTS, BTree, Bitmap, HnswPq, HnswSq, IvfFlat, IvfPq, LabelList
from lancedb.remote.db import LOOP
import pyarrow as pa
@@ -89,7 +89,7 @@ class RemoteTable(Table):
def to_pandas(self):
"""to_pandas() is not yet supported on LanceDB cloud."""
raise NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
raise NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
def checkout(self, version: Union[int, str]):
return LOOP.run(self._table.checkout(version))
@@ -158,9 +158,6 @@ class RemoteTable(Table):
stem: bool = True,
remove_stop_words: bool = True,
ascii_folding: bool = True,
ngram_min_length: int = 3,
ngram_max_length: int = 3,
prefix_only: bool = False,
):
config = FTS(
with_position=with_position,
@@ -171,9 +168,6 @@ class RemoteTable(Table):
stem=stem,
remove_stop_words=remove_stop_words,
ascii_folding=ascii_folding,
ngram_min_length=ngram_min_length,
ngram_max_length=ngram_max_length,
prefix_only=prefix_only,
)
LOOP.run(
self._table.create_index(
@@ -192,8 +186,6 @@ class RemoteTable(Table):
accelerator: Optional[str] = None,
index_type="vector",
wait_timeout: Optional[timedelta] = None,
*,
num_bits: int = 8,
):
"""Create an index on the table.
Currently, the only parameters that matter are
@@ -228,6 +220,11 @@ class RemoteTable(Table):
>>> table.create_index("l2", "vector") # doctest: +SKIP
"""
if num_partitions is not None:
logging.warning(
"num_partitions is not supported on LanceDB cloud."
"This parameter will be tuned automatically."
)
if num_sub_vectors is not None:
logging.warning(
"num_sub_vectors is not supported on LanceDB cloud."
@@ -247,21 +244,13 @@ class RemoteTable(Table):
index_type = index_type.upper()
if index_type == "VECTOR" or index_type == "IVF_PQ":
config = IvfPq(
distance_type=metric,
num_partitions=num_partitions,
num_sub_vectors=num_sub_vectors,
num_bits=num_bits,
)
config = IvfPq(distance_type=metric)
elif index_type == "IVF_HNSW_PQ":
raise ValueError(
"IVF_HNSW_PQ is not supported on LanceDB cloud."
"Please use IVF_HNSW_SQ instead."
)
config = HnswPq(distance_type=metric)
elif index_type == "IVF_HNSW_SQ":
config = HnswSq(distance_type=metric, num_partitions=num_partitions)
config = HnswSq(distance_type=metric)
elif index_type == "IVF_FLAT":
config = IvfFlat(distance_type=metric, num_partitions=num_partitions)
config = IvfFlat(distance_type=metric)
else:
raise ValueError(
f"Unknown vector index type: {index_type}. Valid options are"

View File

@@ -74,7 +74,9 @@ class AnswerdotaiRerankers(Reranker):
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
raise NotImplementedError(
"Answerdotai Reranker does not support score='all' yet"
)
combined_results = combined_results.sort_by(
[("_relevance_score", "descending")]
)

View File

@@ -232,39 +232,6 @@ class Reranker(ABC):
return deduped_table
def _merge_and_keep_scores(self, vector_results: pa.Table, fts_results: pa.Table):
"""
Merge the results from the vector and FTS search and keep the scores.
This op is slower than just keeping the relevance score, but can be
useful for debugging.
"""
# add nulls to fts results for _distance
if "_distance" not in fts_results.column_names:
fts_results = fts_results.append_column(
"_distance",
pa.array([None] * len(fts_results), type=pa.float32()),
)
# add nulls to vector results for _score
if "_score" not in vector_results.column_names:
vector_results = vector_results.append_column(
"_score",
pa.array([None] * len(vector_results), type=pa.float32()),
)
# combine them and fill the scores
vector_results_dict = {row["_rowid"]: row for row in vector_results.to_pylist()}
fts_results_dict = {row["_rowid"]: row for row in fts_results.to_pylist()}
# merge them into vector_results
for key, value in fts_results_dict.items():
if key in vector_results_dict:
vector_results_dict[key]["_score"] = value["_score"]
else:
vector_results_dict[key] = value
combined = pa.Table.from_pylist(list(vector_results_dict.values()))
return combined
def _keep_relevance_score(self, combined_results: pa.Table):
if self.score == "relevance":
if "_score" in combined_results.column_names:

View File

@@ -92,14 +92,14 @@ class CohereReranker(Reranker):
vector_results: pa.Table,
fts_results: pa.Table,
):
if self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
else:
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self._rerank(combined_results, query)
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
raise NotImplementedError(
"return_score='all' not implemented for cohere reranker"
)
return combined_results
def rerank_vector(self, query: str, vector_results: pa.Table):

View File

@@ -81,15 +81,15 @@ class CrossEncoderReranker(Reranker):
vector_results: pa.Table,
fts_results: pa.Table,
):
if self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
else:
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self._rerank(combined_results, query)
# sort the results by _score
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
raise NotImplementedError(
"return_score='all' not implemented for CrossEncoderReranker"
)
combined_results = combined_results.sort_by(
[("_relevance_score", "descending")]
)

View File

@@ -97,14 +97,14 @@ class JinaReranker(Reranker):
vector_results: pa.Table,
fts_results: pa.Table,
):
if self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
else:
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self._rerank(combined_results, query)
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
raise NotImplementedError(
"return_score='all' not implemented for JinaReranker"
)
return combined_results
def rerank_vector(self, query: str, vector_results: pa.Table):

View File

@@ -88,13 +88,14 @@ class OpenaiReranker(Reranker):
vector_results: pa.Table,
fts_results: pa.Table,
):
if self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
else:
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self._rerank(combined_results, query)
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
raise NotImplementedError(
"OpenAI Reranker does not support score='all' yet"
)
combined_results = combined_results.sort_by(
[("_relevance_score", "descending")]

View File

@@ -94,14 +94,14 @@ class VoyageAIReranker(Reranker):
vector_results: pa.Table,
fts_results: pa.Table,
):
if self.score == "all":
combined_results = self._merge_and_keep_scores(vector_results, fts_results)
else:
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self.merge_results(vector_results, fts_results)
combined_results = self._rerank(combined_results, query)
if self.score == "relevance":
combined_results = self._keep_relevance_score(combined_results)
elif self.score == "all":
raise NotImplementedError(
"return_score='all' not implemented for voyageai reranker"
)
return combined_results
def rerank_vector(self, query: str, vector_results: pa.Table):

View File

@@ -102,9 +102,7 @@ if TYPE_CHECKING:
)
def _into_pyarrow_reader(
data, schema: Optional[pa.Schema] = None
) -> pa.RecordBatchReader:
def _into_pyarrow_reader(data) -> pa.RecordBatchReader:
from lancedb.dependencies import datasets
if _check_for_hugging_face(data):
@@ -125,12 +123,6 @@ def _into_pyarrow_reader(
raise ValueError("Cannot add a single dictionary to a table. Use a list.")
if isinstance(data, list):
# Handle empty list case
if not data:
if schema is None:
raise ValueError("Cannot create table from empty list without a schema")
return pa.Table.from_pylist(data, schema=schema).to_reader()
# convert to list of dict if data is a bunch of LanceModels
if isinstance(data[0], LanceModel):
schema = data[0].__class__.to_arrow_schema()
@@ -173,9 +165,9 @@ def _into_pyarrow_reader(
else:
raise TypeError(
f"Unknown data type {type(data)}. "
"Supported types: list of dicts, pandas DataFrame, polars DataFrame, "
"pyarrow Table/RecordBatch, or Pydantic models. "
"See https://lancedb.github.io/lancedb/guides/tables/ for examples."
"Please check "
"https://lancedb.github.io/lancedb/python/python/ "
"to see supported types."
)
@@ -244,7 +236,7 @@ def _sanitize_data(
# 1. There might be embedding columns missing that will be added
# in the add_embeddings step.
# 2. If `allow_subschemas` is True, there might be columns missing.
reader = _into_pyarrow_reader(data, target_schema)
reader = _into_pyarrow_reader(data)
reader = _append_vector_columns(reader, target_schema, metadata=metadata)
@@ -835,7 +827,7 @@ class Table(ABC):
ordering_field_names: Optional[Union[str, List[str]]] = None,
replace: bool = False,
writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
use_tantivy: bool = False,
use_tantivy: bool = True,
tokenizer_name: Optional[str] = None,
with_position: bool = False,
# tokenizer configs:
@@ -846,9 +838,6 @@ class Table(ABC):
stem: bool = True,
remove_stop_words: bool = True,
ascii_folding: bool = True,
ngram_min_length: int = 3,
ngram_max_length: int = 3,
prefix_only: bool = False,
wait_timeout: Optional[timedelta] = None,
):
"""Create a full-text search index on the table.
@@ -875,7 +864,7 @@ class Table(ABC):
The tokenizer to use for the index. Can be "raw", "default" or the two-letter
language code followed by "_stem". For English this would be "en_stem".
For available languages see: https://docs.rs/tantivy/latest/tantivy/tokenizer/enum.Language.html
use_tantivy: bool, default False
use_tantivy: bool, default True
If True, use the legacy full-text search implementation based on tantivy.
If False, use the new full-text search implementation based on lance-index.
with_position: bool, default False
@@ -888,7 +877,6 @@ class Table(ABC):
- "simple": Splits text by whitespace and punctuation.
- "whitespace": Split text by whitespace, but not punctuation.
- "raw": No tokenization. The entire text is treated as a single token.
- "ngram": N-Gram tokenizer.
language : str, default "English"
The language to use for tokenization.
max_token_length : int, default 40
@@ -906,12 +894,6 @@ class Table(ABC):
ascii_folding : bool, default True
Whether to fold ASCII characters. This converts accented characters to
their ASCII equivalent. For example, "café" would be converted to "cafe".
ngram_min_length: int, default 3
The minimum length of an n-gram.
ngram_max_length: int, default 3
The maximum length of an n-gram.
prefix_only: bool, default False
Whether to only index the prefix of the token for ngram tokenizer.
wait_timeout: timedelta, optional
The timeout to wait if indexing is asynchronous.
"""
@@ -1988,7 +1970,7 @@ class LanceTable(Table):
ordering_field_names: Optional[Union[str, List[str]]] = None,
replace: bool = False,
writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
use_tantivy: bool = False,
use_tantivy: bool = True,
tokenizer_name: Optional[str] = None,
with_position: bool = False,
# tokenizer configs:
@@ -1999,9 +1981,6 @@ class LanceTable(Table):
stem: bool = True,
remove_stop_words: bool = True,
ascii_folding: bool = True,
ngram_min_length: int = 3,
ngram_max_length: int = 3,
prefix_only: bool = False,
):
if not use_tantivy:
if not isinstance(field_names, str):
@@ -2017,9 +1996,6 @@ class LanceTable(Table):
"stem": stem,
"remove_stop_words": remove_stop_words,
"ascii_folding": ascii_folding,
"ngram_min_length": ngram_min_length,
"ngram_max_length": ngram_max_length,
"prefix_only": prefix_only,
}
else:
tokenizer_configs = self.infer_tokenizer_configs(tokenizer_name)
@@ -2089,9 +2065,6 @@ class LanceTable(Table):
"stem": False,
"remove_stop_words": False,
"ascii_folding": False,
"ngram_min_length": 3,
"ngram_max_length": 3,
"prefix_only": False,
}
elif tokenizer_name == "raw":
return {
@@ -2102,9 +2075,6 @@ class LanceTable(Table):
"stem": False,
"remove_stop_words": False,
"ascii_folding": False,
"ngram_min_length": 3,
"ngram_max_length": 3,
"prefix_only": False,
}
elif tokenizer_name == "whitespace":
return {
@@ -2115,9 +2085,6 @@ class LanceTable(Table):
"stem": False,
"remove_stop_words": False,
"ascii_folding": False,
"ngram_min_length": 3,
"ngram_max_length": 3,
"prefix_only": False,
}
# otherwise it's a language-stemming tokenizer with a pattern like "en_stem"
@@ -2136,9 +2103,6 @@ class LanceTable(Table):
"stem": True,
"remove_stop_words": False,
"ascii_folding": False,
"ngram_min_length": 3,
"ngram_max_length": 3,
"prefix_only": False,
}
def add(
@@ -3673,10 +3637,8 @@ class AsyncTable:
)
if query.distance_type is not None:
async_query = async_query.distance_type(query.distance_type)
if query.minimum_nprobes is not None:
async_query = async_query.minimum_nprobes(query.minimum_nprobes)
if query.maximum_nprobes is not None:
async_query = async_query.maximum_nprobes(query.maximum_nprobes)
if query.nprobes is not None:
async_query = async_query.nprobes(query.nprobes)
if query.refine_factor is not None:
async_query = async_query.refine_factor(query.refine_factor)
if query.vector_column:

View File

@@ -25,4 +25,4 @@ IndexType = Literal[
]
# Tokenizer literals
BaseTokenizerType = Literal["simple", "raw", "whitespace", "ngram"]
BaseTokenizerType = Literal["simple", "raw", "whitespace"]

View File

@@ -6,7 +6,7 @@ import lancedb
# --8<-- [end:import-lancedb]
# --8<-- [start:import-numpy]
from lancedb.query import BooleanQuery, BoostQuery, MatchQuery, Occur
from lancedb.query import BoostQuery, MatchQuery
import numpy as np
import pyarrow as pa
@@ -191,15 +191,6 @@ def test_fts_fuzzy_query():
"food", # 1 insertion
}
results = table.search(
MatchQuery("foo", "text", fuzziness=1, prefix_length=3)
).to_pandas()
assert len(results) == 2
assert set(results["text"].to_list()) == {
"foo",
"food",
}
@pytest.mark.skipif(
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
@@ -249,60 +240,6 @@ def test_fts_boost_query():
)
@pytest.mark.skipif(
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
)
def test_fts_boolean_query(tmp_path):
uri = tmp_path / "boolean-example"
db = lancedb.connect(uri)
table = db.create_table(
"my_table_fts_boolean",
data=[
{"text": "The cat and dog are playing"},
{"text": "The cat is sleeping"},
{"text": "The dog is barking"},
{"text": "The dog chases the cat"},
],
mode="overwrite",
)
table.create_fts_index("text", use_tantivy=False, replace=True)
# SHOULD
results = table.search(
MatchQuery("cat", "text") | MatchQuery("dog", "text")
).to_pandas()
assert len(results) == 4
assert set(results["text"].to_list()) == {
"The cat and dog are playing",
"The cat is sleeping",
"The dog is barking",
"The dog chases the cat",
}
# MUST
results = table.search(
MatchQuery("cat", "text") & MatchQuery("dog", "text")
).to_pandas()
assert len(results) == 2
assert set(results["text"].to_list()) == {
"The cat and dog are playing",
"The dog chases the cat",
}
# MUST NOT
results = table.search(
BooleanQuery(
[
(Occur.MUST, MatchQuery("cat", "text")),
(Occur.MUST_NOT, MatchQuery("dog", "text")),
]
)
).to_pandas()
assert len(results) == 1
assert set(results["text"].to_list()) == {
"The cat is sleeping",
}
@pytest.mark.skipif(
os.name == "nt", reason="Need to fix https://github.com/lancedb/lance/issues/3905"
)

View File

@@ -33,11 +33,8 @@ tantivy = pytest.importorskip("tantivy")
@pytest.fixture
def table(tmp_path) -> ldb.table.LanceTable:
# Use local random state to avoid affecting other tests
rng = np.random.RandomState(42)
local_random = random.Random(42)
db = ldb.connect(tmp_path)
vectors = [rng.randn(128) for _ in range(100)]
vectors = [np.random.randn(128) for _ in range(100)]
text_nouns = ("puppy", "car")
text2_nouns = ("rabbit", "girl", "monkey")
@@ -47,10 +44,10 @@ def table(tmp_path) -> ldb.table.LanceTable:
text = [
" ".join(
[
text_nouns[local_random.randrange(0, len(text_nouns))],
verbs[local_random.randrange(0, 5)],
adv[local_random.randrange(0, 5)],
adj[local_random.randrange(0, 5)],
text_nouns[random.randrange(0, len(text_nouns))],
verbs[random.randrange(0, 5)],
adv[random.randrange(0, 5)],
adj[random.randrange(0, 5)],
]
)
for _ in range(100)
@@ -58,15 +55,15 @@ def table(tmp_path) -> ldb.table.LanceTable:
text2 = [
" ".join(
[
text2_nouns[local_random.randrange(0, len(text2_nouns))],
verbs[local_random.randrange(0, 5)],
adv[local_random.randrange(0, 5)],
adj[local_random.randrange(0, 5)],
text2_nouns[random.randrange(0, len(text2_nouns))],
verbs[random.randrange(0, 5)],
adv[random.randrange(0, 5)],
adj[random.randrange(0, 5)],
]
)
for _ in range(100)
]
count = [local_random.randint(1, 10000) for _ in range(100)]
count = [random.randint(1, 10000) for _ in range(100)]
table = db.create_table(
"test",
data=pd.DataFrame(
@@ -85,11 +82,8 @@ def table(tmp_path) -> ldb.table.LanceTable:
@pytest.fixture
async def async_table(tmp_path) -> ldb.table.AsyncTable:
# Use local random state to avoid affecting other tests
rng = np.random.RandomState(42)
local_random = random.Random(42)
db = await ldb.connect_async(tmp_path)
vectors = [rng.randn(128) for _ in range(100)]
vectors = [np.random.randn(128) for _ in range(100)]
text_nouns = ("puppy", "car")
text2_nouns = ("rabbit", "girl", "monkey")
@@ -99,10 +93,10 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
text = [
" ".join(
[
text_nouns[local_random.randrange(0, len(text_nouns))],
verbs[local_random.randrange(0, 5)],
adv[local_random.randrange(0, 5)],
adj[local_random.randrange(0, 5)],
text_nouns[random.randrange(0, len(text_nouns))],
verbs[random.randrange(0, 5)],
adv[random.randrange(0, 5)],
adj[random.randrange(0, 5)],
]
)
for _ in range(100)
@@ -110,15 +104,15 @@ async def async_table(tmp_path) -> ldb.table.AsyncTable:
text2 = [
" ".join(
[
text2_nouns[local_random.randrange(0, len(text2_nouns))],
verbs[local_random.randrange(0, 5)],
adv[local_random.randrange(0, 5)],
adj[local_random.randrange(0, 5)],
text2_nouns[random.randrange(0, len(text2_nouns))],
verbs[random.randrange(0, 5)],
adv[random.randrange(0, 5)],
adj[random.randrange(0, 5)],
]
)
for _ in range(100)
]
count = [local_random.randint(1, 10000) for _ in range(100)]
count = [random.randint(1, 10000) for _ in range(100)]
table = await db.create_table(
"test",
data=pd.DataFrame(
@@ -221,19 +215,6 @@ def test_search_fts(table, use_tantivy):
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
# Test boolean query
results = (
table.search(MatchQuery("puppy", "text") & MatchQuery("runs", "text"))
.select(["id", "text"])
.limit(5)
.to_list()
)
assert len(results) == 5
assert len(results[0]) == 3 # id, text, _score
for r in results:
assert "puppy" in r["text"]
assert "runs" in r["text"]
@pytest.mark.asyncio
async def test_fts_select_async(async_table):
@@ -675,46 +656,3 @@ def test_fts_on_list(mem_db: DBConnection):
res = table.search(PhraseQuery("lance database", "text")).limit(5).to_list()
assert len(res) == 2
def test_fts_ngram(mem_db: DBConnection):
data = pa.table({"text": ["hello world", "lance database", "lance is cool"]})
table = mem_db.create_table("test", data=data)
table.create_fts_index("text", use_tantivy=False, base_tokenizer="ngram")
results = table.search("lan", query_type="fts").limit(10).to_list()
assert len(results) == 2
assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
results = (
table.search("nce", query_type="fts").limit(10).to_list()
) # spellchecker:disable-line
assert len(results) == 2
assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
# the default ngram_min_length is 3, so "la" should not match
results = table.search("la", query_type="fts").limit(10).to_list()
assert len(results) == 0
# test setting min_ngram_length and prefix_only
table.create_fts_index(
"text",
use_tantivy=False,
base_tokenizer="ngram",
replace=True,
ngram_min_length=2,
prefix_only=True,
)
results = table.search("lan", query_type="fts").limit(10).to_list()
assert len(results) == 2
assert set(r["text"] for r in results) == {"lance database", "lance is cool"}
results = (
table.search("nce", query_type="fts").limit(10).to_list()
) # spellchecker:disable-line
assert len(results) == 0
results = table.search("la", query_type="fts").limit(10).to_list()
assert len(results) == 2
assert set(r["text"] for r in results) == {"lance database", "lance is cool"}

View File

@@ -166,7 +166,7 @@ async def test_explain_plan(table: AsyncTable):
assert "Vector Search Plan" in plan
assert "KNNVectorDistance" in plan
assert "FTS Search Plan" in plan
assert "LanceRead" in plan
assert "LanceScan" in plan
@pytest.mark.asyncio

View File

@@ -25,8 +25,6 @@ from lancedb.query import (
AsyncQueryBase,
AsyncVectorQuery,
LanceVectorQueryBuilder,
MatchQuery,
PhraseQuery,
Query,
FullTextSearchQuery,
)
@@ -272,9 +270,7 @@ async def test_distance_range_with_new_rows_async():
# append more rows so that execution plan would be mixed with ANN & Flat KNN
new_data = pa.table(
{
"vector": pa.FixedShapeTensorArray.from_numpy_ndarray(
np.random.rand(4, 2) + 1
),
"vector": pa.FixedShapeTensorArray.from_numpy_ndarray(np.random.rand(4, 2)),
}
)
await table.add(new_data)
@@ -441,33 +437,6 @@ def test_query_builder_with_filter(table):
assert all(np.array(rs[0]["vector"]) == [3, 4])
def test_invalid_nprobes_sync(table):
with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(0).to_list()
with pytest.raises(
ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
):
LanceVectorQueryBuilder(table, [0, 0], "vector").maximum_nprobes(5).to_list()
with pytest.raises(
ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
):
LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(100).to_list()
@pytest.mark.asyncio
async def test_invalid_nprobes_async(table_async: AsyncTable):
with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
await table_async.vector_search([0, 0]).minimum_nprobes(0).to_list()
with pytest.raises(
ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
):
await table_async.vector_search([0, 0]).maximum_nprobes(5).to_list()
with pytest.raises(
ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
):
await table_async.vector_search([0, 0]).minimum_nprobes(100).to_list()
def test_query_builder_with_prefilter(table):
df = (
LanceVectorQueryBuilder(table, [0, 0], "vector")
@@ -614,21 +583,6 @@ async def test_query_async(table_async: AsyncTable):
table_async.query().nearest_to(pa.array([1, 2])).nprobes(10),
expected_num_rows=2,
)
await check_query(
table_async.query().nearest_to(pa.array([1, 2])).minimum_nprobes(10),
expected_num_rows=2,
)
await check_query(
table_async.query().nearest_to(pa.array([1, 2])).maximum_nprobes(30),
expected_num_rows=2,
)
await check_query(
table_async.query()
.nearest_to(pa.array([1, 2]))
.minimum_nprobes(10)
.maximum_nprobes(20),
expected_num_rows=2,
)
await check_query(
table_async.query().nearest_to(pa.array([1, 2])).bypass_vector_index(),
expected_num_rows=2,
@@ -777,83 +731,6 @@ async def test_explain_plan_async(table_async: AsyncTable):
assert "KNN" in plan
@pytest.mark.asyncio
async def test_explain_plan_fts(table_async: AsyncTable):
"""Test explain plan for FTS queries"""
# Create FTS index
from lancedb.index import FTS
await table_async.create_index("text", config=FTS())
# Test pure FTS query
query = await table_async.search("dog", query_type="fts", fts_columns="text")
plan = await query.explain_plan()
# Should show FTS details (issue #2465 is now fixed)
assert "MatchQuery: query=dog" in plan
assert "GlobalLimitExec" in plan # Default limit
# Test FTS query with limit
query_with_limit = await table_async.search(
"dog", query_type="fts", fts_columns="text"
)
plan_with_limit = await query_with_limit.limit(1).explain_plan()
assert "MatchQuery: query=dog" in plan_with_limit
assert "GlobalLimitExec: skip=0, fetch=1" in plan_with_limit
# Test FTS query with offset and limit
query_with_offset = await table_async.search(
"dog", query_type="fts", fts_columns="text"
)
plan_with_offset = await query_with_offset.offset(1).limit(1).explain_plan()
assert "MatchQuery: query=dog" in plan_with_offset
assert "GlobalLimitExec: skip=1, fetch=1" in plan_with_offset
@pytest.mark.asyncio
async def test_explain_plan_vector_with_limit_offset(table_async: AsyncTable):
"""Test explain plan for vector queries with limit and offset"""
# Test vector query with limit
plan_with_limit = await (
table_async.query().nearest_to(pa.array([1, 2])).limit(1).explain_plan()
)
assert "KNN" in plan_with_limit
assert "GlobalLimitExec: skip=0, fetch=1" in plan_with_limit
# Test vector query with offset and limit
plan_with_offset = await (
table_async.query()
.nearest_to(pa.array([1, 2]))
.offset(1)
.limit(1)
.explain_plan()
)
assert "KNN" in plan_with_offset
assert "GlobalLimitExec: skip=1, fetch=1" in plan_with_offset
@pytest.mark.asyncio
async def test_explain_plan_with_filters(table_async: AsyncTable):
"""Test explain plan for queries with filters"""
# Test vector query with filter
plan_with_filter = await (
table_async.query().nearest_to(pa.array([1, 2])).where("id = 1").explain_plan()
)
assert "KNN" in plan_with_filter
assert "LanceRead" in plan_with_filter
# Test FTS query with filter
from lancedb.index import FTS
await table_async.create_index("text", config=FTS())
query_fts_filter = await table_async.search(
"dog", query_type="fts", fts_columns="text"
)
plan_fts_filter = await query_fts_filter.where("id = 1").explain_plan()
assert "MatchQuery: query=dog" in plan_fts_filter
assert "LanceRead" in plan_fts_filter
assert "full_filter=id = Int64(1)" in plan_fts_filter # Should show filter details
@pytest.mark.asyncio
async def test_query_camelcase_async(tmp_path):
db = await lancedb.connect_async(tmp_path)
@@ -1032,39 +909,7 @@ def test_query_serialization_sync(table: lancedb.table.Table):
q = table.search([5.0, 6.0]).nprobes(10).refine_factor(5).to_query_object()
check_set_props(
q,
vector_column="vector",
vector=[5.0, 6.0],
minimum_nprobes=10,
maximum_nprobes=10,
refine_factor=5,
)
q = table.search([5.0, 6.0]).minimum_nprobes(10).to_query_object()
check_set_props(
q,
vector_column="vector",
vector=[5.0, 6.0],
minimum_nprobes=10,
maximum_nprobes=None,
)
q = table.search([5.0, 6.0]).nprobes(50).to_query_object()
check_set_props(
q,
vector_column="vector",
vector=[5.0, 6.0],
minimum_nprobes=50,
maximum_nprobes=50,
)
q = table.search([5.0, 6.0]).maximum_nprobes(10).to_query_object()
check_set_props(
q,
vector_column="vector",
vector=[5.0, 6.0],
maximum_nprobes=10,
minimum_nprobes=None,
q, vector_column="vector", vector=[5.0, 6.0], nprobes=10, refine_factor=5
)
q = table.search([5.0, 6.0]).distance_range(0.0, 1.0).to_query_object()
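For orientation: to_query_object() snapshots the builder into a plain query object, and check_set_props asserts that the explicitly-set fields round-tripped. A hedged sketch of what such a helper might look like (the real helper is defined elsewhere in this test module):

def check_set_props(q, **expected):
    # every property the test set explicitly must survive serialization
    for name, value in expected.items():
        assert getattr(q, name) == value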
@@ -1116,8 +961,7 @@ async def test_query_serialization_async(table_async: AsyncTable):
limit=10,
vector=sample_vector,
postfilter=False,
minimum_nprobes=20,
maximum_nprobes=20,
nprobes=20,
with_row_id=False,
bypass_vector_index=False,
)
@@ -1127,20 +971,7 @@ async def test_query_serialization_async(table_async: AsyncTable):
q,
vector=sample_vector,
postfilter=False,
minimum_nprobes=20,
maximum_nprobes=20,
with_row_id=False,
bypass_vector_index=False,
limit=10,
)
q = (await table_async.search([5.0, 6.0])).nprobes(50).to_query_object()
check_set_props(
q,
vector=sample_vector,
postfilter=False,
minimum_nprobes=50,
maximum_nprobes=50,
nprobes=20,
with_row_id=False,
bypass_vector_index=False,
limit=10,
@@ -1159,8 +990,7 @@ async def test_query_serialization_async(table_async: AsyncTable):
filter="id = 1",
postfilter=True,
vector=sample_vector,
minimum_nprobes=20,
maximum_nprobes=20,
nprobes=20,
with_row_id=False,
bypass_vector_index=False,
)
@@ -1174,8 +1004,7 @@ async def test_query_serialization_async(table_async: AsyncTable):
check_set_props(
q,
vector=sample_vector,
minimum_nprobes=10,
maximum_nprobes=10,
nprobes=10,
refine_factor=5,
postfilter=False,
with_row_id=False,
@@ -1183,18 +1012,6 @@ async def test_query_serialization_async(table_async: AsyncTable):
limit=10,
)
q = (await table_async.search([5.0, 6.0])).minimum_nprobes(5).to_query_object()
check_set_props(
q,
vector=sample_vector,
minimum_nprobes=5,
maximum_nprobes=20,
postfilter=False,
with_row_id=False,
bypass_vector_index=False,
limit=10,
)
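A hedged reading of the defaults above: nprobes defaults to 20 on the async path, so setting only the minimum lowers the floor while leaving maximum_nprobes at its default. In sketch form:

q = (await table_async.search([5.0, 6.0])).minimum_nprobes(5).to_query_object()
assert (q.minimum_nprobes, q.maximum_nprobes) == (5, 20)  # per the check above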
q = (
(await table_async.search([5.0, 6.0]))
.distance_range(0.0, 1.0)
@@ -1206,8 +1023,7 @@ async def test_query_serialization_async(table_async: AsyncTable):
lower_bound=0.0,
upper_bound=1.0,
postfilter=False,
minimum_nprobes=20,
maximum_nprobes=20,
nprobes=20,
with_row_id=False,
bypass_vector_index=False,
limit=10,
@@ -1219,8 +1035,7 @@ async def test_query_serialization_async(table_async: AsyncTable):
distance_type="cosine",
vector=sample_vector,
postfilter=False,
minimum_nprobes=20,
maximum_nprobes=20,
nprobes=20,
with_row_id=False,
bypass_vector_index=False,
limit=10,
@@ -1232,8 +1047,7 @@ async def test_query_serialization_async(table_async: AsyncTable):
ef=7,
vector=sample_vector,
postfilter=False,
minimum_nprobes=20,
maximum_nprobes=20,
nprobes=20,
with_row_id=False,
bypass_vector_index=False,
limit=10,
@@ -1245,34 +1059,24 @@ async def test_query_serialization_async(table_async: AsyncTable):
bypass_vector_index=True,
vector=sample_vector,
postfilter=False,
minimum_nprobes=20,
maximum_nprobes=20,
nprobes=20,
with_row_id=False,
limit=10,
)
# FTS queries
match_query = MatchQuery("foo", "text")
q = (await table_async.search(match_query)).limit(10).to_query_object()
q = (await table_async.search("foo")).limit(10).to_query_object()
check_set_props(
q,
limit=10,
full_text_query=FullTextSearchQuery(columns=None, query=match_query),
full_text_query=FullTextSearchQuery(columns=[], query="foo"),
with_row_id=False,
)
q = (await table_async.search(match_query)).to_query_object()
q = (await table_async.search("foo", query_type="fts")).to_query_object()
check_set_props(
q,
full_text_query=FullTextSearchQuery(columns=None, query=match_query),
with_row_id=False,
)
phrase_query = PhraseQuery("foo", "text", slop=1)
q = (await table_async.search(phrase_query)).to_query_object()
check_set_props(
q,
full_text_query=FullTextSearchQuery(columns=None, query=phrase_query),
full_text_query=FullTextSearchQuery(columns=[], query="foo"),
with_row_id=False,
)
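The FTS variants above wrap whatever was searched in a FullTextSearchQuery; plain strings carry the raw text, while structured types add options such as PhraseQuery's slop (maximum token distance). A minimal sketch, assuming the types are importable from lancedb.query:

from lancedb.query import MatchQuery, PhraseQuery  # assumed import path

match = MatchQuery("foo", "text")                # term match in column "text"
phrase = PhraseQuery("foo bar", "text", slop=1)  # tokens at most 1 apart
q = (await table_async.search(phrase)).to_query_object()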
@@ -1339,20 +1143,3 @@ async def test_query_timeout_async(tmp_path):
.nearest_to([0.0, 0.0])
.to_list(timeout=timedelta(0))
)
def test_search_empty_table(mem_db):
"""Test searching on empty table should not crash
Regression test for issue #303:
https://github.com/lancedb/lancedb/issues/303
Searching on empty table produces scary error message
"""
schema = pa.schema(
[pa.field("vector", pa.list_(pa.float32(), 2)), pa.field("id", pa.int64())]
)
table = mem_db.create_table("test_empty_search", schema=schema)
# Search on empty table should return empty results, not crash
results = table.search([1.0, 2.0]).limit(5).to_list()
assert results == []

View File

@@ -210,25 +210,6 @@ async def test_retry_error():
assert cause.status_code == 429
def test_table_unimplemented_functions():
def handler(request):
if request.path == "/v1/table/test/create/?mode=create":
request.send_response(200)
request.send_header("Content-Type", "application/json")
request.end_headers()
request.wfile.write(b"{}")
else:
request.send_response(404)
request.end_headers()
with mock_lancedb_connection(handler) as db:
table = db.create_table("test", [{"id": 1}])
with pytest.raises(NotImplementedError):
table.to_arrow()
with pytest.raises(NotImplementedError):
table.to_pandas()
def test_table_add_in_threadpool():
def handler(request):
if request.path == "/v1/table/test/insert/":
@@ -515,8 +496,6 @@ def test_query_sync_minimal():
"ef": None,
"vector": [1.0, 2.0, 3.0],
"nprobes": 20,
"minimum_nprobes": 20,
"maximum_nprobes": 20,
"version": None,
}
@@ -557,8 +536,6 @@ def test_query_sync_maximal():
"refine_factor": 10,
"vector": [1.0, 2.0, 3.0],
"nprobes": 5,
"minimum_nprobes": 5,
"maximum_nprobes": 5,
"lower_bound": None,
"upper_bound": None,
"ef": None,
@@ -587,66 +564,6 @@ def test_query_sync_maximal():
)
def test_query_sync_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
"vector_column": "vector2",
"refine_factor": None,
"lower_bound": None,
"upper_bound": None,
"ef": None,
"vector": [1.0, 2.0, 3.0],
"nprobes": 5,
"minimum_nprobes": 5,
"maximum_nprobes": 15,
"version": None,
}
return pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
with query_test_table(handler) as table:
(
table.search([1, 2, 3], vector_column_name="vector2", fast_search=True)
.minimum_nprobes(5)
.maximum_nprobes(15)
.to_list()
)
def test_query_sync_no_max_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
"vector_column": "vector2",
"refine_factor": None,
"lower_bound": None,
"upper_bound": None,
"ef": None,
"vector": [1.0, 2.0, 3.0],
"nprobes": 5,
"minimum_nprobes": 5,
"maximum_nprobes": 0,
"version": None,
}
return pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
with query_test_table(handler) as table:
(
table.search([1, 2, 3], vector_column_name="vector2", fast_search=True)
.minimum_nprobes(5)
.maximum_nprobes(0)
.to_list()
)
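Read together with test_query_sync_nprobes above, these removed tests pin down the wire convention: nprobes mirrors minimum_nprobes, and a maximum_nprobes of 0 appears to act as a sentinel for "no upper bound". A sketch of the client call under that reading:

(
    table.search([1, 2, 3], vector_column_name="vector2", fast_search=True)
    .minimum_nprobes(5)   # also serialized as nprobes=5
    .maximum_nprobes(0)   # 0 = unbounded adaptive probing, not zero partitions
    .to_list()
)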
@pytest.mark.parametrize("server_version", [Version("0.1.0"), Version("0.2.0")])
def test_query_sync_batch_queries(server_version):
def handler(body):
@@ -749,8 +666,6 @@ def test_query_sync_hybrid():
"refine_factor": None,
"vector": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
"nprobes": 20,
"minimum_nprobes": 20,
"maximum_nprobes": 20,
"lower_bound": None,
"upper_bound": None,
"ef": None,

Some files were not shown because too many files have changed in this diff.