Mirror of https://github.com/lancedb/lancedb.git (synced 2025-12-23 13:29:57 +00:00)

Compare commits: python-v0. ... python-v0. (30 commits)
Commits in this comparison:

a9897d9d85, acda7a4589, dac0857745, 0a9e1eab75, d999d72c8d, de4720993e,
6c14a307e2, 43747278c8, e5f42a850e, 7920ecf66e, 28e1b70e4b, 52b79d2b1e,
c05d45150d, 48ed3bb544, bcfc93cc88, 214d0debf5, f059372137, 3dc1803c07,
d0501f65f1, 4703cc6894, 493f9ce467, 5c759505b8, bb6a39727e, d57bed90e5,
648327e90c, 6c7e81ee57, 905e9d4738, 38642e349c, 6879861ea8, 88325e488e
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.15.0-beta.0"
+current_version = "0.15.1-beta.0"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
@@ -52,12 +52,7 @@ runs:
       args: ${{ inputs.args }}
       before-script-linux: |
         set -e
-        apt install -y unzip
-        if [ $(uname -m) = "x86_64" ]; then
-          PROTOC_ARCH="x86_64"
-        else
-          PROTOC_ARCH="aarch_64"
-        fi
-        curl -L https://github.com/protocolbuffers/protobuf/releases/download/v24.4/protoc-24.4-linux-$PROTOC_ARCH.zip > /tmp/protoc.zip \
+        yum install -y openssl-devel clang \
+        && curl -L https://github.com/protocolbuffers/protobuf/releases/download/v24.4/protoc-24.4-linux-aarch_64.zip > /tmp/protoc.zip \
        && unzip /tmp/protoc.zip -d /usr/local \
        && rm /tmp/protoc.zip
.github/workflows/pypi-publish.yml (vendored, 14 changed lines)

@@ -15,15 +15,21 @@ jobs:
       - platform: x86_64
         manylinux: "2_17"
         extra_args: ""
+        runner: ubuntu-22.04
       - platform: x86_64
         manylinux: "2_28"
         extra_args: "--features fp16kernels"
+        runner: ubuntu-22.04
       - platform: aarch64
-        manylinux: "2_24"
+        manylinux: "2_17"
         extra_args: ""
-        # We don't build fp16 kernels for aarch64, because it uses
-        # cross compilation image, which doesn't have a new enough compiler.
-    runs-on: "ubuntu-22.04"
+        # For successful fat LTO builds, we need a large runner to avoid OOM errors.
+        runner: ubuntu-2404-8x-arm64
+      - platform: aarch64
+        manylinux: "2_28"
+        extra_args: "--features fp16kernels"
+        runner: ubuntu-2404-8x-arm64
+    runs-on: ${{ matrix.config.runner }}
     steps:
       - uses: actions/checkout@v4
         with:
Cargo.toml (18 changed lines)

@@ -21,14 +21,16 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"
 
 [workspace.dependencies]
-lance = { "version" = "=0.22.0", "features" = ["dynamodb"] }
-lance-io = "=0.22.0"
-lance-index = "=0.22.0"
-lance-linalg = "=0.22.0"
-lance-table = "=0.22.0"
-lance-testing = "=0.22.0"
-lance-datafusion = "=0.22.0"
-lance-encoding = "=0.22.0"
+lance = { "version" = "=0.23.0", "features" = [
+    "dynamodb",
+], git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
+lance-io = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
+lance-index = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
+lance-linalg = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
+lance-table = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
+lance-testing = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
+lance-datafusion = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
+lance-encoding = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
 # Note that this one does not include pyarrow
 arrow = { version = "53.2", optional = false }
 arrow-array = "53.2"
@@ -40,37 +40,4 @@ The [quickstart](../basic.md) contains a more complete example.
 
-## Development
-
-```sh
-npm run build
-npm run test
-```
-
-### Running lint / format
-
-LanceDb uses [biome](https://biomejs.dev/) for linting and formatting. if you are using VSCode you will need to install the official [Biome](https://marketplace.visualstudio.com/items?itemName=biomejs.biome) extension.
-To manually lint your code you can run:
-
-```sh
-npm run lint
-```
-
-to automatically fix all fixable issues:
-
-```sh
-npm run lint-fix
-```
-
-If you do not have your workspace root set to the `nodejs` directory, unfortunately the extension will not work. You can still run the linting and formatting commands manually.
-
-### Generating docs
-
-```sh
-npm run docs
-
-cd ../docs
-# Asssume the virtual environment was created
-# python3 -m venv venv
-# pip install -r requirements.txt
-. ./venv/bin/activate
-mkdocs build
-```
+See [CONTRIBUTING.md](_media/CONTRIBUTING.md) for information on how to contribute to LanceDB.
docs/src/js/_media/CONTRIBUTING.md (new file, 76 lines)

@@ -0,0 +1,76 @@
# Contributing to LanceDB Typescript

This document outlines the process for contributing to LanceDB Typescript.
For general contribution guidelines, see [CONTRIBUTING.md](../CONTRIBUTING.md).

## Project layout

The Typescript package is a wrapper around the Rust library, `lancedb`. We use
the [napi-rs](https://napi.rs/) library to create the bindings between Rust and
Typescript.

* `src/`: Rust bindings source code
* `lancedb/`: Typescript package source code
* `__test__/`: Unit tests
* `examples/`: An npm package with the examples shown in the documentation

## Development environment

To set up your development environment, you will need to install the following:

1. Node.js 14 or later
2. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)

Initial setup:

```shell
npm install
```

### Commit Hooks

It is **highly recommended** to install the [pre-commit](https://pre-commit.com/) hooks to ensure that your
code is formatted correctly and passes basic checks before committing:

```shell
pre-commit install
```

## Development

Most common development commands can be run using the npm scripts.

Build the package:

```shell
npm install
npm run build
```

Lint:

```shell
npm run lint
```

Format and fix lints:

```shell
npm run lint-fix
```

Run tests:

```shell
npm test
```

To run a single test:

```shell
# Single file: table.test.ts
npm test -- table.test.ts
# Single test: 'merge insert' in table.test.ts
npm test -- table.test.ts --testNamePattern=merge\ insert
```
@@ -317,6 +317,32 @@ then call ``cleanup_files`` to remove the old files.
 
 ***
 
+### dropIndex()
+
+```ts
+abstract dropIndex(name): Promise<void>
+```
+
+Drop an index from the table.
+
+#### Parameters
+
+* **name**: `string`
+    The name of the index.
+
+#### Returns
+
+`Promise`<`void`>
+
+#### Note
+
+This does not delete the index from disk, it just removes it from the table.
+To delete the index, run [Table#optimize](Table.md#optimize) after dropping the index.
+
+Use [Table.listIndices](Table.md#listindices) to find the names of the indices.
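For reference, using the new `dropIndex()` together with `optimize()` might look like this (a minimal sketch; the connection path and table name are placeholders, and `listIndices()` is used to find the index name as the note above suggests):

```ts
import * as lancedb from "@lancedb/lancedb";

const db = await lancedb.connect("data/sample-lancedb"); // placeholder path
const tbl = await db.openTable("my_table"); // placeholder table name
// Look up index names, then drop one.
const indices = await tbl.listIndices();
await tbl.dropIndex(indices[0].name);
// dropIndex only removes the index from the table;
// optimize() reclaims the index files on disk.
await tbl.optimize();
```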
 ***
 
 ### indexStats()
 
 ```ts

@@ -336,6 +362,8 @@ List all the stats of a specified index
 
 The stats of the index. If the index does not exist, it will return undefined
 
+Use [Table.listIndices](Table.md#listindices) to find the names of the indices.
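A quick sketch of checking stats for an index (the index name is a placeholder; the shape of the returned object is not spelled out in this diff):

```ts
const stats = await tbl.indexStats("my_index"); // hypothetical index name
if (stats === undefined) {
  console.log("no index with that name");
} else {
  console.log(stats); // e.g. how much of the table the index covers
}
```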
 ***
 
 ### isOpen()

@@ -128,6 +128,24 @@ whose data type is a fixed-size-list of floats.
 
 ***
 
+### distanceRange()
+
+```ts
+distanceRange(lowerBound?, upperBound?): VectorQuery
+```
+
+#### Parameters
+
+* **lowerBound?**: `number`
+
+* **upperBound?**: `number`
+
+#### Returns
+
+[`VectorQuery`](VectorQuery.md)
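A minimal sketch of `distanceRange()` in a query chain (the vector values and bounds are placeholders; per the matching Python docs later in this diff, only rows with distances in [lowerBound, upperBound) are kept):

```ts
const results = await tbl
  .query()
  .nearestTo([0.1, 0.2, 0.3, 0.4]) // placeholder query vector
  .distanceRange(0.0, 0.5) // keep neighbors with distance in [0.0, 0.5)
  .toArray();
```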
 ***
 
 ### distanceType()
 
 ```ts

@@ -528,6 +546,22 @@ distance between the query vector and the actual uncompressed vector.
 
 ***
 
+### rerank()
+
+```ts
+rerank(reranker): VectorQuery
+```
+
+#### Parameters
+
+* **reranker**: [`Reranker`](../namespaces/rerankers/interfaces/Reranker.md)
+
+#### Returns
+
+[`VectorQuery`](VectorQuery.md)
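For reference, wiring a reranker into a vector query might look like this (a sketch; it assumes `RRFReranker` is reachable through the `rerankers` namespace documented below, and `queryVector` is a placeholder `number[]`):

```ts
import { rerankers } from "@lancedb/lancedb";

const reranker = await rerankers.RRFReranker.create();
const results = await tbl
  .query()
  .nearestTo(queryVector)
  .rerank(reranker)
  .toArray();
```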
 ***
 
 ### select()
 
 ```ts

@@ -7,6 +7,7 @@
 ## Namespaces
 
 - [embedding](namespaces/embedding/README.md)
+- [rerankers](namespaces/rerankers/README.md)
 
 ## Enumerations
@@ -68,6 +68,21 @@ The default value is 50.
 
 ***
 
+### numBits?
+
+```ts
+optional numBits: number;
+```
+
+Number of bits per sub-vector.
+
+This value controls how much each subvector is compressed. The more bits, the more
+accurate the index will be, but the slower the search. The default is 8 bits.
+
+The number of bits must be 4 or 8.
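As a sketch of where `numBits` fits, an IVF-PQ index creation call might look like the following (assuming the package's `Index.ivfPq` helper; the parameter values are placeholders, not recommendations):

```ts
import { Index } from "@lancedb/lancedb";

await tbl.createIndex("vector", {
  config: Index.ivfPq({
    numPartitions: 256,
    numSubVectors: 16,
    numBits: 8, // 4 would compress further at some cost in accuracy
  }),
});
```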
 ***
 
 ### numPartitions?
 
 ```ts

docs/src/js/namespaces/rerankers/README.md (new file, 17 lines)

@@ -0,0 +1,17 @@
+[**@lancedb/lancedb**](../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../globals.md) / rerankers
+
+# rerankers
+
+## Index
+
+### Classes
+
+- [RRFReranker](classes/RRFReranker.md)
+
+### Interfaces
+
+- [Reranker](interfaces/Reranker.md)
docs/src/js/namespaces/rerankers/classes/RRFReranker.md (new file, 66 lines)

@@ -0,0 +1,66 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [rerankers](../README.md) / RRFReranker
+
+# Class: RRFReranker
+
+Reranks the results using the Reciprocal Rank Fusion (RRF) algorithm.
+
+Internally this uses the Rust implementation.
+
+## Constructors
+
+### new RRFReranker()
+
+```ts
+new RRFReranker(inner): RRFReranker
+```
+
+#### Parameters
+
+* **inner**: `RrfReranker`
+
+#### Returns
+
+[`RRFReranker`](RRFReranker.md)
+
+## Methods
+
+### rerankHybrid()
+
+```ts
+rerankHybrid(
+   query,
+   vecResults,
+   ftsResults): Promise<RecordBatch<any>>
+```
+
+#### Parameters
+
+* **query**: `string`
+
+* **vecResults**: `RecordBatch`<`any`>
+
+* **ftsResults**: `RecordBatch`<`any`>
+
+#### Returns
+
+`Promise`<`RecordBatch`<`any`>>
+
+***
+
+### create()
+
+```ts
+static create(k): Promise<RRFReranker>
+```
+
+#### Parameters
+
+* **k**: `number` = `60`
+
+#### Returns
+
+`Promise`<[`RRFReranker`](RRFReranker.md)>
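A short usage sketch (the query string and smaller `k` are placeholders; `vecResults` and `ftsResults` stand in for `RecordBatch`es produced by a vector and a full-text query; per the signature above, `create()` defaults `k` to 60):

```ts
import { rerankers } from "@lancedb/lancedb";

// k controls how quickly RRF discounts lower-ranked results.
const reranker = await rerankers.RRFReranker.create(30);
// Fuse vector and full-text hits for the same query string.
const fused = await reranker.rerankHybrid("tiger", vecResults, ftsResults);
```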
docs/src/js/namespaces/rerankers/interfaces/Reranker.md (new file, 30 lines)

@@ -0,0 +1,30 @@
+[**@lancedb/lancedb**](../../../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../../../globals.md) / [rerankers](../README.md) / Reranker
+
+# Interface: Reranker
+
+## Methods
+
+### rerankHybrid()
+
+```ts
+rerankHybrid(
+   query,
+   vecResults,
+   ftsResults): Promise<RecordBatch<any>>
+```
+
+#### Parameters
+
+* **query**: `string`
+
+* **vecResults**: `RecordBatch`<`any`>
+
+* **ftsResults**: `RecordBatch`<`any`>
+
+#### Returns
+
+`Promise`<`RecordBatch`<`any`>>
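To satisfy this interface, a class only needs a matching `rerankHybrid()`. A deliberately trivial sketch (not a useful ranking policy):

```ts
import { RecordBatch } from "apache-arrow";

class VectorOnlyReranker {
  // Satisfies the Reranker shape by simply ignoring the FTS hits.
  async rerankHybrid(
    query: string,
    vecResults: RecordBatch,
    ftsResults: RecordBatch,
  ): Promise<RecordBatch> {
    return vecResults;
  }
}
```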
@@ -114,14 +114,17 @@
     }
    ],
    "source": [
-    "data = [\n",
-    "    {\"vector\": [1.1, 1.2], \"lat\": 45.5, \"long\": -122.7},\n",
-    "    {\"vector\": [0.2, 1.8], \"lat\": 40.1, \"long\": -74.1},\n",
-    "]\n",
-    "\n",
-    "db.create_table(\"table2\", data)\n",
-    "\n",
-    "db[\"table2\"].head() "
+    "import pandas as pd\n",
+    "\n",
+    "data = pd.DataFrame(\n",
+    "    {\n",
+    "        \"vector\": [[1.1, 1.2, 1.3, 1.4], [0.2, 1.8, 0.4, 3.6]],\n",
+    "        \"lat\": [45.5, 40.1],\n",
+    "        \"long\": [-122.7, -74.1],\n",
+    "    }\n",
+    ")\n",
+    "db.create_table(\"my_table_pandas\", data)\n",
+    "db[\"my_table_pandas\"].head()"
    ]
   },
   {

@@ -164,7 +167,7 @@
     "import pyarrow as pa\n",
     "\n",
     "custom_schema = pa.schema([\n",
-    "pa.field(\"vector\", pa.list_(pa.float32(), 2)),\n",
+    "pa.field(\"vector\", pa.list_(pa.float32(), 4)),\n",
     "pa.field(\"lat\", pa.float32()),\n",
     "pa.field(\"long\", pa.float32())\n",
     "])\n",
@@ -147,8 +147,19 @@ to return the entire (typically filtered) table. Vector searches return the
 rows nearest to a query vector and can be created with the
 [AsyncTable.vector_search][lancedb.table.AsyncTable.vector_search] method.
 
-::: lancedb.query.AsyncQueryBase
-
 ::: lancedb.query.AsyncQuery
+    options:
+      inherited_members: true
 
 ::: lancedb.query.AsyncVectorQuery
+    options:
+      inherited_members: true
+
+::: lancedb.query.AsyncFTSQuery
+    options:
+      inherited_members: true
+
+::: lancedb.query.AsyncHybridQuery
+    options:
+      inherited_members: true
@@ -149,6 +149,7 @@ You can index on a column with multivector type and search on it, the query can
 where `sim` is the similarity function (e.g. cosine).
 
 For now, only `cosine` metric is supported for multivector search.
+The vector value type can be `float16`, `float32` or `float64`.
 
 === "Python"
@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.15.0-beta.0</version>
+    <version>0.15.1-beta.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

@@ -6,7 +6,7 @@
 
   <groupId>com.lancedb</groupId>
   <artifactId>lancedb-parent</artifactId>
-  <version>0.15.0-beta.0</version>
+  <version>0.15.1-beta.0</version>
   <packaging>pom</packaging>
 
   <name>LanceDB Parent</name>
node/package-lock.json (generated, 124 changed lines)

@@ -1,12 +1,12 @@
 {
   "name": "vectordb",
-  "version": "0.15.0-beta.0",
+  "version": "0.15.1-beta.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "vectordb",
-      "version": "0.15.0-beta.0",
+      "version": "0.15.1-beta.0",
       "cpu": [
         "x64",
         "arm64"

@@ -52,14 +52,14 @@
     "uuid": "^9.0.0"
   },
   "optionalDependencies": {
-    "@lancedb/vectordb-darwin-arm64": "0.15.0-beta.0",
-    "@lancedb/vectordb-darwin-x64": "0.15.0-beta.0",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.15.0-beta.0",
-    "@lancedb/vectordb-linux-arm64-musl": "0.15.0-beta.0",
-    "@lancedb/vectordb-linux-x64-gnu": "0.15.0-beta.0",
-    "@lancedb/vectordb-linux-x64-musl": "0.15.0-beta.0",
-    "@lancedb/vectordb-win32-arm64-msvc": "0.15.0-beta.0",
-    "@lancedb/vectordb-win32-x64-msvc": "0.15.0-beta.0"
+    "@lancedb/vectordb-darwin-arm64": "0.15.1-beta.0",
+    "@lancedb/vectordb-darwin-x64": "0.15.1-beta.0",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.0",
+    "@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.0",
+    "@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.0",
+    "@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.0",
+    "@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.0",
+    "@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.0"
   },
   "peerDependencies": {
     "@apache-arrow/ts": "^14.0.2",

@@ -329,110 +329,6 @@
(Removes the eight generated `node_modules/@lancedb/vectordb-*` lockfile entries — darwin-arm64, darwin-x64, linux-arm64-gnu, linux-arm64-musl, linux-x64-gnu, linux-x64-musl, win32-arm64-msvc, win32-x64-msvc — each pinned at 0.15.0-beta.0 with its registry "resolved" URL, sha512 "integrity" hash, Apache-2.0 license, and cpu/os fields; context resumes at "node_modules/@neon-rs/cli" version 0.0.160.)
node/package.json

@@ -1,6 +1,6 @@
 {
   "name": "vectordb",
-  "version": "0.15.0-beta.0",
+  "version": "0.15.1-beta.0",
   "description": " Serverless, low-latency vector database for AI applications",
   "private": false,
   "main": "dist/index.js",

@@ -92,13 +92,13 @@
     }
   },
   "optionalDependencies": {
-    "@lancedb/vectordb-darwin-x64": "0.15.0-beta.0",
-    "@lancedb/vectordb-darwin-arm64": "0.15.0-beta.0",
-    "@lancedb/vectordb-linux-x64-gnu": "0.15.0-beta.0",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.15.0-beta.0",
-    "@lancedb/vectordb-linux-x64-musl": "0.15.0-beta.0",
-    "@lancedb/vectordb-linux-arm64-musl": "0.15.0-beta.0",
-    "@lancedb/vectordb-win32-x64-msvc": "0.15.0-beta.0",
-    "@lancedb/vectordb-win32-arm64-msvc": "0.15.0-beta.0"
+    "@lancedb/vectordb-darwin-x64": "0.15.1-beta.0",
+    "@lancedb/vectordb-darwin-arm64": "0.15.1-beta.0",
+    "@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.0",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.0",
+    "@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.0",
+    "@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.0",
+    "@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.0",
+    "@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.0"
   }
 }
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.15.0-beta.0"
+version = "0.15.1-beta.0"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
@@ -83,6 +83,74 @@ describe("embedding functions", () => {
     expect(vector0).toEqual([1, 2, 3]);
   });
 
+  it("should be able to append and upsert using embedding function", async () => {
+    @register()
+    class MockEmbeddingFunction extends EmbeddingFunction<string> {
+      toJSON(): object {
+        return {};
+      }
+      ndims() {
+        return 3;
+      }
+      embeddingDataType(): Float {
+        return new Float32();
+      }
+      async computeQueryEmbeddings(_data: string) {
+        return [1, 2, 3];
+      }
+      async computeSourceEmbeddings(data: string[]) {
+        return Array.from({ length: data.length }).fill([
+          1, 2, 3,
+        ]) as number[][];
+      }
+    }
+    const func = new MockEmbeddingFunction();
+    const db = await connect(tmpDir.name);
+    const table = await db.createTable(
+      "test",
+      [
+        { id: 1, text: "hello" },
+        { id: 2, text: "world" },
+      ],
+      {
+        embeddingFunction: {
+          function: func,
+          sourceColumn: "text",
+        },
+      },
+    );
+
+    const schema = await table.schema();
+    expect(schema.metadata.get("embedding_functions")).toBeDefined();
+
+    // Append some new data
+    const data1 = [
+      { id: 3, text: "forest" },
+      { id: 4, text: "mountain" },
+    ];
+    await table.add(data1);
+
+    // Upsert some data
+    const data2 = [
+      { id: 5, text: "river" },
+      { id: 2, text: "canyon" },
+    ];
+    await table
+      .mergeInsert("id")
+      .whenMatchedUpdateAll()
+      .whenNotMatchedInsertAll()
+      .execute(data2);
+
+    const rows = await table.query().toArray();
+    rows.sort((a, b) => a.id - b.id);
+    const texts = rows.map((row) => row.text);
+    expect(texts).toEqual(["hello", "canyon", "forest", "mountain", "river"]);
+    const vectorsDefined = rows.map(
+      (row) => row.vector !== undefined && row.vector !== null,
+    );
+    expect(vectorsDefined).toEqual(new Array(5).fill(true));
+  });
+
   it("should be able to create an empty table with an embedding function", async () => {
     @register()
     class MockEmbeddingFunction extends EmbeddingFunction<string> {
@@ -473,6 +473,10 @@ describe("When creating an index", () => {
     // test offset
     rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
     expect(rst.numRows).toBe(1);
 
+    await tbl.dropIndex("vec_idx");
+    const indices2 = await tbl.listIndices();
+    expect(indices2.length).toBe(0);
   });
 
   it("should search with distance range", async () => {
@@ -609,6 +609,14 @@ async function applyEmbeddings<T>(
     return table;
   }
 
+  let schemaMetadata = schema?.metadata || new Map<string, string>();
+
+  if (!(embeddings == null || embeddings === undefined)) {
+    const registry = getRegistry();
+    const embeddingMetadata = registry.getTableMetadata([embeddings]);
+    schemaMetadata = new Map([...schemaMetadata, ...embeddingMetadata]);
+  }
+
   // Convert from ArrowTable to Record<String, Vector>
   const colEntries = [...Array(table.numCols).keys()].map((_, idx) => {
     const name = table.schema.fields[idx].name;

@@ -677,15 +685,21 @@ async function applyEmbeddings<T>(
     newColumns[destColumn] = makeVector(vectors, destType);
   }
 
-  const newTable = new ArrowTable(newColumns);
+  let newTable = new ArrowTable(newColumns);
   if (schema != null) {
     if (schema.fields.find((f) => f.name === destColumn) === undefined) {
       throw new Error(
         `When using embedding functions and specifying a schema the schema should include the embedding column but the column ${destColumn} was missing`,
       );
     }
-    return alignTable(newTable, schema as Schema);
+    newTable = alignTable(newTable, schema as Schema);
   }
 
+  newTable = new ArrowTable(
+    new Schema(newTable.schema.fields, schemaMetadata),
+    newTable.batches,
+  );
+
   return newTable;
 }
@@ -1,13 +1,20 @@
-import { Data, fromDataToBuffer } from "./arrow";
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+import { Data, Schema, fromDataToBuffer } from "./arrow";
 import { NativeMergeInsertBuilder } from "./native";
 
 /** A builder used to create and run a merge insert operation */
 export class MergeInsertBuilder {
   #native: NativeMergeInsertBuilder;
+  #schema: Schema | Promise<Schema>;
 
   /** Construct a MergeInsertBuilder. __Internal use only.__ */
-  constructor(native: NativeMergeInsertBuilder) {
+  constructor(
+    native: NativeMergeInsertBuilder,
+    schema: Schema | Promise<Schema>,
+  ) {
     this.#native = native;
+    this.#schema = schema;
   }
 
   /**

@@ -35,6 +42,7 @@ export class MergeInsertBuilder {
   whenMatchedUpdateAll(options?: { where: string }): MergeInsertBuilder {
     return new MergeInsertBuilder(
       this.#native.whenMatchedUpdateAll(options?.where),
+      this.#schema,
     );
   }
   /**

@@ -42,7 +50,10 @@ export class MergeInsertBuilder {
    * be inserted into the target table.
    */
   whenNotMatchedInsertAll(): MergeInsertBuilder {
-    return new MergeInsertBuilder(this.#native.whenNotMatchedInsertAll());
+    return new MergeInsertBuilder(
+      this.#native.whenNotMatchedInsertAll(),
+      this.#schema,
+    );
   }
   /**
   * Rows that exist only in the target table (old data) will be

@@ -56,6 +67,7 @@ export class MergeInsertBuilder {
   }): MergeInsertBuilder {
     return new MergeInsertBuilder(
       this.#native.whenNotMatchedBySourceDelete(options?.where),
+      this.#schema,
     );
   }
   /**

@@ -64,7 +76,14 @@ export class MergeInsertBuilder {
    * Nothing is returned but the `Table` is updated
    */
   async execute(data: Data): Promise<void> {
-    const buffer = await fromDataToBuffer(data);
+    let schema: Schema;
+    if (this.#schema instanceof Promise) {
+      schema = await this.#schema;
+      this.#schema = schema; // In case of future calls
+    } else {
+      schema = this.#schema;
+    }
+    const buffer = await fromDataToBuffer(data, undefined, schema);
     await this.#native.execute(buffer);
   }
 }
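The effect of threading the schema through the builder appears to be that `execute()` can convert incoming rows against the table's schema. For context, a typical upsert built with this API (mirroring the test earlier in this diff; `tbl` is an open table keyed on `id`):

```ts
await tbl
  .mergeInsert("id")
  .whenMatchedUpdateAll()
  .whenNotMatchedInsertAll()
  .execute([
    { id: 2, text: "canyon" }, // matches an existing row, so it is updated
    { id: 5, text: "river" }, // no match, so it is inserted
  ]);
```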
@@ -226,6 +226,19 @@ export abstract class Table {
     column: string,
     options?: Partial<IndexOptions>,
   ): Promise<void>;
 
+  /**
+   * Drop an index from the table.
+   *
+   * @param name The name of the index.
+   *
+   * @note This does not delete the index from disk, it just removes it from the table.
+   * To delete the index, run {@link Table#optimize} after dropping the index.
+   *
+   * Use {@link Table.listIndices} to find the names of the indices.
+   */
+  abstract dropIndex(name: string): Promise<void>;
+
   /**
    * Create a {@link Query} Builder.
    *

@@ -426,6 +439,8 @@ export abstract class Table {
   *
   * @param {string} name The name of the index.
   * @returns {IndexStatistics | undefined} The stats of the index. If the index does not exist, it will return undefined
+  *
+  * Use {@link Table.listIndices} to find the names of the indices.
   */
  abstract indexStats(name: string): Promise<IndexStatistics | undefined>;

@@ -505,14 +520,8 @@ export class LocalTable extends Table {
   async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
     const mode = options?.mode ?? "append";
     const schema = await this.schema();
-    const registry = getRegistry();
-    const functions = await registry.parseFunctions(schema.metadata);
-
-    const buffer = await fromDataToBuffer(
-      data,
-      functions.values().next().value,
-      schema,
-    );
+    const buffer = await fromDataToBuffer(data, undefined, schema);
     await this.inner.add(buffer, mode);
   }

@@ -591,6 +600,10 @@ export class LocalTable extends Table {
     await this.inner.createIndex(nativeIndex, column, options?.replace);
   }
 
+  async dropIndex(name: string): Promise<void> {
+    await this.inner.dropIndex(name);
+  }
+
   query(): Query {
     return new Query(this.inner);
   }

@@ -714,7 +727,7 @@ export class LocalTable extends Table {
   }
   mergeInsert(on: string | string[]): MergeInsertBuilder {
     on = Array.isArray(on) ? on : [on];
-    return new MergeInsertBuilder(this.inner.mergeInsert(on));
+    return new MergeInsertBuilder(this.inner.mergeInsert(on), this.schema());
   }
 
   /**
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.15.0-beta.0",
+  "version": "0.15.1-beta.0",
   "os": ["darwin"],
   "cpu": ["arm64"],
   "main": "lancedb.darwin-arm64.node",

The same @@ -1,6 +1,6 @@ bump from "0.15.0-beta.0" to "0.15.1-beta.0" is applied to the package.json of each of the other platform packages: @lancedb/lancedb-darwin-x64, @lancedb/lancedb-linux-arm64-gnu, @lancedb/lancedb-linux-arm64-musl, @lancedb/lancedb-linux-x64-gnu, @lancedb/lancedb-linux-x64-musl, @lancedb/lancedb-win32-arm64-msvc, and @lancedb/lancedb-win32-x64-msvc.
nodejs/package-lock.json (generated, 4 changed lines)

@@ -1,12 +1,12 @@
 {
   "name": "@lancedb/lancedb",
-  "version": "0.15.0-beta.0",
+  "version": "0.15.1-beta.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@lancedb/lancedb",
-      "version": "0.15.0-beta.0",
+      "version": "0.15.1-beta.0",
       "cpu": [
         "x64",
         "arm64"
@@ -11,7 +11,7 @@
     "ann"
   ],
   "private": false,
-  "version": "0.15.0-beta.0",
+  "version": "0.15.1-beta.0",
   "main": "dist/index.js",
   "exports": {
     ".": "./dist/index.js",
@@ -135,6 +135,14 @@ impl Table {
         builder.execute().await.default_error()
     }
 
+    #[napi(catch_unwind)]
+    pub async fn drop_index(&self, index_name: String) -> napi::Result<()> {
+        self.inner_ref()?
+            .drop_index(&index_name)
+            .await
+            .default_error()
+    }
+
     #[napi(catch_unwind)]
     pub async fn update(
         &self,
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.18.0"
+current_version = "0.18.1-beta.2"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.18.0"
+version = "0.18.1-beta.2"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -4,7 +4,7 @@ name = "lancedb"
 dynamic = ["version"]
 dependencies = [
     "deprecation",
-    "pylance==0.22.0",
+    "pylance==0.23.0b3",
     "tqdm>=4.27.0",
     "pydantic>=1.10",
     "packaging",

@@ -55,7 +55,7 @@ tests = [
     "tantivy",
     "pyarrow-stubs",
 ]
-dev = ["ruff", "pre-commit", "pyright"]
+dev = ["ruff", "pre-commit", "pyright", 'typing-extensions>=4.0.0; python_version < "3.11"']
 docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
 clip = ["torch", "pillow", "open-clip"]
 embeddings = [
@@ -84,11 +84,15 @@ class RecordBatchStream:
 class Query:
     def where(self, filter: str): ...
     def select(self, columns: Tuple[str, str]): ...
+    def select_columns(self, columns: List[str]): ...
     def limit(self, limit: int): ...
+    def offset(self, offset: int): ...
     def fast_search(self): ...
     def with_row_id(self): ...
     def postfilter(self): ...
     def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
+    def nearest_to_text(self, query: dict) -> FTSQuery: ...
-    async def execute(self, max_batch_legnth: Optional[int]) -> RecordBatchStream: ...
+    async def execute(self, max_batch_length: Optional[int]) -> RecordBatchStream: ...
 
 class FTSQuery:
     def where(self, filter: str): ...

@@ -98,6 +102,8 @@ class FTSQuery:
     def fast_search(self): ...
     def with_row_id(self): ...
     def postfilter(self): ...
+    def get_query(self) -> str: ...
+    def add_query_vector(self, query_vec: pa.Array) -> None: ...
     def nearest_to(self, query_vec: pa.Array) -> HybridQuery: ...
     async def execute(self, max_batch_length: Optional[int]) -> RecordBatchStream: ...
     async def explain_plan(self) -> str: ...
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Optional, Union
 
 import pyarrow as pa

@@ -12,17 +12,27 @@ class AsyncRecordBatchReader:
     Also allows access to the schema of the stream
     """
 
-    def __init__(self, inner: RecordBatchStream):
-        self.inner_ = inner
-
-    @property
-    def schema(self) -> pa.Schema:
+    def __init__(
+        self,
+        inner: Union[RecordBatchStream, pa.Table],
+        max_batch_length: Optional[int] = None,
+    ):
         """
-        Get the schema of the batches produced by the stream
-
-        Accessing the schema does not consume any data from the stream
+        Attributes
+        ----------
+        schema : pa.Schema
+            The schema of the batches produced by the stream.
+            Accessing the schema does not consume any data from the stream
         """
-        return self.inner_.schema()
+        if isinstance(inner, pa.Table):
+            self._inner = self._async_iter_from_table(inner, max_batch_length)
+            self.schema: pa.Schema = inner.schema
+        elif isinstance(inner, RecordBatchStream):
+            self._inner = inner
+            self.schema: pa.Schema = inner.schema
+        else:
+            raise TypeError("inner must be a RecordBatchStream or a Table")

@@ -38,7 +48,18 @@ class AsyncRecordBatchReader:
         return self
 
     async def __anext__(self) -> pa.RecordBatch:
-        next = await self.inner_.next()
-        if next is None:
-            raise StopAsyncIteration
-        return next
+        return await self._inner.__anext__()
+
+    @staticmethod
+    async def _async_iter_from_table(
+        table: pa.Table, max_batch_length: Optional[int] = None
+    ):
+        """
+        Create an AsyncRecordBatchReader from a Table
+
+        This is useful when you have a Table that you want to iterate
+        over asynchronously
+        """
+        batches = table.to_batches(max_chunksize=max_batch_length)
+        for batch in batches:
+            yield batch
@@ -20,6 +20,7 @@ import asyncio
 import deprecation
 import numpy as np
 import pyarrow as pa
+import pyarrow.compute as pc
 import pyarrow.fs as pa_fs
 import pydantic

@@ -31,6 +32,7 @@ from .rerankers.util import check_reranker_result
 from .util import safe_import_pandas, flatten_columns
 
 if TYPE_CHECKING:
+    import sys
     import PIL
     import polars as pl

@@ -42,6 +44,11 @@ if TYPE_CHECKING:
     from .pydantic import LanceModel
     from .table import Table
 
+    if sys.version_info >= (3, 11):
+        from typing import Self
+    else:
+        from typing_extensions import Self
+
 pd = safe_import_pandas()
@@ -498,7 +505,7 @@ class LanceQueryBuilder(ABC):
             "column": self._vector_column,
             "q": self._query,
             "k": self._limit,
-            "metric": self._metric,
+            "metric": self._distance_type,
             "nprobes": self._nprobes,
             "refine_factor": self._refine_factor,
             "use_index": self._use_index,

@@ -569,7 +576,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
     >>> db = lancedb.connect("./.lancedb")
     >>> table = db.create_table("my_table", data=data)
     >>> (table.search([0.4, 0.4])
-    ...    .metric("cosine")
+    ...    .distance_type("cosine")
     ...    .where("b < 10")
     ...    .select(["b", "vector"])
     ...    .limit(2)

@@ -589,7 +596,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
     ):
         super().__init__(table)
         self._query = query
-        self._metric = "L2"
+        self._distance_type = "L2"
         self._nprobes = 20
         self._lower_bound = None
         self._upper_bound = None

@@ -603,6 +610,9 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
     def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
         """Set the distance metric to use.
 
+        This is an alias for distance_type() and may be deprecated in the future.
+        Please use distance_type() instead.
+
         Parameters
         ----------
         metric: "L2" or "cosine" or "dot"

@@ -613,7 +623,32 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
         LanceVectorQueryBuilder
             The LanceQueryBuilder object.
         """
-        self._metric = metric.lower()
-        return self
+        return self.distance_type(metric)
+
+    def distance_type(
+        self, distance_type: Literal["L2", "cosine", "dot"]
+    ) -> "LanceVectorQueryBuilder":
+        """Set the distance metric to use.
+
+        When performing a vector search we try and find the "nearest" vectors according
+        to some kind of distance metric. This parameter controls which distance metric
+        to use.
+
+        Note: if there is a vector index then the distance type used MUST match the
+        distance type used to train the vector index. If this is not done then the
+        results will be invalid.
+
+        Parameters
+        ----------
+        distance_type: "L2" or "cosine" or "dot"
+            The distance metric to use. By default "L2" is used.
+
+        Returns
+        -------
+        LanceVectorQueryBuilder
+            The LanceQueryBuilder object.
+        """
+        self._distance_type = distance_type.lower()
+        return self
 
     def nprobes(self, nprobes: int) -> LanceVectorQueryBuilder:

@@ -738,7 +773,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
             filter=self._where,
             prefilter=self._prefilter,
             k=self._limit,
-            metric=self._metric,
+            metric=self._distance_type,
             columns=self._columns,
             nprobes=self._nprobes,
             lower_bound=self._lower_bound,

@@ -1071,7 +1106,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
         self._reranker = RRFReranker()
         self._nprobes = None
         self._refine_factor = None
-        self._metric = None
+        self._distance_type = None
         self._phrase_query = False
 
     def _validate_query(self, query, vector=None, text=None):

@@ -1139,8 +1174,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
         self._fts_query.with_row_id(True)
         if self._phrase_query:
             self._fts_query.phrase_query(True)
-        if self._metric:
-            self._vector_query.metric(self._metric)
+        if self._distance_type:
+            self._vector_query.metric(self._distance_type)
         if self._nprobes:
             self._vector_query.nprobes(self._nprobes)
         if self._refine_factor:
@@ -1183,18 +1218,52 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
         fts_results = LanceHybridQueryBuilder._rank(fts_results, "_score")
 
         # normalize the scores to be between 0 and 1, 0 being most relevant
-        vector_results = LanceHybridQueryBuilder._normalize_scores(
-            vector_results, "_distance"
-        )
+        # We check whether the results (vector and FTS) are empty, because when
+        # they are, they often are missing the _rowid column, which causes an error
+        if vector_results.num_rows > 0:
+            distance_i = vector_results.column_names.index("_distance")
+            original_distances = vector_results.column(distance_i)
+            original_distance_row_ids = vector_results.column("_rowid")
+            vector_results = vector_results.set_column(
+                distance_i,
+                vector_results.field(distance_i),
+                LanceHybridQueryBuilder._normalize_scores(original_distances),
+            )
 
         # In fts higher scores represent relevance. Not inverting them here as
         # rerankers might need to preserve this score to support `return_score="all"`
-        fts_results = LanceHybridQueryBuilder._normalize_scores(fts_results, "_score")
+        if fts_results.num_rows > 0:
+            score_i = fts_results.column_names.index("_score")
+            original_scores = fts_results.column(score_i)
+            original_score_row_ids = fts_results.column("_rowid")
+            fts_results = fts_results.set_column(
+                score_i,
+                fts_results.field(score_i),
+                LanceHybridQueryBuilder._normalize_scores(original_scores),
+            )
 
         results = reranker.rerank_hybrid(fts_query, vector_results, fts_results)
 
         check_reranker_result(results)
 
+        if "_distance" in results.column_names:
+            # restore the original distances
+            indices = pc.index_in(
+                results["_rowid"], original_distance_row_ids, skip_nulls=True
+            )
+            original_distances = pc.take(original_distances, indices)
+            distance_i = results.column_names.index("_distance")
+            results = results.set_column(distance_i, "_distance", original_distances)
+
+        if "_score" in results.column_names:
+            # restore the original scores
+            indices = pc.index_in(
+                results["_rowid"], original_score_row_ids, skip_nulls=True
+            )
+            original_scores = pc.take(original_scores, indices)
+            score_i = results.column_names.index("_score")
+            results = results.set_column(score_i, "_score", original_scores)
+
         results = results.slice(length=limit)
 
         if not with_row_ids:

@@ -1224,28 +1293,23 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
         return results
 
     @staticmethod
-    def _normalize_scores(results: pa.Table, column: str, invert=False):
-        if len(results) == 0:
-            return results
-        # Get the _score column from results
-        scores = results.column(column).to_numpy()
+    def _normalize_scores(scores: pa.Array, invert=False) -> pa.Array:
+        if len(scores) == 0:
+            return scores
         # normalize the scores by subtracting the min and dividing by the max
-        max, min = np.max(scores), np.min(scores)
-        if np.isclose(max, min):
-            rng = max
-        else:
-            rng = max - min
-        # If rng is 0 then min and max are both 0 and so we can leave the scores as is
-        if rng != 0:
-            scores = (scores - min) / rng
+        min, max = pc.min_max(scores).values()
+        rng = pc.subtract(max, min)
+
+        if not pc.equal(rng, pa.scalar(0.0)).as_py():
+            scores = pc.divide(pc.subtract(scores, min), rng)
+        elif not pc.equal(max, pa.scalar(0.0)).as_py():
+            # If rng is 0, then we at least want the scores to be 0
+            scores = pc.subtract(scores, min)
 
         if invert:
-            scores = 1 - scores
-        # replace the _score column with the ranks
-        _score_idx = results.column_names.index(column)
-        results = results.set_column(
-            _score_idx, column, pa.array(scores, type=pa.float32())
-        )
-        return results
+            scores = pc.subtract(1, scores)
+
+        return scores
 
     def rerank(
         self,
@@ -1350,6 +1414,9 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
     def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
         """Set the distance metric to use.
 
+        This is an alias for distance_type() and may be deprecated in the future.
+        Please use distance_type() instead.
+
         Parameters
         ----------
         metric: "L2" or "cosine" or "dot"

@@ -1360,7 +1427,32 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
         LanceVectorQueryBuilder
             The LanceQueryBuilder object.
         """
-        self._metric = metric.lower()
-        return self
+        return self.distance_type(metric)
+
+    def distance_type(
+        self, distance_type: Literal["L2", "cosine", "dot"]
+    ) -> "LanceHybridQueryBuilder":
+        """Set the distance metric to use.
+
+        When performing a vector search we try and find the "nearest" vectors according
+        to some kind of distance metric. This parameter controls which distance metric
+        to use.
+
+        Note: if there is a vector index then the distance type used MUST match the
+        distance type used to train the vector index. If this is not done then the
+        results will be invalid.
+
+        Parameters
+        ----------
+        distance_type: "L2" or "cosine" or "dot"
+            The distance metric to use. By default "L2" is used.
+
+        Returns
+        -------
+        LanceVectorQueryBuilder
+            The LanceQueryBuilder object.
+        """
+        self._distance_type = distance_type.lower()
+        return self
 
     def refine_factor(self, refine_factor: int) -> LanceHybridQueryBuilder:
@@ -1418,7 +1510,7 @@ class AsyncQueryBase(object):
         """
         self._inner = inner
 
-    def where(self, predicate: str) -> AsyncQuery:
+    def where(self, predicate: str) -> Self:
         """
         Only return rows matching the given predicate
 

@@ -1437,7 +1529,7 @@ class AsyncQueryBase(object):
         self._inner.where(predicate)
         return self
 
-    def select(self, columns: Union[List[str], dict[str, str]]) -> AsyncQuery:
+    def select(self, columns: Union[List[str], dict[str, str]]) -> Self:
         """
         Return only the specified columns.
 

@@ -1475,7 +1567,7 @@ class AsyncQueryBase(object):
             raise TypeError("columns must be a list of column names or a dict")
         return self
 
-    def limit(self, limit: int) -> AsyncQuery:
+    def limit(self, limit: int) -> Self:
         """
         Set the maximum number of results to return.
 

@@ -1485,7 +1577,7 @@ class AsyncQueryBase(object):
         self._inner.limit(limit)
         return self
 
-    def offset(self, offset: int) -> AsyncQuery:
+    def offset(self, offset: int) -> Self:
         """
         Set the offset for the results.
 

@@ -1497,7 +1589,7 @@ class AsyncQueryBase(object):
         self._inner.offset(offset)
         return self
 
-    def fast_search(self) -> AsyncQuery:
+    def fast_search(self) -> Self:
         """
         Skip searching un-indexed data.
 

@@ -1511,14 +1603,14 @@ class AsyncQueryBase(object):
         self._inner.fast_search()
         return self
 
-    def with_row_id(self) -> AsyncQuery:
+    def with_row_id(self) -> Self:
         """
         Include the _rowid column in the results.
         """
         self._inner.with_row_id()
         return self
 
-    def postfilter(self) -> AsyncQuery:
+    def postfilter(self) -> Self:
         """
         If this is called then filtering will happen after the search instead of
         before.
@@ -1754,7 +1846,7 @@ class AsyncQuery(AsyncQueryBase):
|
||||
raise ValueError("query_vector can not be None")
|
||||
|
||||
if (
|
||||
isinstance(query_vector, list)
|
||||
isinstance(query_vector, (list, np.ndarray, pa.Array))
|
||||
and len(query_vector) > 0
|
||||
and isinstance(query_vector[0], (list, np.ndarray, pa.Array))
|
||||
):
|
||||
@@ -1807,8 +1899,8 @@ class AsyncFTSQuery(AsyncQueryBase):
|
||||
self._inner = inner
|
||||
self._reranker = None
|
||||
|
||||
def get_query(self):
|
||||
self._inner.get_query()
|
||||
def get_query(self) -> str:
|
||||
return self._inner.get_query()
|
||||
|
||||
def rerank(
|
||||
self,
|
||||
@@ -1891,29 +1983,18 @@ class AsyncFTSQuery(AsyncQueryBase):
|
||||
self._inner.nearest_to(AsyncQuery._query_vec_to_array(query_vector))
|
||||
)
|
||||
|
||||
async def to_arrow(self) -> pa.Table:
|
||||
results = await super().to_arrow()
|
||||
async def to_batches(
|
||||
self, *, max_batch_length: Optional[int] = None
|
||||
) -> AsyncRecordBatchReader:
|
||||
reader = await super().to_batches()
|
||||
results = pa.Table.from_batches(await reader.read_all(), reader.schema)
|
||||
if self._reranker:
|
||||
results = self._reranker.rerank_fts(results)
|
||||
return results
|
||||
results = self._reranker.rerank_fts(self.get_query(), results)
|
||||
return AsyncRecordBatchReader(results, max_batch_length=max_batch_length)
|
||||
|
||||
|
||||
class AsyncVectorQuery(AsyncQueryBase):
|
||||
def __init__(self, inner: LanceVectorQuery):
|
||||
"""
|
||||
Construct an AsyncVectorQuery
|
||||
|
||||
This method is not intended to be called directly. Instead, create
|
||||
a query first with [AsyncTable.query][lancedb.table.AsyncTable.query] and then
|
||||
use [AsyncQuery.nearest_to][lancedb.query.AsyncQuery.nearest_to]] to convert to
|
||||
a vector query. Or you can use
|
||||
[AsyncTable.vector_search][lancedb.table.AsyncTable.vector_search]
|
||||
"""
|
||||
super().__init__(inner)
|
||||
self._inner = inner
|
||||
self._reranker = None
|
||||
|
||||
def column(self, column: str) -> AsyncVectorQuery:
|
||||
class AsyncVectorQueryBase:
|
||||
def column(self, column: str) -> Self:
|
||||
"""
|
||||
Set the vector column to query
|
||||
|
||||
@@ -1926,7 +2007,7 @@ class AsyncVectorQuery(AsyncQueryBase):
|
||||
self._inner.column(column)
|
||||
return self
|
||||
|
||||
def nprobes(self, nprobes: int) -> AsyncVectorQuery:
|
||||
def nprobes(self, nprobes: int) -> Self:
|
||||
"""
|
||||
Set the number of partitions to search (probe)
|
||||
|
||||
@@ -1954,7 +2035,7 @@ class AsyncVectorQuery(AsyncQueryBase):
|
||||
|
||||
def distance_range(
|
||||
self, lower_bound: Optional[float] = None, upper_bound: Optional[float] = None
|
||||
) -> AsyncVectorQuery:
|
||||
) -> Self:
|
||||
"""Set the distance range to use.
|
||||
|
||||
Only rows with distances within range [lower_bound, upper_bound)
|
||||
@@ -1975,7 +2056,7 @@ class AsyncVectorQuery(AsyncQueryBase):
|
||||
self._inner.distance_range(lower_bound, upper_bound)
|
||||
return self
|
||||
|
||||
def ef(self, ef: int) -> AsyncVectorQuery:
|
||||
def ef(self, ef: int) -> Self:
|
||||
"""
|
||||
Set the number of candidates to consider during search
|
||||
|
||||
@@ -1990,7 +2071,7 @@ class AsyncVectorQuery(AsyncQueryBase):
|
||||
self._inner.ef(ef)
|
||||
return self
|
||||
|
||||
def refine_factor(self, refine_factor: int) -> AsyncVectorQuery:
|
||||
def refine_factor(self, refine_factor: int) -> Self:
|
||||
"""
|
||||
A multiplier to control how many additional rows are taken during the refine
|
||||
step
|
||||
@@ -2026,7 +2107,7 @@ class AsyncVectorQuery(AsyncQueryBase):
|
||||
self._inner.refine_factor(refine_factor)
|
||||
return self
|
||||
|
||||
def distance_type(self, distance_type: str) -> AsyncVectorQuery:
|
||||
def distance_type(self, distance_type: str) -> Self:
|
||||
"""
|
||||
Set the distance metric to use
|
||||
|
||||
@@ -2044,7 +2125,7 @@ class AsyncVectorQuery(AsyncQueryBase):
|
||||
self._inner.distance_type(distance_type)
|
||||
return self
|
||||
|
||||
def bypass_vector_index(self) -> AsyncVectorQuery:
|
||||
def bypass_vector_index(self) -> Self:
|
||||
"""
|
||||
If this is called then any vector index is skipped
|
||||
|
||||
@@ -2057,6 +2138,23 @@ class AsyncVectorQuery(AsyncQueryBase):
|
||||
self._inner.bypass_vector_index()
|
||||
return self

class AsyncVectorQuery(AsyncQueryBase, AsyncVectorQueryBase):
    def __init__(self, inner: LanceVectorQuery):
        """
        Construct an AsyncVectorQuery

        This method is not intended to be called directly. Instead, create
        a query first with [AsyncTable.query][lancedb.table.AsyncTable.query] and then
        use [AsyncQuery.nearest_to][lancedb.query.AsyncQuery.nearest_to] to convert to
        a vector query. Or you can use
        [AsyncTable.vector_search][lancedb.table.AsyncTable.vector_search]
        """
        super().__init__(inner)
        self._inner = inner
        self._reranker = None
        self._query_string = None

    def rerank(
        self, reranker: Reranker = RRFReranker(), query_string: Optional[str] = None
    ) -> AsyncHybridQuery:
@@ -2065,6 +2163,11 @@ class AsyncVectorQuery(AsyncQueryBase):
        self._reranker = reranker

        if not self._query_string and not query_string:
            raise ValueError("query_string must be provided to rerank the results.")

        self._query_string = query_string

        return self

    def nearest_to_text(
@@ -2100,14 +2203,17 @@ class AsyncVectorQuery(AsyncQueryBase):
            self._inner.nearest_to_text({"query": query, "columns": columns})
        )

    async def to_arrow(self) -> pa.Table:
        results = await super().to_arrow()
    async def to_batches(
        self, *, max_batch_length: Optional[int] = None
    ) -> AsyncRecordBatchReader:
        reader = await super().to_batches()
        results = pa.Table.from_batches(await reader.read_all(), reader.schema)
        if self._reranker:
            results = self._reranker.rerank_vector(results)
        return results
            results = self._reranker.rerank_vector(self._query_string, results)
        return AsyncRecordBatchReader(results, max_batch_length=max_batch_length)
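For context, a hedged sketch of driving this new rerank-aware path on a vector query. It assumes a table tbl whose rows carry a text column for the reranker to score; names and values are illustrative.

from lancedb.rerankers import RRFReranker

async def rerank_example(tbl):
    # rerank the vector hits; query_string supplies the text side of the reranker
    reader = await (
        tbl.vector_search([0.1, 0.2])
        .rerank(RRFReranker(), query_string="dog")
        .to_batches()
    )
    batches = await reader.read_all()  # same read_all used in to_batches above
    return batches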


class AsyncHybridQuery(AsyncQueryBase):
class AsyncHybridQuery(AsyncQueryBase, AsyncVectorQueryBase):
    """
    A query builder that performs hybrid vector and full text search.
    Results are combined and reranked based on the specified reranker.
@@ -2155,10 +2261,9 @@ class AsyncHybridQuery(AsyncQueryBase):

        return self

    async def to_batches(self):
        raise NotImplementedError("to_batches not yet supported on a hybrid query")

    async def to_arrow(self) -> pa.Table:
    async def to_batches(
        self, *, max_batch_length: Optional[int] = None
    ) -> AsyncRecordBatchReader:
        fts_query = AsyncFTSQuery(self._inner.to_fts_query())
        vec_query = AsyncVectorQuery(self._inner.to_vector_query())

@@ -2173,7 +2278,7 @@ class AsyncHybridQuery(AsyncQueryBase):
            vec_query.to_arrow(),
        )

        return LanceHybridQueryBuilder._combine_hybrid_results(
        result = LanceHybridQueryBuilder._combine_hybrid_results(
            fts_results=fts_results,
            vector_results=vector_results,
            norm=self._norm,
@@ -2183,6 +2288,8 @@ class AsyncHybridQuery(AsyncQueryBase):
            with_row_ids=with_row_ids,
        )

        return AsyncRecordBatchReader(result, max_batch_length=max_batch_length)

    async def explain_plan(self, verbose: Optional[bool] = False):
        """Return the execution plan for this query.
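Before the reranker changes below, a hedged sketch of what the now-supported hybrid to_batches enables end to end (table and query values are illustrative; the vector and FTS legs are gathered as shown in the hunk above, then combined):

async def hybrid_example(tbl):
    reader = await (
        tbl.query()
        .nearest_to([0.3, 0.3])
        .nearest_to_text("puppy")
        .to_batches(max_batch_length=128)
    )
    batches = await reader.read_all()
    return batches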
@@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import defaultdict
from numpy import nan
import pyarrow as pa

@@ -95,43 +96,22 @@ class LinearCombinationReranker(Reranker):
                pa.array([nan] * len(vector_results), type=pa.float32()),
            )
            return results
        # sort both input tables on _rowid
        combined_list = []
        vector_list = vector_results.sort_by("_rowid").to_pylist()
        fts_list = fts_results.sort_by("_rowid").to_pylist()
        i, j = 0, 0
        while i < len(vector_list):
            if j >= len(fts_list):
                for vi in vector_list[i:]:
                    vi["_relevance_score"] = self._combine_score(vi["_distance"], fill)
                    combined_list.append(vi)
                break

            vi = vector_list[i]
            fj = fts_list[j]
            # invert the fts score from relevance to distance
            inverted_fts_score = self._invert_score(fj["_score"])
            if vi["_rowid"] == fj["_rowid"]:
                vi["_relevance_score"] = self._combine_score(
                    vi["_distance"], inverted_fts_score
                )
                vi["_score"] = fj["_score"]  # keep the original score
                combined_list.append(vi)
                i += 1
                j += 1
            elif vector_list[i]["_rowid"] < fts_list[j]["_rowid"]:
                vi["_relevance_score"] = self._combine_score(vi["_distance"], fill)
                combined_list.append(vi)
                i += 1
            else:
                fj["_relevance_score"] = self._combine_score(inverted_fts_score, fill)
                combined_list.append(fj)
                j += 1
        if j < len(fts_list) - 1:
            for fj in fts_list[j:]:
                fj["_relevance_score"] = self._combine_score(inverted_fts_score, fill)
                combined_list.append(fj)
        results = defaultdict()
        for vector_result in vector_results.to_pylist():
            results[vector_result["_rowid"]] = vector_result
        for fts_result in fts_results.to_pylist():
            row_id = fts_result["_rowid"]
            if row_id in results:
                results[row_id]["_score"] = fts_result["_score"]
            else:
                results[row_id] = fts_result

        combined_list = []
        for row_id, result in results.items():
            vector_score = self._invert_score(result.get("_distance", fill))
            fts_score = result.get("_score", fill)
            result["_relevance_score"] = self._combine_score(vector_score, fts_score)
            combined_list.append(result)

        relevance_score_schema = pa.schema(
            [
@@ -148,10 +128,10 @@ class LinearCombinationReranker(Reranker):
        tbl = self._keep_relevance_score(tbl)
        return tbl

    def _combine_score(self, score1, score2):
    def _combine_score(self, vector_score, fts_score):
        # these scores represent distance
        return 1 - (self.weight * score1 + (1 - self.weight) * score2)
        return 1 - (self.weight * vector_score + (1 - self.weight) * fts_score)

    def _invert_score(self, score: float):
    def _invert_score(self, dist: float):
        # Invert the score between relevance and distance
        return 1 - score
        return 1 - dist
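A quick numeric check of the combination formula above. The 0.7 weight and input scores are illustrative values, not defaults asserted by this diff:

weight = 0.7          # hypothetical vector-side weight
vector_score = 0.2    # distance-like: lower is better
fts_score = 0.5       # FTS relevance already inverted to a distance
relevance = 1 - (weight * vector_score + (1 - weight) * fts_score)
assert abs(relevance - 0.71) < 1e-9  # 1 - (0.14 + 0.15)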

@@ -586,6 +586,26 @@ class Table(ABC):
        """
        raise NotImplementedError

    def drop_index(self, name: str) -> None:
        """
        Drop an index from the table.

        Parameters
        ----------
        name: str
            The name of the index to drop.

        Notes
        -----
        This does not delete the index from disk; it just removes it from the table.
        To delete the index, run [optimize][lancedb.table.Table.optimize]
        after dropping the index.

        Use [list_indices][lancedb.table.Table.list_indices] to find the names of
        the indices.
        """
        raise NotImplementedError
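A hedged usage sketch for the new API; the index name is hypothetical, and list_indices reveals the real ones:

# a minimal sketch, assuming `table` already has an index named "vector_idx"
print(table.list_indices())
table.drop_index("vector_idx")
table.optimize()  # per the note above, this is what reclaims the index files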

    @abstractmethod
    def create_scalar_index(
        self,
@@ -1594,6 +1614,9 @@ class LanceTable(Table):
            )
        )

    def drop_index(self, name: str) -> None:
        return LOOP.run(self._table.drop_index(name))

    def create_scalar_index(
        self,
        column: str,

@@ -2716,6 +2739,26 @@ class AsyncTable:
            add_note(e, help_msg)
            raise e

    async def drop_index(self, name: str) -> None:
        """
        Drop an index from the table.

        Parameters
        ----------
        name: str
            The name of the index to drop.

        Notes
        -----
        This does not delete the index from disk; it just removes it from the table.
        To delete the index, run [optimize][lancedb.table.AsyncTable.optimize]
        after dropping the index.

        Use [list_indices][lancedb.table.AsyncTable.list_indices] to find the names
        of the indices.
        """
        await self._inner.drop_index(name)

    async def add(
        self,
        data: DATA,

@@ -3,6 +3,7 @@ import shutil
# --8<-- [start:imports]
import lancedb
import numpy as np
import pyarrow as pa
import pytest
# --8<-- [end:imports]

@@ -12,16 +13,32 @@ shutil.rmtree("data/binary_lancedb", ignore_errors=True)
def test_binary_vector():
    # --8<-- [start:sync_binary_vector]
    db = lancedb.connect("data/binary_lancedb")
    data = [
        {
            "id": i,
            "vector": np.random.randint(0, 256, size=16),
        }
        for i in range(1024)
    ]
    tbl = db.create_table("my_binary_vectors", data=data)
    query = np.random.randint(0, 256, size=16)
    tbl.search(query).metric("hamming").to_arrow()
    schema = pa.schema(
        [
            pa.field("id", pa.int64()),
            # for dim=256, lance stores every 8 bits in a byte
            # so the vector field should be a list of 256 / 8 = 32 bytes
            pa.field("vector", pa.list_(pa.uint8(), 32)),
        ]
    )
    tbl = db.create_table("my_binary_vectors", schema=schema)

    data = []
    for i in range(1024):
        vector = np.random.randint(0, 2, size=256)
        # pack the binary vector into bytes to save space
        packed_vector = np.packbits(vector)
        data.append(
            {
                "id": i,
                "vector": packed_vector,
            }
        )
    tbl.add(data)

    query = np.random.randint(0, 2, size=256)
    packed_query = np.packbits(query)
    tbl.search(packed_query).distance_type("hamming").to_arrow()
    # --8<-- [end:sync_binary_vector]
    db.drop_table("my_binary_vectors")
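The packing step is plain np.packbits: eight 0/1 values collapse into one uint8. A quick check with illustrative bits:

import numpy as np

bits = np.array([1, 0, 1, 1, 0, 0, 0, 1])
np.packbits(bits)  # array([177], dtype=uint8): 0b10110001 == 177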

@@ -30,15 +47,31 @@ def test_binary_vector():
async def test_binary_vector_async():
    # --8<-- [start:async_binary_vector]
    db = await lancedb.connect_async("data/binary_lancedb")
    data = [
        {
            "id": i,
            "vector": np.random.randint(0, 256, size=16),
        }
        for i in range(1024)
    ]
    tbl = await db.create_table("my_binary_vectors", data=data)
    query = np.random.randint(0, 256, size=16)
    await tbl.query().nearest_to(query).distance_type("hamming").to_arrow()
    schema = pa.schema(
        [
            pa.field("id", pa.int64()),
            # for dim=256, lance stores every 8 bits in a byte
            # so the vector field should be a list of 256 / 8 = 32 bytes
            pa.field("vector", pa.list_(pa.uint8(), 32)),
        ]
    )
    tbl = await db.create_table("my_binary_vectors", schema=schema)

    data = []
    for i in range(1024):
        vector = np.random.randint(0, 2, size=256)
        # pack the binary vector into bytes to save space
        packed_vector = np.packbits(vector)
        data.append(
            {
                "id": i,
                "vector": packed_vector,
            }
        )
    await tbl.add(data)

    query = np.random.randint(0, 2, size=256)
    packed_query = np.packbits(query)
    await tbl.query().nearest_to(packed_query).distance_type("hamming").to_arrow()
    # --8<-- [end:async_binary_vector]
    await db.drop_table("my_binary_vectors")

80 python/python/tests/docs/test_multivector.py Normal file
@@ -0,0 +1,80 @@
import shutil

from lancedb.index import IvfPq
import pytest

# --8<-- [start:imports]
import lancedb
import numpy as np
import pyarrow as pa
# --8<-- [end:imports]

shutil.rmtree("data/multivector_demo", ignore_errors=True)


def test_multivector():
    # --8<-- [start:sync_multivector]
    db = lancedb.connect("data/multivector_demo")
    schema = pa.schema(
        [
            pa.field("id", pa.int64()),
            # float16, float32, and float64 are supported
            pa.field("vector", pa.list_(pa.list_(pa.float32(), 256))),
        ]
    )
    data = [
        {
            "id": i,
            "vector": np.random.random(size=(2, 256)).tolist(),
        }
        for i in range(1024)
    ]
    tbl = db.create_table("my_table", data=data, schema=schema)

    # only cosine similarity is supported for multi-vectors
    tbl.create_index(metric="cosine")

    # query with single vector
    query = np.random.random(256).astype(np.float16)
    tbl.search(query).to_arrow()

    # query with multiple vectors
    query = np.random.random(size=(2, 256))
    tbl.search(query).to_arrow()

    # --8<-- [end:sync_multivector]
    db.drop_table("my_table")


@pytest.mark.asyncio
async def test_multivector_async():
    # --8<-- [start:async_multivector]
    db = await lancedb.connect_async("data/multivector_demo")
    schema = pa.schema(
        [
            pa.field("id", pa.int64()),
            # float16, float32, and float64 are supported
            pa.field("vector", pa.list_(pa.list_(pa.float32(), 256))),
        ]
    )
    data = [
        {
            "id": i,
            "vector": np.random.random(size=(2, 256)).tolist(),
        }
        for i in range(1024)
    ]
    tbl = await db.create_table("my_table", data=data, schema=schema)

    # only cosine similarity is supported for multi-vectors
    await tbl.create_index(column="vector", config=IvfPq(distance_type="cosine"))

    # query with single vector
    query = np.random.random(256)
    await tbl.query().nearest_to(query).to_arrow()

    # query with multiple vectors
    query = np.random.random(size=(2, 256))
    await tbl.query().nearest_to(query).to_arrow()

    # --8<-- [end:async_multivector]
    await db.drop_table("my_table")
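For intuition, multivector search scores a query against every stored vector in a row's list. A rough MaxSim-style sketch of that idea follows; this is an illustrative assumption about the scoring shape, not Lance's literal kernel:

import numpy as np

def maxsim(query_vecs, doc_vecs):
    # cosine similarity if both sides are L2-normalized
    sims = query_vecs @ doc_vecs.T
    # best-matching document vector per query vector, summed
    return sims.max(axis=1).sum()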

@@ -65,7 +65,7 @@ def test_vector_search():
    tbl.search(np.random.random((1536))).limit(10).to_list()
    # --8<-- [end:exhaustive_search]
    # --8<-- [start:exhaustive_search_cosine]
    tbl.search(np.random.random((1536))).metric("cosine").limit(10).to_list()
    tbl.search(np.random.random((1536))).distance_type("cosine").limit(10).to_list()
    # --8<-- [end:exhaustive_search_cosine]
    # --8<-- [start:create_table_with_nested_schema]
    # Let's add 100 sample rows to our dataset

@@ -3,7 +3,9 @@

import lancedb

from lancedb.query import LanceHybridQueryBuilder
import pyarrow as pa
import pyarrow.compute as pc
import pytest
import pytest_asyncio

@@ -67,6 +69,7 @@ async def test_async_hybrid_query_filters(table: AsyncTable):
        .where("text not in ('a', 'dog')")
        .nearest_to([0.3, 0.3])
        .nearest_to_text("*a*")
        .distance_type("l2")
        .limit(2)
        .to_arrow()
    )

@@ -109,3 +112,23 @@ async def test_explain_plan(table: AsyncTable):
    assert "KNNVectorDistance" in plan
    assert "FTS Search Plan" in plan
    assert "LanceScan" in plan


def test_normalize_scores():
    cases = [
        (pa.array([0.1, 0.4]), pa.array([0.0, 1.0])),
        (pa.array([2.0, 10.0, 20.0]), pa.array([0.0, 8.0 / 18.0, 1.0])),
        (pa.array([0.0, 0.0, 0.0]), pa.array([0.0, 0.0, 0.0])),
        (pa.array([10.0, 9.9999999999999]), pa.array([0.0, 0.0])),
    ]

    for input, expected in cases:
        for invert in [True, False]:
            result = LanceHybridQueryBuilder._normalize_scores(input, invert)

            # use a local copy so the expectation is not mutated across
            # iterations, and reduce the comparison to a real boolean
            exp = pc.subtract(1.0, expected) if invert else expected

            assert pc.all(pc.equal(result, exp)).as_py(), (
                f"Expected {exp} but got {result} for invert={invert}"
            )

@@ -80,6 +80,10 @@ async def test_create_scalar_index(some_table: AsyncTable):
    # can also specify index type
    await some_table.create_index("id", config=BTree())

    await some_table.drop_index("id_idx")
    indices = await some_table.list_indices()
    assert len(indices) == 0


@pytest.mark.asyncio
async def test_create_bitmap_index(some_table: AsyncTable):

@@ -7,6 +7,7 @@ from pathlib import Path

import lancedb
from lancedb.index import IvfPq, FTS
from lancedb.rerankers.cross_encoder import CrossEncoderReranker
import numpy as np
import pandas.testing as tm
import pyarrow as pa
@@ -69,7 +70,7 @@ async def table_struct_async(tmp_path) -> AsyncTable:


@pytest.fixture
def multivec_table() -> lancedb.table.Table:
def multivec_table(vector_value_type=pa.float32()) -> lancedb.table.Table:
    db = lancedb.connect("memory://")
    # Generate 256 rows of data
    num_rows = 256
@@ -85,7 +86,7 @@ def multivec_table() -> lancedb.table.Table:
    df = pa.table(
        {
            "vector": pa.array(
                vector_data, type=pa.list_(pa.list_(pa.float32(), list_size=2))
                vector_data, type=pa.list_(pa.list_(vector_value_type, list_size=2))
            ),
            "id": pa.array(id_data),
            "float_field": pa.array(float_field_data),
@@ -95,7 +96,7 @@ def multivec_table() -> lancedb.table.Table:


@pytest_asyncio.fixture
async def multivec_table_async(tmp_path) -> AsyncTable:
async def multivec_table_async(vector_value_type=pa.float32()) -> AsyncTable:
    conn = await lancedb.connect_async(
        "memory://", read_consistency_interval=timedelta(seconds=0)
    )
@@ -113,7 +114,7 @@ async def multivec_table_async(tmp_path) -> AsyncTable:
    df = pa.table(
        {
            "vector": pa.array(
                vector_data, type=pa.list_(pa.list_(pa.float32(), list_size=2))
                vector_data, type=pa.list_(pa.list_(vector_value_type, list_size=2))
            ),
            "id": pa.array(id_data),
            "float_field": pa.array(float_field_data),

@@ -231,6 +232,9 @@ async def test_distance_range_async(table_async: AsyncTable):
    assert res["_distance"].to_pylist() == [min_dist, max_dist]


@pytest.mark.parametrize(
    "multivec_table", [pa.float16(), pa.float32(), pa.float64()], indirect=True
)
def test_multivector(multivec_table: lancedb.table.Table):
    # create index on multivector
    multivec_table.create_index(
@@ -261,6 +265,9 @@ def test_multivector(multivec_table: lancedb.table.Table):


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "multivec_table_async", [pa.float16(), pa.float32(), pa.float64()], indirect=True
)
async def test_multivector_async(multivec_table_async: AsyncTable):
    # create index on multivector
    await multivec_table_async.create_index(

@@ -370,14 +377,14 @@ def test_query_builder_with_metric(table):
    df_default = LanceVectorQueryBuilder(table, query, vector_column_name).to_pandas()
    df_l2 = (
        LanceVectorQueryBuilder(table, query, vector_column_name)
        .metric("L2")
        .distance_type("L2")
        .to_pandas()
    )
    tm.assert_frame_equal(df_default, df_l2)

    df_cosine = (
        LanceVectorQueryBuilder(table, query, vector_column_name)
        .metric("cosine")
        .distance_type("cosine")
        .limit(1)
        .to_pandas()
    )
@@ -394,7 +401,7 @@ def test_query_builder_with_different_vector_column():
    vector_column_name = "foo_vector"
    builder = (
        LanceVectorQueryBuilder(table, query, vector_column_name)
        .metric("cosine")
        .distance_type("cosine")
        .where("b < 10")
        .select(["b"])
        .limit(2)

@@ -509,15 +516,24 @@ async def test_query_async(table_async: AsyncTable):
        expected_columns=["id", "vector", "_rowid"],
    )


@pytest.mark.asyncio
@pytest.mark.slow
async def test_query_reranked_async(table_async: AsyncTable):
    # FTS with rerank
    await table_async.create_index("text", config=FTS(with_position=False))
    await check_query(
        table_async.query().nearest_to_text("dog").rerank(),
        table_async.query().nearest_to_text("dog").rerank(CrossEncoderReranker()),
        expected_num_rows=1,
    )

    # Vector query with rerank
    await check_query(table_async.vector_search([1, 2]).rerank(), expected_num_rows=2)
    await check_query(
        table_async.vector_search([1, 2]).rerank(
            CrossEncoderReranker(), query_string="dog"
        ),
        expected_num_rows=2,
    )


@pytest.mark.asyncio

@@ -366,7 +366,7 @@ def test_query_sync_maximal():
    with query_test_table(handler) as table:
        (
            table.search([1, 2, 3], vector_column_name="vector2", fast_search=True)
            .metric("cosine")
            .distance_type("cosine")
            .limit(42)
            .offset(10)
            .refine_factor(10)

@@ -3,6 +3,8 @@ import random

import lancedb
import numpy as np
import pyarrow as pa
import pyarrow.compute as pc
import pytest
from lancedb.conftest import MockTextEmbeddingFunction  # noqa
from lancedb.embeddings import EmbeddingFunctionRegistry

@@ -281,6 +283,31 @@ def _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy):
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_linear_combination(tmp_path, use_tantivy):
    reranker = LinearCombinationReranker()

    vector_results = pa.Table.from_pydict(
        {
            "_rowid": [0, 1, 2, 3, 4],
            "_distance": [0.1, 0.2, 0.3, 0.4, 0.5],
            "_text": ["a", "b", "c", "d", "e"],
        }
    )

    fts_results = pa.Table.from_pydict(
        {
            "_rowid": [1, 2, 3, 4, 5],
            "_score": [0.1, 0.2, 0.3, 0.4, 0.5],
            "_text": ["b", "c", "d", "e", "f"],
        }
    )

    combined_results = reranker.merge_results(vector_results, fts_results, 1.0)
    assert len(combined_results) == 6
    assert "_rowid" in combined_results.column_names
    assert "_text" in combined_results.column_names
    assert "_distance" not in combined_results.column_names
    assert "_score" not in combined_results.column_names
    assert "_relevance_score" in combined_results.column_names

    _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)


@@ -290,6 +317,55 @@ def test_rrf_reranker(tmp_path, use_tantivy):
    _run_test_hybrid_reranker(reranker, tmp_path, use_tantivy)


def test_rrf_reranker_distance():
    data = pa.table(
        {
            "vector": pa.FixedSizeListArray.from_arrays(
                pc.random(32 * 1024).cast(pa.float32()), 32
            ),
            "text": pa.array(["hello"] * 1024),
        }
    )
    db = lancedb.connect("memory://")
    table = db.create_table("test", data)

    table.create_index(num_partitions=1, num_sub_vectors=2)
    table.create_fts_index("text", use_tantivy=False)

    reranker = RRFReranker(return_score="all")

    hybrid_results = (
        table.search(query_type="hybrid")
        .vector([0.0] * 32)
        .text("hello")
        .with_row_id(True)
        .rerank(reranker)
        .to_list()
    )
    hybrid_distances = {row["_rowid"]: row["_distance"] for row in hybrid_results}
    hybrid_scores = {row["_rowid"]: row["_score"] for row in hybrid_results}

    vector_results = table.search([0.0] * 32).with_row_id(True).to_list()
    vector_distances = {row["_rowid"]: row["_distance"] for row in vector_results}

    fts_results = table.search("hello", query_type="fts").with_row_id(True).to_list()
    fts_scores = {row["_rowid"]: row["_score"] for row in fts_results}

    found_match = False
    for rowid, distance in hybrid_distances.items():
        if rowid in vector_distances:
            found_match = True
            assert distance == vector_distances[rowid], "Distance mismatch"
    assert found_match, "No results matched between hybrid and vector search"

    found_match = False
    for rowid, score in hybrid_scores.items():
        if rowid in fts_scores and fts_scores[rowid] is not None:
            found_match = True
            assert score == fts_scores[rowid], "Score mismatch"
    assert found_match, "No results matched between hybrid and fts search"


@pytest.mark.skipif(
    os.environ.get("COHERE_API_KEY") is None, reason="COHERE_API_KEY not set"
)
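For reference, reciprocal rank fusion scores each row as a sum of 1/(k + rank) over the result lists it appears in. A tiny sketch; k=60 is the conventional default and an assumption here, not something this diff asserts:

def rrf_score(ranks, k=60):
    # ranks: this row's 1-based rank in each list (vector, FTS) it appears in
    return sum(1.0 / (k + r) for r in ranks)

rrf_score([1, 3])  # a row ranked 1st by vector search and 3rd by FTS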

@@ -1008,6 +1008,10 @@ def test_create_scalar_index(mem_db: DBConnection):
    results = table.search([5, 5]).where("x != 'b'").to_arrow()
    assert results["_distance"][0].as_py() > 0

    table.drop_index(scalar_index.name)
    indices = table.list_indices()
    assert len(indices) == 0


def test_empty_query(mem_db: DBConnection):
    table = mem_db.create_table(

@@ -1238,7 +1242,9 @@ def test_hybrid_search_metric_type(tmp_db: DBConnection):

    # with custom metric
    result_dot = (
        table.search("feeling lucky", query_type="hybrid").metric("dot").to_arrow()
        table.search("feeling lucky", query_type="hybrid")
        .distance_type("dot")
        .to_arrow()
    )
    result_l2 = table.search("feeling lucky", query_type="hybrid").to_arrow()
    assert len(result_dot) > 0

@@ -9,7 +9,10 @@ use arrow::{
};
use futures::stream::StreamExt;
use lancedb::arrow::SendableRecordBatchStream;
use pyo3::{pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult, Python};
use pyo3::{
    exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
    Python,
};
use pyo3_async_runtimes::tokio::future_into_py;

use crate::error::PythonErrorExt;
@@ -32,20 +35,25 @@ impl RecordBatchStream {

#[pymethods]
impl RecordBatchStream {
    #[getter]
    pub fn schema(&self, py: Python) -> PyResult<PyObject> {
        (*self.schema).clone().into_pyarrow(py)
    }

    pub fn next(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
    pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
        self_
    }

    pub fn __anext__(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner.clone();
        future_into_py(self_.py(), async move {
            let inner_next = inner.lock().await.next().await;
            inner_next
                .map(|item| {
                    let item = item.infer_error()?;
                    Python::with_gil(|py| item.to_pyarrow(py))
                })
                .transpose()
            let inner_next = inner
                .lock()
                .await
                .next()
                .await
                .ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
            Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
        })
    }
}
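With __aiter__/__anext__ exposed from Rust, the stream becomes a native Python async iterator. A hedged consumption sketch (the stream object would come from a query's to_batches on the Rust-backed API):

async def consume(stream):
    # the PyStopAsyncIteration raised by __anext__ ends the loop cleanly
    async for batch in stream:
        print(batch.num_rows)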

@@ -21,7 +21,7 @@ use pyo3::{
    types::{PyModule, PyModuleMethods},
    wrap_pyfunction, Bound, PyResult, Python,
};
use query::{Query, VectorQuery};
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
use table::Table;

pub mod arrow;
@@ -42,6 +42,8 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<Table>()?;
    m.add_class::<IndexConfig>()?;
    m.add_class::<Query>()?;
    m.add_class::<FTSQuery>()?;
    m.add_class::<HybridQuery>()?;
    m.add_class::<VectorQuery>()?;
    m.add_class::<RecordBatchStream>()?;
    m.add_function(wrap_pyfunction!(connect, m)?)?;

@@ -194,6 +194,14 @@ impl Table {
        })
    }

    pub fn drop_index(self_: PyRef<'_, Self>, index_name: String) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            inner.drop_index(&index_name).await.infer_error()?;
            Ok(())
        })
    }

    pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.15.0-beta.0"
version = "0.15.1-beta.0"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.15.0-beta.0"
version = "0.15.1-beta.0"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

@@ -6,7 +6,7 @@ use crate::index::IndexStatistics;
use crate::query::Select;
use crate::table::AddDataMode;
use crate::utils::{supported_btree_data_type, supported_vector_data_type};
use crate::{Error, Table};
use crate::{DistanceType, Error, Table};
use arrow_array::RecordBatchReader;
use arrow_ipc::reader::FileReader;
use arrow_schema::{DataType, SchemaRef};
@@ -592,7 +592,7 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
                    message: format!("Column {} not found in schema", column),
                })?;
                if supported_vector_data_type(field.data_type()) {
                    ("IVF_PQ", None)
                    ("IVF_PQ", Some(DistanceType::L2))
                } else if supported_btree_data_type(field.data_type()) {
                    ("BTREE", None)
                } else {
@@ -816,6 +816,14 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {

        Ok(Some(stats))
    }

    /// Not yet supported on LanceDB Cloud.
    async fn drop_index(&self, _name: &str) -> Result<()> {
        Err(Error::NotSupported {
            message: "Drop index is not yet supported on LanceDB Cloud.".into(),
        })
    }

    async fn table_definition(&self) -> Result<TableDefinition> {
        Err(Error::NotSupported {
            message: "table_definition is not supported on LanceDB cloud.".into(),

@@ -41,6 +41,7 @@ use lance::dataset::{
    WriteParams,
};
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
use lance::index::vector::utils::infer_vector_dim;
use lance::io::WrappingObjectStore;
use lance_datafusion::exec::execute_plan;
use lance_index::vector::hnsw::builder::HnswBuildParams;
@@ -410,6 +411,7 @@ pub(crate) trait TableInternal: std::fmt::Display + std::fmt::Debug + Send + Syn
    async fn update(&self, update: UpdateBuilder) -> Result<u64>;
    async fn create_index(&self, index: IndexBuilder) -> Result<()>;
    async fn list_indices(&self) -> Result<Vec<IndexConfig>>;
    async fn drop_index(&self, name: &str) -> Result<()>;
    async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>>;
    async fn merge_insert(
        &self,
@@ -984,6 +986,18 @@ impl Table {
        self.inner.index_stats(index_name.as_ref()).await
    }

    /// Drop an index from the table.
    ///
    /// Note: This is not yet available in LanceDB Cloud.
    ///
    /// This does not delete the index from disk; it just removes it from the table.
    /// To delete the index, run [`Self::optimize()`] after dropping the index.
    ///
    /// Use [`Self::list_indices()`] to find the names of the indices.
    pub async fn drop_index(&self, name: &str) -> Result<()> {
        self.inner.drop_index(name).await
    }

    // Take many execution plans and map them into a single plan that adds
    // a query_index column and unions them.
    pub(crate) fn multi_vector_plan(

@@ -1370,14 +1384,8 @@ impl NativeTable {
        let num_sub_vectors: u32 = if let Some(n) = index.num_sub_vectors {
            n
        } else {
            match field.data_type() {
                arrow_schema::DataType::FixedSizeList(_, n) => {
                    Ok::<u32, Error>(suggested_num_sub_vectors(*n as u32))
                }
                _ => Err(Error::Schema {
                    message: format!("Column '{}' is not a FixedSizeList", field.name()),
                }),
            }?
            let dim = infer_vector_dim(field.data_type())?;
            suggested_num_sub_vectors(dim as u32)
        };
        let mut dataset = self.dataset.get_mut().await?;
        let lance_idx_params = lance::index::vector::VectorIndexParams::ivf_pq(

@@ -1734,6 +1742,12 @@ impl NativeTable {
    }

    /// Update field metadata
    ///
    /// # Arguments:
    /// * `new_values` - An iterator of tuples where the first element is the
    ///   field id and the second element is a hashmap of metadata key-value
    ///   pairs.
    ///
    pub async fn replace_field_metadata(
        &self,
        new_values: impl IntoIterator<Item = (u32, HashMap<String, String>)>,
@@ -1877,6 +1891,12 @@ impl TableInternal for NativeTable {
        }
    }

    async fn drop_index(&self, index_name: &str) -> Result<()> {
        let mut dataset = self.dataset.get_mut().await?;
        dataset.drop_index(index_name).await?;
        Ok(())
    }

    async fn update(&self, update: UpdateBuilder) -> Result<u64> {
        let dataset = self.dataset.get().await?.clone();
        let mut builder = LanceUpdateBuilder::new(Arc::new(dataset));

@@ -2903,6 +2923,9 @@ mod tests {
        assert_eq!(stats.num_unindexed_rows, 0);
        assert_eq!(stats.index_type, crate::index::IndexType::IvfPq);
        assert_eq!(stats.distance_type, Some(crate::DistanceType::L2));

        table.drop_index(index_name).await.unwrap();
        assert_eq!(table.list_indices().await.unwrap().len(), 0);
    }

    #[tokio::test]

@@ -3513,11 +3536,10 @@ mod tests {
            .unwrap();

        let native_tbl = table.as_native().unwrap();
        let schema = native_tbl.schema().await.unwrap();
        let schema = native_tbl.manifest().await.unwrap().schema;

        let (field_idx, field) = schema.column_with_name("i").unwrap();
        let field_metadata = field.metadata();
        assert_eq!(field_metadata.len(), 0);
        let field = schema.field("i").unwrap();
        assert_eq!(field.metadata.len(), 0);

        native_tbl
            .replace_schema_metadata(vec![(
@@ -3538,16 +3560,15 @@ mod tests {
        let mut new_field_metadata = HashMap::<String, String>::new();
        new_field_metadata.insert("test_field_key1".into(), "test_field_val1".into());
        native_tbl
            .replace_field_metadata(vec![(field_idx as u32, new_field_metadata)])
            .replace_field_metadata(vec![(field.id as u32, new_field_metadata)])
            .await
            .unwrap();

        let schema = native_tbl.schema().await.unwrap();
        let (_field_idx, field) = schema.column_with_name("i").unwrap();
        let field_metadata = field.metadata();
        assert_eq!(field_metadata.len(), 1);
        let schema = native_tbl.manifest().await.unwrap().schema;
        let field = schema.field("i").unwrap();
        assert_eq!(field.metadata.len(), 1);
        assert_eq!(
            field_metadata.get("test_field_key1"),
            field.metadata.get("test_field_key1"),
            Some(&"test_field_val1".to_string())
        );
    }

@@ -17,6 +17,7 @@ use std::sync::Arc;
use arrow_schema::{DataType, Schema};
use lance::arrow::json::JsonDataType;
use lance::dataset::{ReadParams, WriteParams};
use lance::index::vector::utils::infer_vector_dim;
use lance::io::{ObjectStoreParams, WrappingObjectStore};
use lazy_static::lazy_static;

@@ -104,12 +105,12 @@ pub fn validate_table_name(name: &str) -> Result<()> {

/// Find one default column to create index or perform vector query.
pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result<String> {
    // Try to find one fixed size list array column.
    // Try to find a vector column.
    let candidates = schema
        .fields()
        .iter()
        .filter_map(|field| match inf_vector_dim(field) {
            Some(d) if dim.is_none() || dim == Some(d) => Some(field.name()),
        .filter_map(|field| match infer_vector_dim(field.data_type()) {
            Ok(d) if dim.is_none() || dim == Some(d as i32) => Some(field.name()),
            _ => None,
        })
        .collect::<Vec<_>>();

@@ -133,20 +134,6 @@ pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result
    }
}

fn inf_vector_dim(field: &arrow_schema::Field) -> Option<i32> {
    match field.data_type() {
        arrow_schema::DataType::FixedSizeList(f, d) => {
            if f.data_type().is_floating() || f.data_type() == &DataType::UInt8 {
                Some(*d)
            } else {
                None
            }
        }
        arrow_schema::DataType::List(f) => inf_vector_dim(f),
        _ => None,
    }
}

pub fn supported_btree_data_type(dtype: &DataType) -> bool {
    dtype.is_integer()
        || dtype.is_floating()