Bump version: 0.22.1-beta.6 → 0.22.1

Bump version: 0.22.1-beta.5 → 0.22.1-beta.6
chore: use stable lance (#2398 )
2025-12-23 05:19:58 +00:00 · 2025-05-22 05:57:20 +00:00 · 2025-05-22 05:57:20 +00:00 · 2025-05-21 22:34:29 -07:00 · 2025-05-21 14:15:55 -07:00 · 2025-05-20 01:40:12 +05:30
89 changed files with 5916 additions and 1150 deletions
--- a/.bumpversion.toml
+++ b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.19.0-beta.7"
+current_version = "0.19.1-beta.5"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -18,17 +18,24 @@ concurrency:
  group: "pages"
  cancel-in-progress: true

+env:
+  # This reduces the disk space needed for the build
+  RUSTFLAGS: "-C debuginfo=0"
+  # according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
+  # CI builds are faster with incremental disabled.
+  CARGO_INCREMENTAL: "0"
+
 jobs:
  # Single deploy job since we're just deploying
  build:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
-    runs-on: buildjet-8vcpu-ubuntu-2204
+    runs-on: ubuntu-24.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
-      - name: Install dependecies needed for ubuntu
+      - name: Install dependencies needed for ubuntu
        run: |
          sudo apt install -y protobuf-compiler libssl-dev
          rustup update && rustup default
@@ -38,6 +45,7 @@ jobs:
          python-version: "3.10"
          cache: "pip"
          cache-dependency-path: "docs/requirements.txt"
+      - uses: Swatinem/rust-cache@v2
      - name: Build Python
        working-directory: python
        run: |
@@ -49,7 +57,6 @@ jobs:
          node-version: 20
          cache: 'npm'
          cache-dependency-path: node/package-lock.json
-      - uses: Swatinem/rust-cache@v2
      - name: Install node dependencies
        working-directory: node
        run: |
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -136,9 +136,9 @@ jobs:
      - uses: ./.github/workflows/run_tests
        with:
          integration: true
-      - name: Test without pylance
+      - name: Test without pylance or pandas
        run: |
-          pip uninstall -y pylance
+          pip uninstall -y pylance pandas
          pytest -vv python/tests/test_table.py
      # Make sure wheels are not included in the Rust cache
      - name: Delete wheels
@@ -228,6 +228,7 @@ jobs:
      - name: Install lancedb
        run: |
          pip install "pydantic<2"
+          pip install pyarrow==16
          pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
          pip install tantivy
      - name: Run tests
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -40,6 +40,9 @@ jobs:
        with:
          fetch-depth: 0
          lfs: true
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          components: rustfmt, clippy
      - uses: Swatinem/rust-cache@v2
        with:
          workspaces: rust
@@ -160,8 +163,8 @@ jobs:
    strategy:
      matrix:
        target:
-        - x86_64-pc-windows-msvc
-        - aarch64-pc-windows-msvc
+          - x86_64-pc-windows-msvc
+          - aarch64-pc-windows-msvc
    defaults:
      run:
        working-directory: rust/lancedb
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,16 +21,14 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.26.0", "features" = [
-    "dynamodb",
-], tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-io = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-index = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-linalg = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-table = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-testing = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-datafusion = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
-lance-encoding = { version = "=0.26.0", tag = "v0.26.0-beta.1", git = "https://github.com/lancedb/lance" }
+lance = { "version" = "=0.27.2", "features" = ["dynamodb"] }
+lance-io = { version = "=0.27.2" }
+lance-index = { version = "=0.27.2" }
+lance-linalg = { version = "=0.27.2" }
+lance-table = { version = "=0.27.2" }
+lance-testing = { version = "=0.27.2" }
+lance-datafusion = { version = "=0.27.2" }
+lance-encoding = { version = "=0.27.2" }
 # Note that this one does not include pyarrow
 arrow = { version = "54.1", optional = false }
 arrow-array = "54.1"
@@ -63,15 +61,12 @@ rand = "0.8"
 regex = "1.10"
 lazy_static = "1"
 semver = "1.0.25"
-
 # Temporary pins to work around downstream issues
 # https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
 chrono = "=0.4.39"
 # https://github.com/RustCrypto/formats/issues/1684
 base64ct = "=1.6.0"
-
 # Workaround for: https://github.com/eira-fransham/crunchy/issues/13
 crunchy = "=0.2.2"
-
 # Workaround for: https://github.com/Lokathor/bytemuck/issues/306
 bytemuck_derive = ">=1.8.1, <1.9.0"
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -205,6 +205,7 @@ nav:
          - PromptTools: integrations/prompttools.md
          - dlt: integrations/dlt.md
          - phidata: integrations/phidata.md
+          - Genkit: integrations/genkit.md
      - 🎯 Examples:
          - Overview: examples/index.md
          - 🐍 Python:
@@ -331,6 +332,7 @@ nav:
      - PromptTools: integrations/prompttools.md
      - dlt: integrations/dlt.md
      - phidata: integrations/phidata.md
+      - Genkit: integrations/genkit.md
  - Examples:
      - examples/index.md
      - 🐍 Python:
--- a/docs/src/guides/tables.md
+++ b/docs/src/guides/tables.md
@@ -765,7 +765,10 @@ This can be used to update zero to all rows depending on how many rows match the
        ];
        const tbl = await db.createTable("my_table", data)

-        await tbl.update({vector: [10, 10]}, { where: "x = 2"})
+        await tbl.update({ 
+            values: { vector: [10, 10] },
+            where: "x = 2"
+        });
        ```

    === "vectordb (deprecated)"
@@ -784,7 +787,10 @@ This can be used to update zero to all rows depending on how many rows match the
        ];
        const tbl = await db.createTable("my_table", data)

-        await tbl.update({ where: "x = 2", values: {vector: [10, 10]} })
+        await tbl.update({ 
+            where: "x = 2", 
+            values: { vector: [10, 10] } 
+        });
        ```

 #### Updating using a sql query
--- a/docs/src/integrations/genkit.md
+++ b/docs/src/integrations/genkit.md
@@ -0,0 +1,183 @@
+### genkitx-lancedb
+This is a LanceDB plugin for the Genkit framework. It allows you to use LanceDB for ingesting and retrieving data with Genkit.
+
+![integration-banner-genkit](https://github.com/user-attachments/assets/a6cc28af-98e9-4425-b87c-7ab139bd7893)
+
+### Installation
+```bash
+pnpm install genkitx-lancedb
+```
+
+### Usage
+
+Adding LanceDB plugin to your genkit instance.
+
+```ts
+import { lancedbIndexerRef, lancedb, lancedbRetrieverRef, WriteMode } from 'genkitx-lancedb';
+import { textEmbedding004, vertexAI } from '@genkit-ai/vertexai';
+import { gemini } from '@genkit-ai/vertexai';
+import { z, genkit } from 'genkit';
+import { Document } from 'genkit/retriever';
+import { chunk } from 'llm-chunk';
+import { readFile } from 'fs/promises';
+import path from 'path';
+import pdf from 'pdf-parse/lib/pdf-parse';
+
+const ai = genkit({
+  plugins: [
+    // vertexAI provides the textEmbedding004 embedder
+    vertexAI(),
+
+    // the local vector store requires an embedder to translate from text to vector
+    lancedb([
+      {
+        dbUri: '.db', // optional lancedb uri, default to .db
+        tableName: 'table', // optional table name, default to table
+        embedder: textEmbedding004,
+      },
+    ]),
+  ],
+});
+```
+
+You can run this app with the following command:
+```bash
+genkit start -- tsx --watch src/index.ts
+```
+
+This'll add LanceDB as a retriever and indexer to the genkit instance. You can see it in the GUI view
+<img width="1710" alt="Screenshot 2025-05-11 at 7 21 05 PM" src="https://github.com/user-attachments/assets/e752f7f4-785b-4797-a11e-72ab06a531b7" />
+
+**Testing retrieval on a sample table**
+Let's see the raw retrieval results
+
+<img width="1710" alt="Screenshot 2025-05-11 at 7 21 05 PM" src="https://github.com/user-attachments/assets/b8d356ed-8421-4790-8fc0-d6af563b9657" />
+On running this query, you'll see 5 results fetched from the lancedb table, where each result looks something like this:
+<img width="1417" alt="Screenshot 2025-05-11 at 7 21 18 PM" src="https://github.com/user-attachments/assets/77429525-36e2-4da6-a694-e58c1cf9eb83" />
+
+
+
+## Creating a custom RAG flow
+
+Now that we've seen how you can use LanceDB in a genkit pipeline, let's refine the flow and create a RAG. A RAG flow will consist of an indexer and a retriever, with its outputs postprocessed and fed into an LLM for the final response.
+
+### Creating custom indexer flows
+You can also create custom indexer flows, utilizing more options and features provided by LanceDB.
+
+```ts
+export const menuPdfIndexer = lancedbIndexerRef({
+   // Using all defaults, for dbUri, tableName, and embedder, etc
+});
+
+const chunkingConfig = {
+  minLength: 1000,
+  maxLength: 2000,
+  splitter: 'sentence',
+  overlap: 100,
+  delimiters: '',
+} as any;
+
+
+async function extractTextFromPdf(filePath: string) {
+  const pdfFile = path.resolve(filePath);
+  const dataBuffer = await readFile(pdfFile);
+  const data = await pdf(dataBuffer);
+  return data.text;
+}
+
+export const indexMenu = ai.defineFlow(
+  {
+    name: 'indexMenu',
+    inputSchema: z.string().describe('PDF file path'),
+    outputSchema: z.void(),
+  },
+  async (filePath: string) => {
+    filePath = path.resolve(filePath);
+
+    // Read the pdf.
+    const pdfTxt = await ai.run('extract-text', () =>
+      extractTextFromPdf(filePath)
+    );
+
+    // Divide the pdf text into segments.
+    const chunks = await ai.run('chunk-it', async () =>
+      chunk(pdfTxt, chunkingConfig)
+    );
+
+    // Convert chunks of text into documents to store in the index.
+    const documents = chunks.map((text) => {
+      return Document.fromText(text, { filePath });
+    });
+
+    // Add documents to the index.
+    await ai.index({
+      indexer: menuPdfIndexer,
+      documents,
+      options: {
+        writeMode: WriteMode.Overwrite,
+      } as any
+    });
+  }
+);
+```
+
+<img width="1316" alt="Screenshot 2025-05-11 at 8 35 56 PM" src="https://github.com/user-attachments/assets/e2a20ce4-d1d0-4fa2-9a84-f2cc26e3a29f" />
+
+In your console, you can see the logs
+
+<img width="511" alt="Screenshot 2025-05-11 at 7 19 14 PM" src="https://github.com/user-attachments/assets/243f26c5-ed38-40b6-b661-002f40f0423a" />
+
+### Creating custom retriever flows
+You can also create custom retriever flows, utilizing more options and features provided by LanceDB.
+```ts
+export const menuRetriever = lancedbRetrieverRef({
+  tableName: "table", // Use the same table name as the indexer.
+  displayName: "Menu", // Use a custom display name.
+});
+export const menuQAFlow = ai.defineFlow(
+  { name: "Menu", inputSchema: z.string(), outputSchema: z.string() },
+  async (input: string) => {
+    // retrieve relevant documents
+    const docs = await ai.retrieve({
+      retriever: menuRetriever,
+      query: input,
+      options: { 
+        k: 3,
+      },
+    });
+
+    const extractedContent = docs.map(doc => {
+      if (doc.content && Array.isArray(doc.content) && doc.content.length > 0) {
+        if (doc.content[0].media && doc.content[0].media.url) {
+          return doc.content[0].media.url;
+        }
+      }
+      return "No content found";
+    });
+
+    console.log("Extracted content:", extractedContent);
+
+    const { text } = await ai.generate({
+      model: gemini('gemini-2.0-flash'),
+      prompt: `
+You are acting as a helpful AI assistant that can answer 
+questions about the food available on the menu at Genkit Grub Pub.
+
+Use only the context provided to answer the question.
+If you don't know, do not make up an answer.
+Do not add or change items on the menu.
+
+Context:
+${extractedContent.join('\n\n')}
+
+Question: ${input}`,
+      docs,
+    });
+    
+    return text;
+  }
+);
+```
+Now, using our retrieval flow, we can ask questions about the ingested PDF
+<img width="1306" alt="Screenshot 2025-05-11 at 7 18 45 PM" src="https://github.com/user-attachments/assets/86c66b13-7c12-4d5f-9d81-ae36bfb1c346" />
+
--- a/docs/src/js/classes/MergeInsertBuilder.md
+++ b/docs/src/js/classes/MergeInsertBuilder.md
@@ -33,20 +33,22 @@ Construct a MergeInsertBuilder. __Internal use only.__
 ### execute()

 ```ts
-execute(data): Promise<void>
+execute(data, execOptions?): Promise<MergeResult>
 ```

 Executes the merge insert operation

-Nothing is returned but the `Table` is updated
-
 #### Parameters

 * **data**: [`Data`](../type-aliases/Data.md)

+* **execOptions?**: `Partial`&lt;[`WriteExecutionOptions`](../interfaces/WriteExecutionOptions.md)&gt;
+
 #### Returns

-`Promise`&lt;`void`&gt;
+`Promise`&lt;[`MergeResult`](../interfaces/MergeResult.md)&gt;
+
+the merge result

 ***

--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -40,7 +40,7 @@ Returns the name of the table
 ### add()

 ```ts
-abstract add(data, options?): Promise<void>
+abstract add(data, options?): Promise<AddResult>
 ```

 Insert records into this Table.
@@ -54,14 +54,17 @@ Insert records into this Table.

 #### Returns

-`Promise`&lt;`void`&gt;
+`Promise`&lt;[`AddResult`](../interfaces/AddResult.md)&gt;
+
+A promise that resolves to an object
+containing the new version number of the table

 ***

 ### addColumns()

 ```ts
-abstract addColumns(newColumnTransforms): Promise<void>
+abstract addColumns(newColumnTransforms): Promise<AddColumnsResult>
 ```

 Add new columns with defined values.
@@ -76,14 +79,17 @@ Add new columns with defined values.

 #### Returns

-`Promise`&lt;`void`&gt;
+`Promise`&lt;[`AddColumnsResult`](../interfaces/AddColumnsResult.md)&gt;
+
+A promise that resolves to an object
+containing the new version number of the table after adding the columns.

 ***

 ### alterColumns()

 ```ts
-abstract alterColumns(columnAlterations): Promise<void>
+abstract alterColumns(columnAlterations): Promise<AlterColumnsResult>
 ```

 Alter the name or nullability of columns.
@@ -96,7 +102,10 @@ Alter the name or nullability of columns.

 #### Returns

-`Promise`&lt;`void`&gt;
+`Promise`&lt;[`AlterColumnsResult`](../interfaces/AlterColumnsResult.md)&gt;
+
+A promise that resolves to an object
+containing the new version number of the table after altering the columns.

 ***

@@ -117,8 +126,8 @@ wish to return to standard mode, call `checkoutLatest`.

 #### Parameters

-* **version**: `number`
-    The version to checkout
+* **version**: `string` \| `number`
+    The version to checkout, could be version number or tag

 #### Returns

@@ -252,7 +261,7 @@ await table.createIndex("my_float_col");
 ### delete()

 ```ts
-abstract delete(predicate): Promise<void>
+abstract delete(predicate): Promise<DeleteResult>
 ```

 Delete the rows that satisfy the predicate.
@@ -263,7 +272,10 @@ Delete the rows that satisfy the predicate.

 #### Returns

-`Promise`&lt;`void`&gt;
+`Promise`&lt;[`DeleteResult`](../interfaces/DeleteResult.md)&gt;
+
+A promise that resolves to an object
+containing the new version number of the table

 ***

@@ -284,7 +296,7 @@ Return a brief description of the table
 ### dropColumns()

 ```ts
-abstract dropColumns(columnNames): Promise<void>
+abstract dropColumns(columnNames): Promise<DropColumnsResult>
 ```

 Drop one or more columns from the dataset
@@ -303,7 +315,10 @@ then call ``cleanup_files`` to remove the old files.

 #### Returns

-`Promise`&lt;`void`&gt;
+`Promise`&lt;[`DropColumnsResult`](../interfaces/DropColumnsResult.md)&gt;
+
+A promise that resolves to an object
+containing the new version number of the table after dropping the columns.

 ***

@@ -615,6 +630,50 @@ of the given query

 ***

+### stats()
+
+```ts
+abstract stats(): Promise<TableStatistics>
+```
+
+Returns table and fragment statistics
+
+#### Returns
+
+`Promise`&lt;[`TableStatistics`](../interfaces/TableStatistics.md)&gt;
+
+The table and fragment statistics
+
+***
+
+### tags()
+
+```ts
+abstract tags(): Promise<Tags>
+```
+
+Get a tags manager for this table.
+
+Tags allow you to label specific versions of a table with a human-readable name.
+The returned tags manager can be used to list, create, update, or delete tags.
+
+#### Returns
+
+`Promise`&lt;[`Tags`](Tags.md)&gt;
+
+A tags manager for this table
+
+#### Example
+
+```typescript
+const tagsManager = await table.tags();
+await tagsManager.create("v1", 1);
+const tags = await tagsManager.list();
+console.log(tags); // { "v1": { version: 1, manifestSize: ... } }
+```
+
+***
+
 ### toArrow()

 ```ts
@@ -634,7 +693,7 @@ Return the table as an arrow table
 #### update(opts)

 ```ts
-abstract update(opts): Promise<void>
+abstract update(opts): Promise<UpdateResult>
 ```

 Update existing records in the Table
@@ -645,7 +704,10 @@ Update existing records in the Table

 ##### Returns

-`Promise`&lt;`void`&gt;
+`Promise`&lt;[`UpdateResult`](../interfaces/UpdateResult.md)&gt;
+
+A promise that resolves to an object containing
+the number of rows updated and the new version number

 ##### Example

@@ -656,7 +718,7 @@ table.update({where:"x = 2", values:{"vector": [10, 10]}})
 #### update(opts)

 ```ts
-abstract update(opts): Promise<void>
+abstract update(opts): Promise<UpdateResult>
 ```

 Update existing records in the Table
@@ -667,7 +729,10 @@ Update existing records in the Table

 ##### Returns

-`Promise`&lt;`void`&gt;
+`Promise`&lt;[`UpdateResult`](../interfaces/UpdateResult.md)&gt;
+
+A promise that resolves to an object containing
+the number of rows updated and the new version number

 ##### Example

@@ -678,7 +743,7 @@ table.update({where:"x = 2", valuesSql:{"x": "x + 1"}})
 #### update(updates, options)

 ```ts
-abstract update(updates, options?): Promise<void>
+abstract update(updates, options?): Promise<UpdateResult>
 ```

 Update existing records in the Table
@@ -701,10 +766,6 @@ repeatedly calilng this method.
 * **updates**: `Record`&lt;`string`, `string`&gt; \| `Map`&lt;`string`, `string`&gt;
    the
    columns to update
-    Keys in the map should specify the name of the column to update.
-    Values in the map provide the new value of the column.  These can
-    be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
-    based on the row being updated (e.g. "my_col + 1")

 * **options?**: `Partial`&lt;[`UpdateOptions`](../interfaces/UpdateOptions.md)&gt;
    additional options to control
@@ -712,7 +773,15 @@ repeatedly calilng this method.

 ##### Returns

-`Promise`&lt;`void`&gt;
+`Promise`&lt;[`UpdateResult`](../interfaces/UpdateResult.md)&gt;
+
+A promise that resolves to an object
+containing the number of rows updated and the new version number
+
+Keys in the map should specify the name of the column to update.
+Values in the map provide the new value of the column.  These can
+be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
+based on the row being updated (e.g. "my_col + 1")

 ***

@@ -753,3 +822,26 @@ Retrieve the version of the table
 #### Returns

 `Promise`&lt;`number`&gt;
+
+***
+
+### waitForIndex()
+
+```ts
+abstract waitForIndex(indexNames, timeoutSeconds): Promise<void>
+```
+
+Waits for asynchronous indexing to complete on the table.
+
+#### Parameters
+
+* **indexNames**: `string`[]
+    The names of the indices to wait for
+
+* **timeoutSeconds**: `number`
+    The number of seconds to wait before timing out
+    This will raise an error if the indices are not created and fully indexed within the timeout.
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
--- a/docs/src/js/classes/TagContents.md
+++ b/docs/src/js/classes/TagContents.md
@@ -0,0 +1,35 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / TagContents
+
+# Class: TagContents
+
+## Constructors
+
+### new TagContents()
+
+```ts
+new TagContents(): TagContents
+```
+
+#### Returns
+
+[`TagContents`](TagContents.md)
+
+## Properties
+
+### manifestSize
+
+```ts
+manifestSize: number;
+```
+
+***
+
+### version
+
+```ts
+version: number;
+```
--- a/docs/src/js/classes/Tags.md
+++ b/docs/src/js/classes/Tags.md
@@ -0,0 +1,99 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / Tags
+
+# Class: Tags
+
+## Constructors
+
+### new Tags()
+
+```ts
+new Tags(): Tags
+```
+
+#### Returns
+
+[`Tags`](Tags.md)
+
+## Methods
+
+### create()
+
+```ts
+create(tag, version): Promise<void>
+```
+
+#### Parameters
+
+* **tag**: `string`
+
+* **version**: `number`
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
+
+***
+
+### delete()
+
+```ts
+delete(tag): Promise<void>
+```
+
+#### Parameters
+
+* **tag**: `string`
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
+
+***
+
+### getVersion()
+
+```ts
+getVersion(tag): Promise<number>
+```
+
+#### Parameters
+
+* **tag**: `string`
+
+#### Returns
+
+`Promise`&lt;`number`&gt;
+
+***
+
+### list()
+
+```ts
+list(): Promise<Record<string, TagContents>>
+```
+
+#### Returns
+
+`Promise`&lt;`Record`&lt;`string`, [`TagContents`](TagContents.md)&gt;&gt;
+
+***
+
+### update()
+
+```ts
+update(tag, version): Promise<void>
+```
+
+#### Parameters
+
+* **tag**: `string`
+
+* **version**: `number`
+
+#### Returns
+
+`Promise`&lt;`void`&gt;
--- a/docs/src/js/globals.md
+++ b/docs/src/js/globals.md
@@ -27,19 +27,28 @@
 - [QueryBase](classes/QueryBase.md)
 - [RecordBatchIterator](classes/RecordBatchIterator.md)
 - [Table](classes/Table.md)
+- [TagContents](classes/TagContents.md)
+- [Tags](classes/Tags.md)
 - [VectorColumnOptions](classes/VectorColumnOptions.md)
 - [VectorQuery](classes/VectorQuery.md)

 ## Interfaces

+- [AddColumnsResult](interfaces/AddColumnsResult.md)
 - [AddColumnsSql](interfaces/AddColumnsSql.md)
 - [AddDataOptions](interfaces/AddDataOptions.md)
+- [AddResult](interfaces/AddResult.md)
+- [AlterColumnsResult](interfaces/AlterColumnsResult.md)
 - [ClientConfig](interfaces/ClientConfig.md)
 - [ColumnAlteration](interfaces/ColumnAlteration.md)
 - [CompactionStats](interfaces/CompactionStats.md)
 - [ConnectionOptions](interfaces/ConnectionOptions.md)
 - [CreateTableOptions](interfaces/CreateTableOptions.md)
+- [DeleteResult](interfaces/DeleteResult.md)
+- [DropColumnsResult](interfaces/DropColumnsResult.md)
 - [ExecutableQuery](interfaces/ExecutableQuery.md)
+- [FragmentStatistics](interfaces/FragmentStatistics.md)
+- [FragmentSummaryStats](interfaces/FragmentSummaryStats.md)
 - [FtsOptions](interfaces/FtsOptions.md)
 - [FullTextQuery](interfaces/FullTextQuery.md)
 - [FullTextSearchOptions](interfaces/FullTextSearchOptions.md)
@@ -50,6 +59,7 @@
 - [IndexStatistics](interfaces/IndexStatistics.md)
 - [IvfFlatOptions](interfaces/IvfFlatOptions.md)
 - [IvfPqOptions](interfaces/IvfPqOptions.md)
+- [MergeResult](interfaces/MergeResult.md)
 - [OpenTableOptions](interfaces/OpenTableOptions.md)
 - [OptimizeOptions](interfaces/OptimizeOptions.md)
 - [OptimizeStats](interfaces/OptimizeStats.md)
@@ -57,9 +67,12 @@
 - [RemovalStats](interfaces/RemovalStats.md)
 - [RetryConfig](interfaces/RetryConfig.md)
 - [TableNamesOptions](interfaces/TableNamesOptions.md)
+- [TableStatistics](interfaces/TableStatistics.md)
 - [TimeoutConfig](interfaces/TimeoutConfig.md)
 - [UpdateOptions](interfaces/UpdateOptions.md)
+- [UpdateResult](interfaces/UpdateResult.md)
 - [Version](interfaces/Version.md)
+- [WriteExecutionOptions](interfaces/WriteExecutionOptions.md)

 ## Type Aliases

--- a/docs/src/js/interfaces/AddColumnsResult.md
+++ b/docs/src/js/interfaces/AddColumnsResult.md
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / AddColumnsResult
+
+# Interface: AddColumnsResult
+
+## Properties
+
+### version
+
+```ts
+version: number;
+```
--- a/docs/src/js/interfaces/AddResult.md
+++ b/docs/src/js/interfaces/AddResult.md
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / AddResult
+
+# Interface: AddResult
+
+## Properties
+
+### version
+
+```ts
+version: number;
+```
--- a/docs/src/js/interfaces/AlterColumnsResult.md
+++ b/docs/src/js/interfaces/AlterColumnsResult.md
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / AlterColumnsResult
+
+# Interface: AlterColumnsResult
+
+## Properties
+
+### version
+
+```ts
+version: number;
+```
--- a/docs/src/js/interfaces/DeleteResult.md
+++ b/docs/src/js/interfaces/DeleteResult.md
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / DeleteResult
+
+# Interface: DeleteResult
+
+## Properties
+
+### version
+
+```ts
+version: number;
+```
--- a/docs/src/js/interfaces/DropColumnsResult.md
+++ b/docs/src/js/interfaces/DropColumnsResult.md
@@ -0,0 +1,15 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / DropColumnsResult
+
+# Interface: DropColumnsResult
+
+## Properties
+
+### version
+
+```ts
+version: number;
+```
--- a/docs/src/js/interfaces/FragmentStatistics.md
+++ b/docs/src/js/interfaces/FragmentStatistics.md
@@ -0,0 +1,37 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / FragmentStatistics
+
+# Interface: FragmentStatistics
+
+## Properties
+
+### lengths
+
+```ts
+lengths: FragmentSummaryStats;
+```
+
+Statistics on the number of rows in the table fragments
+
+***
+
+### numFragments
+
+```ts
+numFragments: number;
+```
+
+The number of fragments in the table
+
+***
+
+### numSmallFragments
+
+```ts
+numSmallFragments: number;
+```
+
+The number of uncompacted fragments in the table
--- a/docs/src/js/interfaces/FragmentSummaryStats.md
+++ b/docs/src/js/interfaces/FragmentSummaryStats.md
@@ -0,0 +1,77 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / FragmentSummaryStats
+
+# Interface: FragmentSummaryStats
+
+## Properties
+
+### max
+
+```ts
+max: number;
+```
+
+The number of rows in the fragment with the most rows
+
+***
+
+### mean
+
+```ts
+mean: number;
+```
+
+The mean number of rows in the fragments
+
+***
+
+### min
+
+```ts
+min: number;
+```
+
+The number of rows in the fragment with the fewest rows
+
+***
+
+### p25
+
+```ts
+p25: number;
+```
+
+The 25th percentile of number of rows in the fragments
+
+***
+
+### p50
+
+```ts
+p50: number;
+```
+
+The 50th percentile of number of rows in the fragments
+
+***
+
+### p75
+
+```ts
+p75: number;
+```
+
+The 75th percentile of number of rows in the fragments
+
+***
+
+### p99
+
+```ts
+p99: number;
+```
+
+The 99th percentile of number of rows in the fragments
--- a/docs/src/js/interfaces/IndexOptions.md
+++ b/docs/src/js/interfaces/IndexOptions.md
@@ -39,3 +39,11 @@ and the same name, then an error will be returned.  This is true even if
 that index is out of date.

 The default is true
+
+***
+
+### waitTimeoutSeconds?
+
+```ts
+optional waitTimeoutSeconds: number;
+```
--- a/docs/src/js/interfaces/MergeResult.md
+++ b/docs/src/js/interfaces/MergeResult.md
@@ -0,0 +1,39 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / MergeResult
+
+# Interface: MergeResult
+
+## Properties
+
+### numDeletedRows
+
+```ts
+numDeletedRows: number;
+```
+
+***
+
+### numInsertedRows
+
+```ts
+numInsertedRows: number;
+```
+
+***
+
+### numUpdatedRows
+
+```ts
+numUpdatedRows: number;
+```
+
+***
+
+### version
+
+```ts
+version: number;
+```
--- a/docs/src/js/interfaces/TableStatistics.md
+++ b/docs/src/js/interfaces/TableStatistics.md
@@ -0,0 +1,47 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / TableStatistics
+
+# Interface: TableStatistics
+
+## Properties
+
+### fragmentStats
+
+```ts
+fragmentStats: FragmentStatistics;
+```
+
+Statistics on table fragments
+
+***
+
+### numIndices
+
+```ts
+numIndices: number;
+```
+
+The number of indices in the table
+
+***
+
+### numRows
+
+```ts
+numRows: number;
+```
+
+The number of rows in the table
+
+***
+
+### totalBytes
+
+```ts
+totalBytes: number;
+```
+
+The total number of bytes in the table
--- a/docs/src/js/interfaces/UpdateResult.md
+++ b/docs/src/js/interfaces/UpdateResult.md
@@ -0,0 +1,23 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / UpdateResult
+
+# Interface: UpdateResult
+
+## Properties
+
+### rowsUpdated
+
+```ts
+rowsUpdated: number;
+```
+
+***
+
+### version
+
+```ts
+version: number;
+```
--- a/docs/src/js/interfaces/WriteExecutionOptions.md
+++ b/docs/src/js/interfaces/WriteExecutionOptions.md
@@ -0,0 +1,26 @@
+[**@lancedb/lancedb**](../README.md) • **Docs**
+
+***
+
+[@lancedb/lancedb](../globals.md) / WriteExecutionOptions
+
+# Interface: WriteExecutionOptions
+
+## Properties
+
+### timeoutMs?
+
+```ts
+optional timeoutMs: number;
+```
+
+Maximum time to run the operation before cancelling it.
+
+By default, there is a 30-second timeout that is only enforced after the
+first attempt. This is to prevent spending too long retrying to resolve
+conflicts. For example, if a write attempt takes 20 seconds and fails,
+the second attempt will be cancelled after 10 seconds, hitting the
+30-second timeout. However, a write that takes one hour and succeeds on the
+first attempt will not be cancelled.
+
+When this is set, the timeout is enforced on all attempts, including the first.
--- a/java/core/pom.xml
+++ b/java/core/pom.xml
@@ -8,7 +8,7 @@
    <parent>
        <groupId>com.lancedb</groupId>
        <artifactId>lancedb-parent</artifactId>
-        <version>0.19.0-beta.7</version>
+        <version>0.19.1-beta.5</version>
        <relativePath>../pom.xml</relativePath>
    </parent>

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -6,7 +6,7 @@

    <groupId>com.lancedb</groupId>
    <artifactId>lancedb-parent</artifactId>
-    <version>0.19.0-beta.7</version>
+    <version>0.19.1-beta.5</version>
    <packaging>pom</packaging>

    <name>LanceDB Parent</name>
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "vectordb",
-  "version": "0.19.0-beta.7",
+  "version": "0.19.1-beta.5",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "vectordb",
-      "version": "0.19.0-beta.7",
+      "version": "0.19.1-beta.5",
      "cpu": [
        "x64",
        "arm64"
@@ -52,11 +52,11 @@
        "uuid": "^9.0.0"
      },
      "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.19.0-beta.7",
-        "@lancedb/vectordb-darwin-x64": "0.19.0-beta.7",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.7",
-        "@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.7",
-        "@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.7"
+        "@lancedb/vectordb-darwin-arm64": "0.19.1-beta.5",
+        "@lancedb/vectordb-darwin-x64": "0.19.1-beta.5",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.19.1-beta.5",
+        "@lancedb/vectordb-linux-x64-gnu": "0.19.1-beta.5",
+        "@lancedb/vectordb-win32-x64-msvc": "0.19.1-beta.5"
      },
      "peerDependencies": {
        "@apache-arrow/ts": "^14.0.2",
@@ -327,9 +327,9 @@
      }
    },
    "node_modules/@lancedb/vectordb-darwin-arm64": {
-      "version": "0.19.0-beta.7",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.19.0-beta.7.tgz",
-      "integrity": "sha512-HpbVKw4Vs+mPv7uPwaK7ilJlGrGdjOrNlC2mSkMCj0OlEwGRVcEcrSyijI7LXQH7ybEgNnDhSds5TuzBV26SGg==",
+      "version": "0.19.1-beta.5",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.19.1-beta.5.tgz",
+      "integrity": "sha512-9WcTw67We5HYGayDt5jFquGoyAVzFSt/I65ag8+q7H9q4ZYKxeDhgNyQZJ8BmXEvbJtnYtYBSAtTEdFKYMce6w==",
      "cpu": [
        "arm64"
      ],
@@ -340,9 +340,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-darwin-x64": {
-      "version": "0.19.0-beta.7",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.19.0-beta.7.tgz",
-      "integrity": "sha512-x3X7nqIYVZtxaa0uZUk/M99vKvDinZ5G0+8k2NqZ696YXGWKGyRxR6k8ZzKYCoCTSuYXnBftgKoIlwJGtNt8Bw==",
+      "version": "0.19.1-beta.5",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.19.1-beta.5.tgz",
+      "integrity": "sha512-6Pe3PxEMi0VKGsu5R7IhOxTijUM3b5olRAqhxfcu5ti34gXIPNtu7g+T9lS78LKe+0D0v2BjZEY/JQakIFBNRw==",
      "cpu": [
        "x64"
      ],
@@ -353,9 +353,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
-      "version": "0.19.0-beta.7",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.19.0-beta.7.tgz",
-      "integrity": "sha512-Vwj0HI3+b4NgXKf+5+W/GfLBCGoQMBGM47vA/ts1dpe/PxraOQYPDv67I5kbXkCQKwhal7b0iZx/PbMu0JZPyw==",
+      "version": "0.19.1-beta.5",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.19.1-beta.5.tgz",
+      "integrity": "sha512-VJbBd+Y+6L2SREaOO1OzuUfTPHXyHE4AcsZuM6VMyoeX8k7lPnaA+vNk96o0w4V2KFEAI6o4QPgrRAXmMAzmbg==",
      "cpu": [
        "arm64"
      ],
@@ -366,9 +366,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-linux-x64-gnu": {
-      "version": "0.19.0-beta.7",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.19.0-beta.7.tgz",
-      "integrity": "sha512-Dx2B6UWQei9D7Rt+MgHWqPTYtEK2w3EgsNb5ENEWUTZxH7lD/CV7Sw0JMK5LDG209fFcpXFerveF6J8ZC8uGBQ==",
+      "version": "0.19.1-beta.5",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.19.1-beta.5.tgz",
+      "integrity": "sha512-3wS8Zn5NmHoszXfrY4JzMimHoh5LAmVi3pTX4gD+C9kVGoUJcDBP7/CrAbjnAz7VzzAIPmz8kvBuPz8l9X4hjw==",
      "cpu": [
        "x64"
      ],
@@ -379,9 +379,9 @@
      ]
    },
    "node_modules/@lancedb/vectordb-win32-x64-msvc": {
-      "version": "0.19.0-beta.7",
-      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.19.0-beta.7.tgz",
-      "integrity": "sha512-F5LZGa+gkUH1TgsWZWLLAMejwXFIWdash7+85ip4k2M0ThyqLF/dtlldOvteUEd5+flxihGjHg6TUtnSY8XBFA==",
+      "version": "0.19.1-beta.5",
+      "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.19.1-beta.5.tgz",
+      "integrity": "sha512-TemM9cvrPa2jFCjvYmKnrL0DTHegi/+LOQ3No9nPDHie2ka2fM9O2q60fAbYsYz+Mo9aV7MvL49ATbNCyl9MLA==",
      "cpu": [
        "x64"
      ],
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
  "name": "vectordb",
-  "version": "0.19.0-beta.7",
+  "version": "0.19.1-beta.5",
  "description": " Serverless, low-latency vector database for AI applications",
  "private": false,
  "main": "dist/index.js",
@@ -89,10 +89,10 @@
    }
  },
  "optionalDependencies": {
-    "@lancedb/vectordb-darwin-x64": "0.19.0-beta.7",
-    "@lancedb/vectordb-darwin-arm64": "0.19.0-beta.7",
-    "@lancedb/vectordb-linux-x64-gnu": "0.19.0-beta.7",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.19.0-beta.7",
-    "@lancedb/vectordb-win32-x64-msvc": "0.19.0-beta.7"
+    "@lancedb/vectordb-darwin-x64": "0.19.1-beta.5",
+    "@lancedb/vectordb-darwin-arm64": "0.19.1-beta.5",
+    "@lancedb/vectordb-linux-x64-gnu": "0.19.1-beta.5",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.19.1-beta.5",
+    "@lancedb/vectordb-win32-x64-msvc": "0.19.1-beta.5"
  }
 }
--- a/nodejs/Cargo.toml
+++ b/nodejs/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.19.0-beta.7"
+version = "0.19.1-beta.5"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
@@ -28,6 +28,9 @@ napi-derive = "2.16.4"
 lzma-sys = { version = "*", features = ["static"] }
 log.workspace = true

+# Workaround for build failure until we can fix it.
+aws-lc-sys = "=0.28.0"
+
 [build-dependencies]
 napi-build = "2.1"

--- a/nodejs/test/arrow.test.ts
+++ b/nodejs/test/arrow.test.ts
@@ -374,6 +374,71 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
        expect(table2.numRows).toBe(4);
        expect(table2.schema).toEqual(schema);
      });
+
+      it("should correctly retain values in nested struct fields", async function () {
+        // Define test data with nested struct
+        const testData = [
+          {
+            id: "doc1",
+            vector: [1, 2, 3],
+            metadata: {
+              filePath: "/path/to/file1.ts",
+              startLine: 10,
+              endLine: 20,
+              text: "function test() { return true; }",
+            },
+          },
+          {
+            id: "doc2",
+            vector: [4, 5, 6],
+            metadata: {
+              filePath: "/path/to/file2.ts",
+              startLine: 30,
+              endLine: 40,
+              text: "function test2() { return false; }",
+            },
+          },
+        ];
+
+        // Create Arrow table from the data
+        const table = makeArrowTable(testData);
+
+        // Verify schema has the nested struct fields
+        const metadataField = table.schema.fields.find(
+          (f) => f.name === "metadata",
+        );
+        expect(metadataField).toBeDefined();
+        // biome-ignore lint/suspicious/noExplicitAny: accessing fields in different Arrow versions
+        const childNames = metadataField?.type.children.map((c: any) => c.name);
+        expect(childNames).toEqual([
+          "filePath",
+          "startLine",
+          "endLine",
+          "text",
+        ]);
+
+        // Convert to buffer and back (simulating storage and retrieval)
+        const buf = await fromTableToBuffer(table);
+        const retrievedTable = tableFromIPC(buf);
+
+        // Verify the retrieved table has the same structure
+        const rows = [];
+        for (let i = 0; i < retrievedTable.numRows; i++) {
+          rows.push(retrievedTable.get(i));
+        }
+
+        // Check values in the first row
+        const firstRow = rows[0];
+        expect(firstRow.id).toBe("doc1");
+        expect(firstRow.vector.toJSON()).toEqual([1, 2, 3]);
+
+        // Verify metadata values are preserved (regression check for the nested struct bug)
+        expect(firstRow.metadata).toBeDefined();
+        expect(firstRow.metadata.filePath).toBe("/path/to/file1.ts");
+        expect(firstRow.metadata.startLine).toBe(10);
+        expect(firstRow.metadata.endLine).toBe(20);
+        expect(firstRow.metadata.text).toBe("function test() { return true; }");
+      });
    });

    class DummyEmbedding extends EmbeddingFunction<string> {
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -34,6 +34,7 @@ import {
 } from "../lancedb/embedding";
 import { Index } from "../lancedb/indices";
 import { instanceOfFullTextQuery } from "../lancedb/query";
+import exp = require("constants");

 describe.each([arrow15, arrow16, arrow17, arrow18])(
  "Given a table",
@@ -71,8 +72,33 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      await expect(table.countRows()).resolves.toBe(3);
    });

-    it("should overwrite data if asked", async () => {
+    it("should show table stats", async () => {
      await table.add([{ id: 1 }, { id: 2 }]);
+      await table.add([{ id: 1 }]);
+      await expect(table.stats()).resolves.toEqual({
+        fragmentStats: {
+          lengths: {
+            max: 2,
+            mean: 1,
+            min: 1,
+            p25: 1,
+            p50: 2,
+            p75: 2,
+            p99: 2,
+          },
+          numFragments: 2,
+          numSmallFragments: 2,
+        },
+        numIndices: 0,
+        numRows: 3,
+        totalBytes: 24,
+      });
+    });
+
+    it("should overwrite data if asked", async () => {
+      const addRes = await table.add([{ id: 1 }, { id: 2 }]);
+      expect(addRes).toHaveProperty("version");
+      expect(addRes.version).toBe(2);
      await table.add([{ id: 1 }], { mode: "overwrite" });
      await expect(table.countRows()).resolves.toBe(1);
    });
@@ -88,7 +114,11 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      await table.add([{ id: 1 }]);
      expect(await table.countRows("id == 1")).toBe(1);
      expect(await table.countRows("id == 7")).toBe(0);
-      await table.update({ id: "7" });
+      const updateRes = await table.update({ id: "7" });
+      expect(updateRes).toHaveProperty("version");
+      expect(updateRes.version).toBe(3);
+      expect(updateRes).toHaveProperty("rowsUpdated");
+      expect(updateRes.rowsUpdated).toBe(1);
      expect(await table.countRows("id == 1")).toBe(0);
      expect(await table.countRows("id == 7")).toBe(1);
      await table.add([{ id: 2 }]);
@@ -315,11 +345,17 @@ describe("merge insert", () => {
      { a: 3, b: "y" },
      { a: 4, b: "z" },
    ];
-    await table
+    const mergeInsertRes = await table
      .mergeInsert("a")
      .whenMatchedUpdateAll()
      .whenNotMatchedInsertAll()
-      .execute(newData);
+      .execute(newData, { timeoutMs: 10_000 });
+    expect(mergeInsertRes).toHaveProperty("version");
+    expect(mergeInsertRes.version).toBe(2);
+    expect(mergeInsertRes.numInsertedRows).toBe(1);
+    expect(mergeInsertRes.numUpdatedRows).toBe(2);
+    expect(mergeInsertRes.numDeletedRows).toBe(0);
+
    const expected = [
      { a: 1, b: "a" },
      { a: 2, b: "x" },
@@ -337,10 +373,12 @@ describe("merge insert", () => {
      { a: 3, b: "y" },
      { a: 4, b: "z" },
    ];
-    await table
+    const mergeInsertRes = await table
      .mergeInsert("a")
      .whenMatchedUpdateAll({ where: "target.b = 'b'" })
      .execute(newData);
+    expect(mergeInsertRes).toHaveProperty("version");
+    expect(mergeInsertRes.version).toBe(2);

    const expected = [
      { a: 1, b: "a" },
@@ -425,6 +463,20 @@ describe("merge insert", () => {
    res = res.sort((a, b) => a.a - b.a);
    expect(res).toEqual(expected);
  });
+
+  test("timeout", async () => {
+    const newData = [
+      { a: 2, b: "x" },
+      { a: 4, b: "z" },
+    ];
+    await expect(
+      table
+        .mergeInsert("a")
+        .whenMatchedUpdateAll()
+        .whenNotMatchedInsertAll()
+        .execute(newData, { timeoutMs: 0 }),
+    ).rejects.toThrow("merge insert timed out");
+  });
 });

 describe("When creating an index", () => {
@@ -507,6 +559,15 @@ describe("When creating an index", () => {
    expect(indices2.length).toBe(0);
  });

+  it("should wait for index readiness", async () => {
+    // Create an index and then wait for it to be ready
+    await tbl.createIndex("vec");
+    const indices = await tbl.listIndices();
+    expect(indices.length).toBeGreaterThan(0);
+    const idxName = indices[0].name;
+    await expect(tbl.waitForIndex([idxName], 5)).resolves.toBeUndefined();
+  });
+
  it("should search with distance range", async () => {
    await tbl.createIndex("vec");

@@ -824,6 +885,7 @@ describe("When creating an index", () => {
    // Only build index over v1
    await tbl.createIndex("vec", {
      config: Index.ivfPq({ numPartitions: 2, numSubVectors: 2 }),
+      waitTimeoutSeconds: 30,
    });

    const rst = await tbl
@@ -990,15 +1052,19 @@ describe("schema evolution", function () {
      { id: 1n, vector: [0.1, 0.2] },
    ]);
    // Can create a non-nullable column only through addColumns at the moment.
-    await table.addColumns([
+    const addColumnsRes = await table.addColumns([
      { name: "price", valueSql: "cast(10.0 as double)" },
    ]);
+    expect(addColumnsRes).toHaveProperty("version");
+    expect(addColumnsRes.version).toBe(2);
    expect(await table.schema()).toEqual(schema);

-    await table.alterColumns([
+    const alterColumnsRes = await table.alterColumns([
      { path: "id", rename: "new_id" },
      { path: "price", nullable: true },
    ]);
+    expect(alterColumnsRes).toHaveProperty("version");
+    expect(alterColumnsRes.version).toBe(3);

    const expectedSchema = new Schema([
      new Field("new_id", new Int64(), true),
@@ -1116,7 +1182,9 @@ describe("schema evolution", function () {
    const table = await con.createTable("vectors", [
      { id: 1n, vector: [0.1, 0.2] },
    ]);
-    await table.dropColumns(["vector"]);
+    const dropColumnsRes = await table.dropColumns(["vector"]);
+    expect(dropColumnsRes).toHaveProperty("version");
+    expect(dropColumnsRes.version).toBe(2);

    const expectedSchema = new Schema([new Field("id", new Int64(), true)]);
    expect(await table.schema()).toEqual(expectedSchema);
@@ -1168,6 +1236,99 @@ describe("when dealing with versioning", () => {
  });
 });

+describe("when dealing with tags", () => {
+  let tmpDir: tmp.DirResult;
+  beforeEach(() => {
+    tmpDir = tmp.dirSync({ unsafeCleanup: true });
+  });
+  afterEach(() => {
+    tmpDir.removeCallback();
+  });
+
+  it("can manage tags", async () => {
+    const conn = await connect(tmpDir.name, {
+      readConsistencyInterval: 0,
+    });
+
+    const table = await conn.createTable("my_table", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+    expect(await table.version()).toBe(1);
+
+    await table.add([{ id: 2n, vector: [0.3, 0.4] }]);
+    expect(await table.version()).toBe(2);
+
+    const tagsManager = await table.tags();
+
+    const initialTags = await tagsManager.list();
+    expect(Object.keys(initialTags).length).toBe(0);
+
+    const tag1 = "tag1";
+    await tagsManager.create(tag1, 1);
+    expect(await tagsManager.getVersion(tag1)).toBe(1);
+
+    const tagsAfterFirst = await tagsManager.list();
+    expect(Object.keys(tagsAfterFirst).length).toBe(1);
+    expect(tagsAfterFirst).toHaveProperty(tag1);
+    expect(tagsAfterFirst[tag1].version).toBe(1);
+
+    await tagsManager.create("tag2", 2);
+    expect(await tagsManager.getVersion("tag2")).toBe(2);
+
+    const tagsAfterSecond = await tagsManager.list();
+    expect(Object.keys(tagsAfterSecond).length).toBe(2);
+    expect(tagsAfterSecond).toHaveProperty(tag1);
+    expect(tagsAfterSecond[tag1].version).toBe(1);
+    expect(tagsAfterSecond).toHaveProperty("tag2");
+    expect(tagsAfterSecond["tag2"].version).toBe(2);
+
+    await table.add([{ id: 3n, vector: [0.5, 0.6] }]);
+    await tagsManager.update(tag1, 3);
+    expect(await tagsManager.getVersion(tag1)).toBe(3);
+
+    await tagsManager.delete("tag2");
+    const tagsAfterDelete = await tagsManager.list();
+    expect(Object.keys(tagsAfterDelete).length).toBe(1);
+    expect(tagsAfterDelete).toHaveProperty(tag1);
+    expect(tagsAfterDelete[tag1].version).toBe(3);
+
+    await table.add([{ id: 4n, vector: [0.7, 0.8] }]);
+    expect(await table.version()).toBe(4);
+
+    await table.checkout(tag1);
+    expect(await table.version()).toBe(3);
+
+    await table.checkoutLatest();
+    expect(await table.version()).toBe(4);
+  });
+
+  it("can checkout and restore tags", async () => {
+    const conn = await connect(tmpDir.name, {
+      readConsistencyInterval: 0,
+    });
+
+    const table = await conn.createTable("my_table", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+    expect(await table.version()).toBe(1);
+    expect(await table.countRows()).toBe(1);
+    const tagsManager = await table.tags();
+    const tag1 = "tag1";
+    await tagsManager.create(tag1, 1);
+    await table.add([{ id: 2n, vector: [0.3, 0.4] }]);
+    const tag2 = "tag2";
+    await tagsManager.create(tag2, 2);
+    expect(await table.version()).toBe(2);
+    await table.checkout(tag1);
+    expect(await table.version()).toBe(1);
+    await table.restore();
+    expect(await table.version()).toBe(3);
+    expect(await table.countRows()).toBe(1);
+    await table.add([{ id: 3n, vector: [0.5, 0.6] }]);
+    expect(await table.countRows()).toBe(2);
+  });
+});
+
 describe("when optimizing a dataset", () => {
  let tmpDir: tmp.DirResult;
  let table: Table;
--- a/nodejs/lancedb/arrow.ts
+++ b/nodejs/lancedb/arrow.ts
@@ -639,8 +639,9 @@ function transposeData(
 ): Vector {
  if (field.type instanceof Struct) {
    const childFields = field.type.children;
+    const fullPath = [...path, field.name];
    const childVectors = childFields.map((child) => {
-      return transposeData(data, child, [...path, child.name]);
+      return transposeData(data, child, fullPath);
    });
    const structData = makeData({
      type: field.type,
@@ -652,7 +653,14 @@ function transposeData(
    const values = data.map((datum) => {
      let current: unknown = datum;
      for (const key of valuesPath) {
-        if (isObject(current) && Object.hasOwn(current, key)) {
+        if (current == null) {
+          return null;
+        }
+
+        if (
+          isObject(current) &&
+          (Object.hasOwn(current, key) || key in current)
+        ) {
          current = current[key];
        } else {
          return null;
--- a/nodejs/lancedb/index.ts
+++ b/nodejs/lancedb/index.ts
@@ -23,6 +23,18 @@ export {
  OptimizeStats,
  CompactionStats,
  RemovalStats,
+  TableStatistics,
+  FragmentStatistics,
+  FragmentSummaryStats,
+  Tags,
+  TagContents,
+  MergeResult,
+  AddResult,
+  AddColumnsResult,
+  AlterColumnsResult,
+  DeleteResult,
+  DropColumnsResult,
+  UpdateResult,
 } from "./native.js";

 export {
@@ -74,7 +86,7 @@ export {
  ColumnAlteration,
 } from "./table";

-export { MergeInsertBuilder } from "./merge";
+export { MergeInsertBuilder, WriteExecutionOptions } from "./merge";

 export * as embedding from "./embedding";
 export * as rerankers from "./rerankers";
--- a/nodejs/lancedb/indices.ts
+++ b/nodejs/lancedb/indices.ts
@@ -681,4 +681,6 @@ export interface IndexOptions {
   * The default is true
   */
  replace?: boolean;
+
+  waitTimeoutSeconds?: number;
 }
--- a/nodejs/lancedb/merge.ts
+++ b/nodejs/lancedb/merge.ts
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
 import { Data, Schema, fromDataToBuffer } from "./arrow";
-import { NativeMergeInsertBuilder } from "./native";
+import { MergeResult, NativeMergeInsertBuilder } from "./native";

 /** A builder used to create and run a merge insert operation */
 export class MergeInsertBuilder {
@@ -73,9 +73,12 @@ export class MergeInsertBuilder {
  /**
   * Executes the merge insert operation
   *
-   * Nothing is returned but the `Table` is updated
+   * @returns {Promise<MergeResult>} the merge result
   */
-  async execute(data: Data): Promise<void> {
+  async execute(
+    data: Data,
+    execOptions?: Partial<WriteExecutionOptions>,
+  ): Promise<MergeResult> {
    let schema: Schema;
    if (this.#schema instanceof Promise) {
      schema = await this.#schema;
@@ -83,7 +86,28 @@ export class MergeInsertBuilder {
    } else {
      schema = this.#schema;
    }
+
+    if (execOptions?.timeoutMs !== undefined) {
+      this.#native.setTimeout(execOptions.timeoutMs);
+    }
+
    const buffer = await fromDataToBuffer(data, undefined, schema);
-    await this.#native.execute(buffer);
+    return await this.#native.execute(buffer);
  }
 }
+
+export interface WriteExecutionOptions {
+  /**
+   * Maximum time to run the operation before cancelling it.
+   *
+   * By default, there is a 30-second timeout that is only enforced after the
+   * first attempt. This is to prevent spending too long retrying to resolve
+   * conflicts. For example, if a write attempt takes 20 seconds and fails,
+   * the second attempt will be cancelled after 10 seconds, hitting the
+   * 30-second timeout. However, a write that takes one hour and succeeds on the
+   * first attempt will not be cancelled.
+   *
+   * When this is set, the timeout is enforced on all attempts, including the first.
+   */
+  timeoutMs?: number;
+}
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -16,10 +16,18 @@ import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
 import { IndexOptions } from "./indices";
 import { MergeInsertBuilder } from "./merge";
 import {
+  AddColumnsResult,
  AddColumnsSql,
+  AddResult,
+  AlterColumnsResult,
+  DeleteResult,
+  DropColumnsResult,
  IndexConfig,
  IndexStatistics,
  OptimizeStats,
+  TableStatistics,
+  Tags,
+  UpdateResult,
  Table as _NativeTable,
 } from "./native";
 import {
@@ -124,12 +132,19 @@ export abstract class Table {
  /**
   * Insert records into this Table.
   * @param {Data} data Records to be inserted into the Table
+   * @returns {Promise<AddResult>} A promise that resolves to an object
+   * containing the new version number of the table
   */
-  abstract add(data: Data, options?: Partial<AddDataOptions>): Promise<void>;
+  abstract add(
+    data: Data,
+    options?: Partial<AddDataOptions>,
+  ): Promise<AddResult>;
  /**
   * Update existing records in the Table
   * @param opts.values The values to update. The keys are the column names and the values
   * are the values to set.
+   * @returns {Promise<UpdateResult>} A promise that resolves to an object containing
+   * the number of rows updated and the new version number
   * @example
   * ```ts
   * table.update({where:"x = 2", values:{"vector": [10, 10]}})
@@ -139,11 +154,13 @@ export abstract class Table {
    opts: {
      values: Map<string, IntoSql> | Record<string, IntoSql>;
    } & Partial<UpdateOptions>,
-  ): Promise<void>;
+  ): Promise<UpdateResult>;
  /**
   * Update existing records in the Table
   * @param opts.valuesSql The values to update. The keys are the column names and the values
   * are the values to set. The values are SQL expressions.
+   * @returns {Promise<UpdateResult>} A promise that resolves to an object containing
+   * the number of rows updated and the new version number
   * @example
   * ```ts
   * table.update({where:"x = 2", valuesSql:{"x": "x + 1"}})
@@ -153,7 +170,7 @@ export abstract class Table {
    opts: {
      valuesSql: Map<string, string> | Record<string, string>;
    } & Partial<UpdateOptions>,
-  ): Promise<void>;
+  ): Promise<UpdateResult>;
  /**
   * Update existing records in the Table
   *
@@ -171,6 +188,8 @@ export abstract class Table {
   * repeatedly calilng this method.
   * @param {Map<string, string> | Record<string, string>} updates - the
   * columns to update
+   * @returns {Promise<UpdateResult>} A promise that resolves to an object
+   * containing the number of rows updated and the new version number
   *
   * Keys in the map should specify the name of the column to update.
   * Values in the map provide the new value of the column.  These can
@@ -182,12 +201,16 @@ export abstract class Table {
  abstract update(
    updates: Map<string, string> | Record<string, string>,
    options?: Partial<UpdateOptions>,
-  ): Promise<void>;
+  ): Promise<UpdateResult>;

  /** Count the total number of rows in the dataset. */
  abstract countRows(filter?: string): Promise<number>;
-  /** Delete the rows that satisfy the predicate. */
-  abstract delete(predicate: string): Promise<void>;
+  /**
+   * Delete the rows that satisfy the predicate.
+   * @returns {Promise<DeleteResult>} A promise that resolves to an object
+   * containing the new version number of the table
+   */
+  abstract delete(predicate: string): Promise<DeleteResult>;
  /**
   * Create an index to speed up queries.
   *
@@ -246,6 +269,19 @@ export abstract class Table {
   */
  abstract prewarmIndex(name: string): Promise<void>;

+  /**
+   * Waits for asynchronous indexing to complete on the table.
+   *
+   * @param indexNames The names of the indices to wait for
+   * @param timeoutSeconds The number of seconds to wait before timing out
+   *
+   * This will raise an error if the indices are not created and fully indexed within the timeout.
+   */
+  abstract waitForIndex(
+    indexNames: string[],
+    timeoutSeconds: number,
+  ): Promise<void>;
+
  /**
   * Create a {@link Query} Builder.
   *
@@ -328,15 +364,23 @@ export abstract class Table {
   * the SQL expression to use to calculate the value of the new column. These
   * expressions will be evaluated for each row in the table, and can
   * reference existing columns in the table.
+   * @returns {Promise<AddColumnsResult>} A promise that resolves to an object
+   * containing the new version number of the table after adding the columns.
   */
-  abstract addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void>;
+  abstract addColumns(
+    newColumnTransforms: AddColumnsSql[],
+  ): Promise<AddColumnsResult>;

  /**
   * Alter the name or nullability of columns.
   * @param {ColumnAlteration[]} columnAlterations One or more alterations to
   * apply to columns.
+   * @returns {Promise<AlterColumnsResult>} A promise that resolves to an object
+   * containing the new version number of the table after altering the columns.
   */
-  abstract alterColumns(columnAlterations: ColumnAlteration[]): Promise<void>;
+  abstract alterColumns(
+    columnAlterations: ColumnAlteration[],
+  ): Promise<AlterColumnsResult>;
  /**
   * Drop one or more columns from the dataset
   *
@@ -347,8 +391,10 @@ export abstract class Table {
   * @param {string[]} columnNames The names of the columns to drop. These can
   * be nested column references (e.g. "a.b.c") or top-level column names
   * (e.g. "a").
+   * @returns {Promise<DropColumnsResult>} A promise that resolves to an object
+   * containing the new version number of the table after dropping the columns.
   */
-  abstract dropColumns(columnNames: string[]): Promise<void>;
+  abstract dropColumns(columnNames: string[]): Promise<DropColumnsResult>;
  /** Retrieve the version of the table */

  abstract version(): Promise<number>;
@@ -361,7 +407,7 @@ export abstract class Table {
   *
   * Calling this method will set the table into time-travel mode. If you
   * wish to return to standard mode, call `checkoutLatest`.
-   * @param {number} version The version to checkout
+   * @param {number | string} version The version to checkout: either a version number or a tag name
   * @example
   * ```typescript
   * import * as lancedb from "@lancedb/lancedb"
@@ -377,7 +423,8 @@ export abstract class Table {
   * console.log(await table.version()); // 2
   * ```
   */
-  abstract checkout(version: number): Promise<void>;
+  abstract checkout(version: number | string): Promise<void>;
+
  /**
   * Checkout the latest version of the table. _This is an in-place operation._
   *
@@ -391,6 +438,23 @@ export abstract class Table {
   */
  abstract listVersions(): Promise<Version[]>;

+  /**
+   * Get a tags manager for this table.
+   *
+   * Tags allow you to label specific versions of a table with a human-readable name.
+   * The returned tags manager can be used to list, create, update, or delete tags.
+   *
+   * @returns {Promise<Tags>} A promise that resolves to a tags manager for this table
+   * @example
+   * ```typescript
+   * const tagsManager = await table.tags();
+   * await tagsManager.create("v1", 1);
+   * const tags = await tagsManager.list();
+   * console.log(tags); // { "v1": { version: 1, manifestSize: ... } }
+   * ```
+   */
+  abstract tags(): Promise<Tags>;
+
  /**
   * Restore the table to the currently checked out version
   *
@@ -450,6 +514,13 @@ export abstract class Table {
   * Use {@link Table.listIndices} to find the names of the indices.
   */
  abstract indexStats(name: string): Promise<IndexStatistics | undefined>;
+
+  /** Returns table and fragment statistics
+   *
+   * @returns {Promise<TableStatistics>} A promise that resolves to the table and fragment statistics
+   *
+   */
+  abstract stats(): Promise<TableStatistics>;
 }

 export class LocalTable extends Table {
@@ -489,12 +560,12 @@ export class LocalTable extends Table {
    return tbl.schema;
  }

-  async add(data: Data, options?: Partial<AddDataOptions>): Promise<void> {
+  async add(data: Data, options?: Partial<AddDataOptions>): Promise<AddResult> {
    const mode = options?.mode ?? "append";
    const schema = await this.schema();

    const buffer = await fromDataToBuffer(data, undefined, schema);
-    await this.inner.add(buffer, mode);
+    return await this.inner.add(buffer, mode);
  }

  async update(
@@ -507,7 +578,7 @@ export class LocalTable extends Table {
          valuesSql: Map<string, string> | Record<string, string>;
        } & Partial<UpdateOptions>),
    options?: Partial<UpdateOptions>,
-  ) {
+  ): Promise<UpdateResult> {
    const isValues =
      "values" in optsOrUpdates && typeof optsOrUpdates.values !== "string";
    const isValuesSql =
@@ -554,22 +625,27 @@ export class LocalTable extends Table {
        columns = Object.entries(optsOrUpdates as Record<string, string>);
        predicate = options?.where;
    }
-    await this.inner.update(predicate, columns);
+    return await this.inner.update(predicate, columns);
  }

  async countRows(filter?: string): Promise<number> {
    return await this.inner.countRows(filter);
  }

-  async delete(predicate: string): Promise<void> {
-    await this.inner.delete(predicate);
+  async delete(predicate: string): Promise<DeleteResult> {
+    return await this.inner.delete(predicate);
  }

  async createIndex(column: string, options?: Partial<IndexOptions>) {
    // Bit of a hack to get around the fact that TS has no package-scope.
    // biome-ignore lint/suspicious/noExplicitAny: skip
    const nativeIndex = (options?.config as any)?.inner;
-    await this.inner.createIndex(nativeIndex, column, options?.replace);
+    await this.inner.createIndex(
+      nativeIndex,
+      column,
+      options?.replace,
+      options?.waitTimeoutSeconds,
+    );
  }

  async dropIndex(name: string): Promise<void> {
@@ -580,6 +656,13 @@ export class LocalTable extends Table {
    await this.inner.prewarmIndex(name);
  }

+  async waitForIndex(
+    indexNames: string[],
+    timeoutSeconds: number,
+  ): Promise<void> {
+    await this.inner.waitForIndex(indexNames, timeoutSeconds);
+  }
+
  query(): Query {
    return new Query(this.inner);
  }
@@ -638,11 +721,15 @@ export class LocalTable extends Table {

  // TODO: Support BatchUDF

-  async addColumns(newColumnTransforms: AddColumnsSql[]): Promise<void> {
-    await this.inner.addColumns(newColumnTransforms);
+  async addColumns(
+    newColumnTransforms: AddColumnsSql[],
+  ): Promise<AddColumnsResult> {
+    return await this.inner.addColumns(newColumnTransforms);
  }

-  async alterColumns(columnAlterations: ColumnAlteration[]): Promise<void> {
+  async alterColumns(
+    columnAlterations: ColumnAlteration[],
+  ): Promise<AlterColumnsResult> {
    const processedAlterations = columnAlterations.map((alteration) => {
      if (typeof alteration.dataType === "string") {
        return {
@@ -663,19 +750,22 @@ export class LocalTable extends Table {
      }
    });

-    await this.inner.alterColumns(processedAlterations);
+    return await this.inner.alterColumns(processedAlterations);
  }

-  async dropColumns(columnNames: string[]): Promise<void> {
-    await this.inner.dropColumns(columnNames);
+  async dropColumns(columnNames: string[]): Promise<DropColumnsResult> {
+    return await this.inner.dropColumns(columnNames);
  }

  async version(): Promise<number> {
    return await this.inner.version();
  }

-  async checkout(version: number): Promise<void> {
-    await this.inner.checkout(version);
+  async checkout(version: number | string): Promise<void> {
+    if (typeof version === "string") {
+      return this.inner.checkoutTag(version);
+    }
+    return this.inner.checkout(version);
  }

  async checkoutLatest(): Promise<void> {
@@ -694,6 +784,10 @@ export class LocalTable extends Table {
    await this.inner.restore();
  }

+  async tags(): Promise<Tags> {
+    return await this.inner.tags();
+  }
+
  async optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats> {
    let cleanupOlderThanMs;
    if (
@@ -724,6 +818,11 @@ export class LocalTable extends Table {
    }
    return stats;
  }
+
+  async stats(): Promise<TableStatistics> {
+    return await this.inner.stats();
+  }
+
  mergeInsert(on: string | string[]): MergeInsertBuilder {
    on = Array.isArray(on) ? on : [on];
    return new MergeInsertBuilder(this.inner.mergeInsert(on), this.schema());
--- a/nodejs/npm/darwin-arm64/package.json
+++ b/nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-arm64",
-	"version": "0.19.0-beta.7",
+	"version": "0.19.1-beta.5",
 	"os": ["darwin"],
 	"cpu": ["arm64"],
 	"main": "lancedb.darwin-arm64.node",
--- a/nodejs/npm/darwin-x64/package.json
+++ b/nodejs/npm/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-darwin-x64",
-	"version": "0.19.0-beta.7",
+	"version": "0.19.1-beta.5",
 	"os": ["darwin"],
 	"cpu": ["x64"],
 	"main": "lancedb.darwin-x64.node",
--- a/nodejs/npm/linux-arm64-gnu/package.json
+++ b/nodejs/npm/linux-arm64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-gnu",
-	"version": "0.19.0-beta.7",
+	"version": "0.19.1-beta.5",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-gnu.node",
--- a/nodejs/npm/linux-arm64-musl/package.json
+++ b/nodejs/npm/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-arm64-musl",
-	"version": "0.19.0-beta.7",
+	"version": "0.19.1-beta.5",
 	"os": ["linux"],
 	"cpu": ["arm64"],
 	"main": "lancedb.linux-arm64-musl.node",
--- a/nodejs/npm/linux-x64-gnu/package.json
+++ b/nodejs/npm/linux-x64-gnu/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-gnu",
-	"version": "0.19.0-beta.7",
+	"version": "0.19.1-beta.5",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-gnu.node",
--- a/nodejs/npm/linux-x64-musl/package.json
+++ b/nodejs/npm/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-linux-x64-musl",
-	"version": "0.19.0-beta.7",
+	"version": "0.19.1-beta.5",
 	"os": ["linux"],
 	"cpu": ["x64"],
 	"main": "lancedb.linux-x64-musl.node",
--- a/nodejs/npm/win32-arm64-msvc/package.json
+++ b/nodejs/npm/win32-arm64-msvc/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.19.0-beta.7",
+  "version": "0.19.1-beta.5",
  "os": [
    "win32"
  ],
--- a/nodejs/npm/win32-x64-msvc/package.json
+++ b/nodejs/npm/win32-x64-msvc/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@lancedb/lancedb-win32-x64-msvc",
-	"version": "0.19.0-beta.7",
+	"version": "0.19.1-beta.5",
 	"os": ["win32"],
 	"cpu": ["x64"],
 	"main": "lancedb.win32-x64-msvc.node",
--- a/nodejs/package-lock.json
+++ b/nodejs/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "@lancedb/lancedb",
-  "version": "0.19.0-beta.7",
+  "version": "0.19.1-beta.5",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "@lancedb/lancedb",
-      "version": "0.19.0-beta.7",
+      "version": "0.19.1-beta.5",
      "cpu": [
        "x64",
        "arm64"
--- a/nodejs/package.json
+++ b/nodejs/package.json
@@ -11,7 +11,7 @@
    "ann"
  ],
  "private": false,
-  "version": "0.19.0-beta.7",
+  "version": "0.19.1-beta.5",
  "main": "dist/index.js",
  "exports": {
    ".": "./dist/index.js",
--- a/nodejs/src/merge.rs
+++ b/nodejs/src/merge.rs
@@ -1,11 +1,13 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

+use std::time::Duration;
+
 use lancedb::{arrow::IntoArrow, ipc::ipc_file_to_batches, table::merge::MergeInsertBuilder};
 use napi::bindgen_prelude::*;
 use napi_derive::napi;

-use crate::error::convert_error;
+use crate::{error::convert_error, table::MergeResult};

 #[napi]
 #[derive(Clone)]
@@ -36,8 +38,13 @@ impl NativeMergeInsertBuilder {
        this
    }

+    #[napi]
+    pub fn set_timeout(&mut self, timeout: u32) {
+        self.inner.timeout(Duration::from_millis(timeout as u64));
+    }
+
    #[napi(catch_unwind)]
-    pub async fn execute(&self, buf: Buffer) -> napi::Result<()> {
+    pub async fn execute(&self, buf: Buffer) -> napi::Result<MergeResult> {
        let data = ipc_file_to_batches(buf.to_vec())
            .and_then(IntoArrow::into_arrow)
            .map_err(|e| {
@@ -46,12 +53,13 @@ impl NativeMergeInsertBuilder {

        let this = self.clone();

-        this.inner.execute(data).await.map_err(|e| {
+        let res = this.inner.execute(data).await.map_err(|e| {
            napi::Error::from_reason(format!(
                "Failed to execute merge insert: {}",
                convert_error(&e)
            ))
-        })
+        })?;
+        Ok(res.into())
    }
 }

--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -75,7 +75,7 @@ impl Table {
    }

    #[napi(catch_unwind)]
-    pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<()> {
+    pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<AddResult> {
        let batches = ipc_file_to_batches(buf.to_vec())
            .map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
        let mut op = self.inner_ref()?.add(batches);
@@ -88,7 +88,8 @@ impl Table {
            return Err(napi::Error::from_reason(format!("Invalid mode: {}", mode)));
        };

-        op.execute().await.default_error()
+        let res = op.execute().await.default_error()?;
+        Ok(res.into())
    }

    #[napi(catch_unwind)]
@@ -101,8 +102,9 @@ impl Table {
    }

    #[napi(catch_unwind)]
-    pub async fn delete(&self, predicate: String) -> napi::Result<()> {
-        self.inner_ref()?.delete(&predicate).await.default_error()
+    pub async fn delete(&self, predicate: String) -> napi::Result<DeleteResult> {
+        let res = self.inner_ref()?.delete(&predicate).await.default_error()?;
+        Ok(res.into())
    }

    #[napi(catch_unwind)]
@@ -111,6 +113,7 @@ impl Table {
        index: Option<&Index>,
        column: String,
        replace: Option<bool>,
+        wait_timeout_s: Option<i64>,
    ) -> napi::Result<()> {
        let lancedb_index = if let Some(index) = index {
            index.consume()?
@@ -121,6 +124,10 @@ impl Table {
        if let Some(replace) = replace {
            builder = builder.replace(replace);
        }
+        if let Some(timeout) = wait_timeout_s {
+            builder =
+                builder.wait_timeout(std::time::Duration::from_secs(timeout.try_into().unwrap()));
+        }
        builder.execute().await.default_error()
    }

@@ -140,12 +147,30 @@ impl Table {
            .default_error()
    }

+    #[napi(catch_unwind)]
+    pub async fn wait_for_index(&self, index_names: Vec<String>, timeout_s: i64) -> Result<()> {
+        let timeout = std::time::Duration::from_secs(timeout_s.try_into().unwrap());
+        let index_names: Vec<&str> = index_names.iter().map(|s| s.as_str()).collect();
+        let slice: &[&str] = &index_names;
+
+        self.inner_ref()?
+            .wait_for_index(slice, timeout)
+            .await
+            .default_error()
+    }
+
+    #[napi(catch_unwind)]
+    pub async fn stats(&self) -> Result<TableStatistics> {
+        let stats = self.inner_ref()?.stats().await.default_error()?;
+        Ok(stats.into())
+    }
+
    #[napi(catch_unwind)]
    pub async fn update(
        &self,
        only_if: Option<String>,
        columns: Vec<(String, String)>,
-    ) -> napi::Result<u64> {
+    ) -> napi::Result<UpdateResult> {
        let mut op = self.inner_ref()?.update();
        if let Some(only_if) = only_if {
            op = op.only_if(only_if);
@@ -153,7 +178,8 @@ impl Table {
        for (column_name, value) in columns {
            op = op.column(column_name, value);
        }
-        op.execute().await.default_error()
+        let res = op.execute().await.default_error()?;
+        Ok(res.into())
    }

    #[napi(catch_unwind)]
@@ -167,21 +193,28 @@ impl Table {
    }

    #[napi(catch_unwind)]
-    pub async fn add_columns(&self, transforms: Vec<AddColumnsSql>) -> napi::Result<()> {
+    pub async fn add_columns(
+        &self,
+        transforms: Vec<AddColumnsSql>,
+    ) -> napi::Result<AddColumnsResult> {
        let transforms = transforms
            .into_iter()
            .map(|sql| (sql.name, sql.value_sql))
            .collect::<Vec<_>>();
        let transforms = NewColumnTransform::SqlExpressions(transforms);
-        self.inner_ref()?
+        let res = self
+            .inner_ref()?
            .add_columns(transforms, None)
            .await
            .default_error()?;
-        Ok(())
+        Ok(res.into())
    }

    #[napi(catch_unwind)]
-    pub async fn alter_columns(&self, alterations: Vec<ColumnAlteration>) -> napi::Result<()> {
+    pub async fn alter_columns(
+        &self,
+        alterations: Vec<ColumnAlteration>,
+    ) -> napi::Result<AlterColumnsResult> {
        for alteration in &alterations {
            if alteration.rename.is_none()
                && alteration.nullable.is_none()
@@ -198,21 +231,23 @@ impl Table {
            .collect::<std::result::Result<Vec<_>, String>>()
            .map_err(napi::Error::from_reason)?;

-        self.inner_ref()?
+        let res = self
+            .inner_ref()?
            .alter_columns(&alterations)
            .await
            .default_error()?;
-        Ok(())
+        Ok(res.into())
    }

    #[napi(catch_unwind)]
-    pub async fn drop_columns(&self, columns: Vec<String>) -> napi::Result<()> {
+    pub async fn drop_columns(&self, columns: Vec<String>) -> napi::Result<DropColumnsResult> {
        let col_refs = columns.iter().map(String::as_str).collect::<Vec<_>>();
-        self.inner_ref()?
+        let res = self
+            .inner_ref()?
            .drop_columns(&col_refs)
            .await
            .default_error()?;
-        Ok(())
+        Ok(res.into())
    }

    #[napi(catch_unwind)]
@@ -232,6 +267,14 @@ impl Table {
            .default_error()
    }

+    #[napi(catch_unwind)]
+    pub async fn checkout_tag(&self, tag: String) -> napi::Result<()> {
+        self.inner_ref()?
+            .checkout_tag(tag.as_str())
+            .await
+            .default_error()
+    }
+
    #[napi(catch_unwind)]
    pub async fn checkout_latest(&self) -> napi::Result<()> {
        self.inner_ref()?.checkout_latest().await.default_error()
@@ -264,6 +307,13 @@ impl Table {
        self.inner_ref()?.restore().await.default_error()
    }

+    #[napi(catch_unwind)]
+    pub async fn tags(&self) -> napi::Result<Tags> {
+        Ok(Tags {
+            inner: self.inner_ref()?.clone(),
+        })
+    }
+
    #[napi(catch_unwind)]
    pub async fn optimize(
        &self,
@@ -523,9 +573,257 @@ impl From<lancedb::index::IndexStatistics> for IndexStatistics {
    }
 }

+#[napi(object)]
+pub struct TableStatistics {
+    /// The total number of bytes in the table
+    pub total_bytes: i64,
+
+    /// The number of rows in the table
+    pub num_rows: i64,
+
+    /// The number of indices in the table
+    pub num_indices: i64,
+
+    /// Statistics on table fragments
+    pub fragment_stats: FragmentStatistics,
+}
+
+#[napi(object)]
+pub struct FragmentStatistics {
+    /// The number of fragments in the table
+    pub num_fragments: i64,
+
+    /// The number of uncompacted fragments in the table
+    pub num_small_fragments: i64,
+
+    /// Statistics on the number of rows in the table fragments
+    pub lengths: FragmentSummaryStats,
+}
+
+#[napi(object)]
+pub struct FragmentSummaryStats {
+    /// The number of rows in the fragment with the fewest rows
+    pub min: i64,
+
+    /// The number of rows in the fragment with the most rows
+    pub max: i64,
+
+    /// The mean number of rows in the fragments
+    pub mean: i64,
+
+    /// The 25th percentile of number of rows in the fragments
+    pub p25: i64,
+
+    /// The 50th percentile of number of rows in the fragments
+    pub p50: i64,
+
+    /// The 75th percentile of number of rows in the fragments
+    pub p75: i64,
+
+    /// The 99th percentile of number of rows in the fragments
+    pub p99: i64,
+}
+
+impl From<lancedb::table::TableStatistics> for TableStatistics {
+    fn from(v: lancedb::table::TableStatistics) -> Self {
+        Self {
+            total_bytes: v.total_bytes as i64,
+            num_rows: v.num_rows as i64,
+            num_indices: v.num_indices as i64,
+            fragment_stats: FragmentStatistics {
+                num_fragments: v.fragment_stats.num_fragments as i64,
+                num_small_fragments: v.fragment_stats.num_small_fragments as i64,
+                lengths: FragmentSummaryStats {
+                    min: v.fragment_stats.lengths.min as i64,
+                    max: v.fragment_stats.lengths.max as i64,
+                    mean: v.fragment_stats.lengths.mean as i64,
+                    p25: v.fragment_stats.lengths.p25 as i64,
+                    p50: v.fragment_stats.lengths.p50 as i64,
+                    p75: v.fragment_stats.lengths.p75 as i64,
+                    p99: v.fragment_stats.lengths.p99 as i64,
+                },
+            },
+        }
+    }
+}
+
 #[napi(object)]
 pub struct Version {
    pub version: i64,
    pub timestamp: i64,
    pub metadata: HashMap<String, String>,
 }
+
+#[napi(object)]
+pub struct UpdateResult {
+    pub rows_updated: i64,
+    pub version: i64,
+}
+
+impl From<lancedb::table::UpdateResult> for UpdateResult {
+    fn from(value: lancedb::table::UpdateResult) -> Self {
+        Self {
+            rows_updated: value.rows_updated as i64,
+            version: value.version as i64,
+        }
+    }
+}
+
+#[napi(object)]
+pub struct AddResult {
+    pub version: i64,
+}
+
+impl From<lancedb::table::AddResult> for AddResult {
+    fn from(value: lancedb::table::AddResult) -> Self {
+        Self {
+            version: value.version as i64,
+        }
+    }
+}
+
+#[napi(object)]
+pub struct DeleteResult {
+    pub version: i64,
+}
+
+impl From<lancedb::table::DeleteResult> for DeleteResult {
+    fn from(value: lancedb::table::DeleteResult) -> Self {
+        Self {
+            version: value.version as i64,
+        }
+    }
+}
+
+#[napi(object)]
+pub struct MergeResult {
+    pub version: i64,
+    pub num_inserted_rows: i64,
+    pub num_updated_rows: i64,
+    pub num_deleted_rows: i64,
+}
+
+impl From<lancedb::table::MergeResult> for MergeResult {
+    fn from(value: lancedb::table::MergeResult) -> Self {
+        Self {
+            version: value.version as i64,
+            num_inserted_rows: value.num_inserted_rows as i64,
+            num_updated_rows: value.num_updated_rows as i64,
+            num_deleted_rows: value.num_deleted_rows as i64,
+        }
+    }
+}
+
+#[napi(object)]
+pub struct AddColumnsResult {
+    pub version: i64,
+}
+
+impl From<lancedb::table::AddColumnsResult> for AddColumnsResult {
+    fn from(value: lancedb::table::AddColumnsResult) -> Self {
+        Self {
+            version: value.version as i64,
+        }
+    }
+}
+
+#[napi(object)]
+pub struct AlterColumnsResult {
+    pub version: i64,
+}
+
+impl From<lancedb::table::AlterColumnsResult> for AlterColumnsResult {
+    fn from(value: lancedb::table::AlterColumnsResult) -> Self {
+        Self {
+            version: value.version as i64,
+        }
+    }
+}
+
+#[napi(object)]
+pub struct DropColumnsResult {
+    pub version: i64,
+}
+
+impl From<lancedb::table::DropColumnsResult> for DropColumnsResult {
+    fn from(value: lancedb::table::DropColumnsResult) -> Self {
+        Self {
+            version: value.version as i64,
+        }
+    }
+}
+
+#[napi]
+pub struct TagContents {
+    pub version: i64,
+    pub manifest_size: i64,
+}
+
+#[napi]
+pub struct Tags {
+    inner: LanceDbTable,
+}
+
+#[napi]
+impl Tags {
+    #[napi]
+    pub async fn list(&self) -> napi::Result<HashMap<String, TagContents>> {
+        let rust_tags = self.inner.tags().await.default_error()?;
+        let tag_list = rust_tags.as_ref().list().await.default_error()?;
+        let tag_contents = tag_list
+            .into_iter()
+            .map(|(k, v)| {
+                (
+                    k,
+                    TagContents {
+                        version: v.version as i64,
+                        manifest_size: v.manifest_size as i64,
+                    },
+                )
+            })
+            .collect();
+
+        Ok(tag_contents)
+    }
+
+    #[napi]
+    pub async fn get_version(&self, tag: String) -> napi::Result<i64> {
+        let rust_tags = self.inner.tags().await.default_error()?;
+        rust_tags
+            .as_ref()
+            .get_version(tag.as_str())
+            .await
+            .map(|v| v as i64)
+            .default_error()
+    }
+
+    #[napi]
+    pub async unsafe fn create(&mut self, tag: String, version: i64) -> napi::Result<()> {
+        let mut rust_tags = self.inner.tags().await.default_error()?;
+        rust_tags
+            .as_mut()
+            .create(tag.as_str(), version as u64)
+            .await
+            .default_error()
+    }
+
+    #[napi]
+    pub async unsafe fn delete(&mut self, tag: String) -> napi::Result<()> {
+        let mut rust_tags = self.inner.tags().await.default_error()?;
+        rust_tags
+            .as_mut()
+            .delete(tag.as_str())
+            .await
+            .default_error()
+    }
+
+    #[napi]
+    pub async unsafe fn update(&mut self, tag: String, version: i64) -> napi::Result<()> {
+        let mut rust_tags = self.inner.tags().await.default_error()?;
+        rust_tags
+            .as_mut()
+            .update(tag.as_str(), version as u64)
+            .await
+            .default_error()
+    }
+}
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.0-beta.8"
+current_version = "0.22.1"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.22.0-beta.8"
+version = "0.22.1"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -7,7 +7,7 @@ dependencies = [
    "numpy",
    "overrides>=0.7",
    "packaging",
-    "pyarrow>=14",
+    "pyarrow>=16",
    "pydantic>=1.10",
    "tqdm>=4.27.0",
 ]
@@ -77,6 +77,7 @@ embeddings = [
    "pillow",
    "open-clip-torch",
    "cohere",
+    "colpali-engine>=0.3.10",
    "huggingface_hub",
    "InstructorEmbedding",
    "google.generativeai",
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -1,5 +1,5 @@
 from datetime import timedelta
-from typing import Dict, List, Optional, Tuple, Any, Union, Literal
+from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal

 import pyarrow as pa

@@ -36,8 +36,10 @@ class Table:
    async def schema(self) -> pa.Schema: ...
    async def add(
        self, data: pa.RecordBatchReader, mode: Literal["append", "overwrite"]
-    ) -> None: ...
-    async def update(self, updates: Dict[str, str], where: Optional[str]) -> None: ...
+    ) -> AddResult: ...
+    async def update(
+        self, updates: Dict[str, str], where: Optional[str]
+    ) -> UpdateResult: ...
    async def count_rows(self, filter: Optional[str]) -> int: ...
    async def create_index(
        self,
@@ -47,23 +49,34 @@ class Table:
    ): ...
    async def list_versions(self) -> List[Dict[str, Any]]: ...
    async def version(self) -> int: ...
-    async def checkout(self, version: int): ...
+    async def checkout(self, version: Union[int, str]): ...
    async def checkout_latest(self): ...
-    async def restore(self, version: Optional[int] = None): ...
+    async def restore(self, version: Optional[Union[int, str]] = None): ...
    async def list_indices(self) -> list[IndexConfig]: ...
-    async def delete(self, filter: str): ...
-    async def add_columns(self, columns: list[tuple[str, str]]) -> None: ...
-    async def add_columns_with_schema(self, schema: pa.Schema) -> None: ...
-    async def alter_columns(self, columns: list[dict[str, Any]]) -> None: ...
+    async def delete(self, filter: str) -> DeleteResult: ...
+    async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
+    async def add_columns_with_schema(self, schema: pa.Schema) -> AddColumnsResult: ...
+    async def alter_columns(
+        self, columns: list[dict[str, Any]]
+    ) -> AlterColumnsResult: ...
    async def optimize(
        self,
        *,
        cleanup_since_ms: Optional[int] = None,
        delete_unverified: Optional[bool] = None,
    ) -> OptimizeStats: ...
+    @property
+    def tags(self) -> Tags: ...
    def query(self) -> Query: ...
    def vector_search(self) -> VectorQuery: ...

+class Tags:
+    async def list(self) -> Dict[str, Tag]: ...
+    async def get_version(self, tag: str) -> int: ...
+    async def create(self, tag: str, version: int): ...
+    async def delete(self, tag: str): ...
+    async def update(self, tag: str, version: int): ...
+
 class IndexConfig:
    index_type: str
    columns: List[str]
@@ -195,3 +208,32 @@ class RemovalStats:
 class OptimizeStats:
    compaction: CompactionStats
    prune: RemovalStats
+
+class Tag(TypedDict):
+    version: int
+    manifest_size: int
+
+class AddResult:
+    version: int
+
+class DeleteResult:
+    version: int
+
+class UpdateResult:
+    rows_updated: int
+    version: int
+
+class MergeResult:
+    version: int
+    num_updated_rows: int
+    num_inserted_rows: int
+    num_deleted_rows: int
+
+class AddColumnsResult:
+    version: int
+
+class AlterColumnsResult:
+    version: int
+
+class DropColumnsResult:
+    version: int
--- a/python/python/lancedb/common.py
+++ b/python/python/lancedb/common.py
@@ -9,7 +9,7 @@ import numpy as np
 import pyarrow as pa
 import pyarrow.dataset

-from .dependencies import pandas as pd
+from .dependencies import _check_for_pandas, pandas as pd

 DATA = Union[List[dict], "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
 VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
@@ -63,7 +63,7 @@ def data_to_reader(
    data: DATA, schema: Optional[pa.Schema] = None
 ) -> pa.RecordBatchReader:
    """Convert various types of input into a RecordBatchReader"""
-    if pd is not None and isinstance(data, pd.DataFrame):
+    if _check_for_pandas(data) and isinstance(data, pd.DataFrame):
        return pa.Table.from_pandas(data, schema=schema).to_reader()
    elif isinstance(data, pa.Table):
        return data.to_reader()
--- a/python/python/lancedb/embeddings/__init__.py
+++ b/python/python/lancedb/embeddings/__init__.py
@@ -19,3 +19,4 @@ from .imagebind import ImageBindEmbeddings
 from .jinaai import JinaEmbeddings
 from .watsonx import WatsonxEmbeddings
 from .voyageai import VoyageAIEmbeddingFunction
+from .colpali import ColPaliEmbeddings
--- a/python/python/lancedb/embeddings/colpali.py
+++ b/python/python/lancedb/embeddings/colpali.py
@@ -0,0 +1,255 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+
+from functools import lru_cache
+from typing import List, Union, Optional, Any
+import numpy as np
+import io
+
+from ..util import attempt_import_or_raise
+from .base import EmbeddingFunction
+from .registry import register
+from .utils import TEXT, IMAGES, is_flash_attn_2_available
+
+
+@register("colpali")
+class ColPaliEmbeddings(EmbeddingFunction):
+    """
+    An embedding function that uses the ColPali engine for
+    multimodal multi-vector embeddings.
+
+    This embedding function supports ColQwen2.5 models, producing multivector outputs
+    for both text and image inputs. The output embeddings are lists of vectors, each
+    vector being 128-dimensional by default, represented as List[List[float]].
+
+    Parameters
+    ----------
+    model_name : str
+        The name of the model to use (e.g., "Metric-AI/ColQwen2.5-3b-multilingual-v1.0")
+    device : str
+        The device for inference (default "auto").
+    dtype : str
+        Data type for model weights (default "bfloat16").
+    use_token_pooling : bool
+        Whether to use token pooling to reduce embedding size (default True).
+    pool_factor : int
+        Factor to reduce sequence length if token pooling is enabled (default 2).
+    quantization_config : Optional[BitsAndBytesConfig]
+        Quantization configuration for the model. (default None, bitsandbytes needed)
+    batch_size : int
+        Batch size for processing inputs (default 2).
+    """
+
+    model_name: str = "Metric-AI/ColQwen2.5-3b-multilingual-v1.0"
+    device: str = "auto"
+    dtype: str = "bfloat16"
+    use_token_pooling: bool = True
+    pool_factor: int = 2
+    quantization_config: Optional[Any] = None
+    batch_size: int = 2
+
+    _model = None
+    _processor = None
+    _token_pooler = None
+    _vector_dim = None
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        (
+            self._model,
+            self._processor,
+            self._token_pooler,
+        ) = self._load_model(
+            self.model_name,
+            self.dtype,
+            self.device,
+            self.use_token_pooling,
+            self.quantization_config,
+        )
+
+    @staticmethod
+    @lru_cache(maxsize=1)
+    def _load_model(
+        model_name: str,
+        dtype: str,
+        device: str,
+        use_token_pooling: bool,
+        quantization_config: Optional[Any],
+    ):
+        """
+        Initialize and cache the ColPali model, processor, and token pooler.
+        """
+        torch = attempt_import_or_raise("torch", "torch")
+        transformers = attempt_import_or_raise("transformers", "transformers")
+        colpali_engine = attempt_import_or_raise("colpali_engine", "colpali_engine")
+        from colpali_engine.compression.token_pooling import HierarchicalTokenPooler
+
+        if quantization_config is not None:
+            if not isinstance(quantization_config, transformers.BitsAndBytesConfig):
+                raise ValueError("quantization_config must be a BitsAndBytesConfig")
+
+        if dtype == "bfloat16":
+            torch_dtype = torch.bfloat16
+        elif dtype == "float16":
+            torch_dtype = torch.float16
+        elif dtype == "float64":
+            torch_dtype = torch.float64
+        else:
+            torch_dtype = torch.float32
+
+        model = colpali_engine.models.ColQwen2_5.from_pretrained(
+            model_name,
+            torch_dtype=torch_dtype,
+            device_map=device,
+            quantization_config=quantization_config
+            if quantization_config is not None
+            else None,
+            attn_implementation="flash_attention_2"
+            if is_flash_attn_2_available()
+            else None,
+        ).eval()
+        processor = colpali_engine.models.ColQwen2_5_Processor.from_pretrained(
+            model_name
+        )
+        token_pooler = HierarchicalTokenPooler() if use_token_pooling else None
+        return model, processor, token_pooler
+
+    def ndims(self):
+        """
+        Return the dimension of a vector in the multivector output (e.g., 128).
+        """
+        torch = attempt_import_or_raise("torch", "torch")
+        if self._vector_dim is None:
+            dummy_query = "test"
+            batch_queries = self._processor.process_queries([dummy_query]).to(
+                self._model.device
+            )
+            with torch.no_grad():
+                query_embeddings = self._model(**batch_queries)
+
+            if self.use_token_pooling and self._token_pooler is not None:
+                query_embeddings = self._token_pooler.pool_embeddings(
+                    query_embeddings,
+                    pool_factor=self.pool_factor,
+                    padding=True,
+                    padding_side=self._processor.tokenizer.padding_side,
+                )
+
+            self._vector_dim = query_embeddings[0].shape[-1]
+        return self._vector_dim
+
+    def _process_embeddings(self, embeddings):
+        """
+        Format model embeddings into List[List[float]].
+        Use token pooling if enabled.
+        """
+        torch = attempt_import_or_raise("torch", "torch")
+        if self.use_token_pooling and self._token_pooler is not None:
+            embeddings = self._token_pooler.pool_embeddings(
+                embeddings,
+                pool_factor=self.pool_factor,
+                padding=True,
+                padding_side=self._processor.tokenizer.padding_side,
+            )
+
+        if isinstance(embeddings, torch.Tensor):
+            tensors = embeddings.detach().cpu()
+            if tensors.dtype == torch.bfloat16:
+                tensors = tensors.to(torch.float32)
+            return (
+                tensors.numpy()
+                .astype(np.float64 if self.dtype == "float64" else np.float32)
+                .tolist()
+            )
+        return []
+
+    def generate_text_embeddings(self, text: TEXT) -> List[List[List[float]]]:
+        """
+        Generate embeddings for text input.
+        """
+        torch = attempt_import_or_raise("torch", "torch")
+        text = self.sanitize_input(text)
+        all_embeddings = []
+
+        for i in range(0, len(text), self.batch_size):
+            batch_text = text[i : i + self.batch_size]
+            batch_queries = self._processor.process_queries(batch_text).to(
+                self._model.device
+            )
+            with torch.no_grad():
+                query_embeddings = self._model(**batch_queries)
+            all_embeddings.extend(self._process_embeddings(query_embeddings))
+        return all_embeddings
+
+    def _prepare_images(self, images: IMAGES) -> List:
+        """
+        Convert image inputs to PIL Images.
+        """
+        PIL = attempt_import_or_raise("PIL", "pillow")
+        requests = attempt_import_or_raise("requests", "requests")
+        images = self.sanitize_input(images)
+        pil_images = []
+        try:
+            for image in images:
+                if isinstance(image, str):
+                    if image.startswith(("http://", "https://")):
+                        response = requests.get(image, timeout=10)
+                        response.raise_for_status()
+                        pil_images.append(PIL.Image.open(io.BytesIO(response.content)))
+                    else:
+                        with PIL.Image.open(image) as im:
+                            pil_images.append(im.copy())
+                elif isinstance(image, bytes):
+                    pil_images.append(PIL.Image.open(io.BytesIO(image)))
+                else:
+                    # Assume it's a PIL Image; will raise if invalid
+                    pil_images.append(image)
+        except Exception as e:
+            raise ValueError(f"Failed to process image: {e}")
+
+        return pil_images
+
+    def generate_image_embeddings(self, images: IMAGES) -> List[List[List[float]]]:
+        """
+        Generate embeddings for a batch of images.
+        """
+        torch = attempt_import_or_raise("torch", "torch")
+        pil_images = self._prepare_images(images)
+        all_embeddings = []
+
+        for i in range(0, len(pil_images), self.batch_size):
+            batch_images = pil_images[i : i + self.batch_size]
+            batch_images = self._processor.process_images(batch_images).to(
+                self._model.device
+            )
+            with torch.no_grad():
+                image_embeddings = self._model(**batch_images)
+            all_embeddings.extend(self._process_embeddings(image_embeddings))
+        return all_embeddings
+
+    def compute_query_embeddings(
+        self, query: Union[str, IMAGES], *args, **kwargs
+    ) -> List[List[List[float]]]:
+        """
+        Compute embeddings for a single user query (text only).
+        """
+        if not isinstance(query, str):
+            raise ValueError(
+                "Query must be a string, image to image search is not supported"
+            )
+        return self.generate_text_embeddings([query])
+
+    def compute_source_embeddings(
+        self, images: IMAGES, *args, **kwargs
+    ) -> List[List[List[float]]]:
+        """
+        Compute embeddings for a batch of source images.
+
+        Parameters
+        ----------
+        images : Union[str, bytes, List, pa.Array, pa.ChunkedArray, np.ndarray]
+            Batch of images (paths, URLs, bytes, or PIL Images).
+        """
+        images = self.sanitize_input(images)
+        return self.generate_image_embeddings(images)
--- a/python/python/lancedb/embeddings/utils.py
+++ b/python/python/lancedb/embeddings/utils.py
@@ -18,6 +18,7 @@ import numpy as np
 import pyarrow as pa

 from ..dependencies import pandas as pd
+from ..util import attempt_import_or_raise


 # ruff: noqa: PERF203
@@ -275,3 +276,12 @@ def url_retrieve(url: str):
 def api_key_not_found_help(provider):
    logging.error("Could not find API key for %s", provider)
    raise ValueError(f"Please set the {provider.upper()}_API_KEY environment variable.")
+
+
+def is_flash_attn_2_available():
+    try:
+        attempt_import_or_raise("flash_attn", "flash_attn")
+
+        return True
+    except ImportError:
+        return False
--- a/python/python/lancedb/merge.py
+++ b/python/python/lancedb/merge.py
@@ -4,10 +4,14 @@

 from __future__ import annotations

+from datetime import timedelta
 from typing import TYPE_CHECKING, List, Optional

 if TYPE_CHECKING:
    from .common import DATA
+    from ._lancedb import (
+        MergeInsertResult,
+    )


 class LanceMergeInsertBuilder(object):
@@ -28,6 +32,7 @@ class LanceMergeInsertBuilder(object):
        self._when_not_matched_insert_all = False
        self._when_not_matched_by_source_delete = False
        self._when_not_matched_by_source_condition = None
+        self._timeout = None

    def when_matched_update_all(
        self, *, where: Optional[str] = None
@@ -78,7 +83,8 @@ class LanceMergeInsertBuilder(object):
        new_data: DATA,
        on_bad_vectors: str = "error",
        fill_value: float = 0.0,
-    ):
+        timeout: Optional[timedelta] = None,
+    ) -> MergeInsertResult:
        """
        Executes the merge insert operation

@@ -95,5 +101,24 @@ class LanceMergeInsertBuilder(object):
            One of "error", "drop", "fill".
        fill_value: float, default 0.
            The value to use when filling vectors. Only used if on_bad_vectors="fill".
+        timeout: Optional[timedelta], default None
+            Maximum time to run the operation before cancelling it.
+
+            By default, there is a 30-second timeout that is only enforced after the
+            first attempt. This is to prevent spending too long retrying to resolve
+            conflicts. For example, if a write attempt takes 20 seconds and fails,
+            the second attempt will be cancelled after 10 seconds, hitting the
+            30-second timeout. However, a write that takes one hour and succeeds on the
+            first attempt will not be cancelled.
+
+            When this is set, the timeout is enforced on all attempts, including
+            the first.
+
+        Returns
+        -------
+        MergeInsertResult
+            version: the new version number of the table after doing merge insert.
        """
+        if timeout is not None:
+            self._timeout = timeout
        return self._table._do_merge(self, new_data, on_bad_vectors, fill_value)
--- a/python/python/lancedb/pydantic.py
+++ b/python/python/lancedb/pydantic.py
@@ -152,6 +152,104 @@ def Vector(
    return FixedSizeList


+def MultiVector(
+    dim: int, value_type: pa.DataType = pa.float32(), nullable: bool = True
+) -> Type:
+    """Pydantic MultiVector Type for multi-vector embeddings.
+
+    This type represents a list of vectors, each with the same dimension.
+    Useful for models that produce multiple embeddings per input, like ColPali.
+
+    Parameters
+    ----------
+    dim : int
+        The dimension of each vector in the multi-vector.
+    value_type : pyarrow.DataType, optional
+        The value type of the vectors, by default pa.float32()
+    nullable : bool, optional
+        Whether the multi-vector is nullable, by default it is True.
+
+    Examples
+    --------
+
+    >>> import pydantic
+    >>> from lancedb.pydantic import MultiVector
+    ...
+    >>> class MyModel(pydantic.BaseModel):
+    ...     id: int
+    ...     text: str
+    ...     embeddings: MultiVector(128)  # List of 128-dimensional vectors
+    >>> schema = pydantic_to_schema(MyModel)
+    >>> assert schema == pa.schema([
+    ...     pa.field("id", pa.int64(), False),
+    ...     pa.field("text", pa.utf8(), False),
+    ...     pa.field("embeddings", pa.list_(pa.list_(pa.float32(), 128)))
+    ... ])
+    """
+
+    class MultiVectorList(list, FixedSizeListMixin):
+        def __repr__(self):
+            return f"MultiVector(dim={dim})"
+
+        @staticmethod
+        def nullable() -> bool:
+            return nullable
+
+        @staticmethod
+        def dim() -> int:
+            return dim
+
+        @staticmethod
+        def value_arrow_type() -> pa.DataType:
+            return value_type
+
+        @staticmethod
+        def is_multi_vector() -> bool:
+            return True
+
+        @classmethod
+        def __get_pydantic_core_schema__(
+            cls, _source_type: Any, _handler: pydantic.GetCoreSchemaHandler
+        ) -> CoreSchema:
+            return core_schema.no_info_after_validator_function(
+                cls,
+                core_schema.list_schema(
+                    items_schema=core_schema.list_schema(
+                        min_length=dim,
+                        max_length=dim,
+                        items_schema=core_schema.float_schema(),
+                    ),
+                ),
+            )
+
+        @classmethod
+        def __get_validators__(cls) -> Generator[Callable, None, None]:
+            yield cls.validate
+
+        # For pydantic v1
+        @classmethod
+        def validate(cls, v):
+            if not isinstance(v, (list, range)):
+                raise TypeError("A list of vectors is needed")
+            for vec in v:
+                if not isinstance(vec, (list, range, np.ndarray)) or len(vec) != dim:
+                    raise TypeError(f"Each vector must be a list of {dim} numbers")
+            return cls(v)
+
+        if PYDANTIC_VERSION.major < 2:
+
+            @classmethod
+            def __modify_schema__(cls, field_schema: Dict[str, Any]):
+                field_schema["items"] = {
+                    "type": "array",
+                    "items": {"type": "number"},
+                    "minItems": dim,
+                    "maxItems": dim,
+                }
+
+    return MultiVectorList
+
+
 def _py_type_to_arrow_type(py_type: Type[Any], field: FieldInfo) -> pa.DataType:
    """Convert a field with native Python type to Arrow data type.

@@ -206,6 +304,9 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
            fields = _pydantic_model_to_fields(tp)
            return pa.struct(fields)
        if issubclass(tp, FixedSizeListMixin):
+            if getattr(tp, "is_multi_vector", lambda: False)():
+                return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
+            # For regular Vector
            return pa.list_(tp.value_arrow_type(), tp.dim())
    return _py_type_to_arrow_type(tp, field)

@@ -314,6 +415,7 @@ class LanceModel(pydantic.BaseModel):
    >>> table.add([
    ...     TestModel(name="test", vector=[1.0, 2.0])
    ... ])
+    AddResult(version=2)
    >>> table.search([0., 0.]).limit(1).to_pydantic(TestModel)
    [TestModel(name='test', vector=FixedSizeList(dim=2))]
    """
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -28,6 +28,8 @@ import pyarrow.compute as pc
 import pyarrow.fs as pa_fs
 import pydantic

+from lancedb.pydantic import PYDANTIC_VERSION
+
 from . import __version__
 from .arrow import AsyncRecordBatchReader
 from .dependencies import pandas as pd
@@ -498,10 +500,14 @@ class Query(pydantic.BaseModel):
            )
        return query

-    class Config:
-        # This tells pydantic to allow custom types (needed for the `vector` query since
-        # pa.Array wouln't be allowed otherwise)
-        arbitrary_types_allowed = True
+    # This tells pydantic to allow custom types (needed for the `vector` query since
+    # pa.Array wouldn't be allowed otherwise)
+    if PYDANTIC_VERSION.major < 2:  # Pydantic 1.x compat
+
+        class Config:
+            arbitrary_types_allowed = True
+    else:
+        model_config = {"arbitrary_types_allowed": True}


 class LanceQueryBuilder(ABC):
@@ -1586,6 +1592,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
        self._refine_factor = None
        self._distance_type = None
        self._phrase_query = None
+        self._lower_bound = None
+        self._upper_bound = None

    def _validate_query(self, query, vector=None, text=None):
        if query is not None and (vector is not None or text is not None):
@@ -1628,47 +1636,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
        raise NotImplementedError("to_query_object not yet supported on a hybrid query")

    def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
-        vector_query, fts_query = self._validate_query(
-            self._query, self._vector, self._text
-        )
-        self._fts_query = LanceFtsQueryBuilder(
-            self._table, fts_query, fts_columns=self._fts_columns
-        )
-        vector_query = self._query_to_vector(
-            self._table, vector_query, self._vector_column
-        )
-        self._vector_query = LanceVectorQueryBuilder(
-            self._table, vector_query, self._vector_column
-        )
-
-        if self._limit:
-            self._vector_query.limit(self._limit)
-            self._fts_query.limit(self._limit)
-        if self._columns:
-            self._vector_query.select(self._columns)
-            self._fts_query.select(self._columns)
-        if self._where:
-            self._vector_query.where(self._where, self._postfilter)
-            self._fts_query.where(self._where, self._postfilter)
-        if self._with_row_id:
-            self._vector_query.with_row_id(True)
-            self._fts_query.with_row_id(True)
-        if self._phrase_query:
-            self._fts_query.phrase_query(True)
-        if self._distance_type:
-            self._vector_query.metric(self._distance_type)
-        if self._nprobes:
-            self._vector_query.nprobes(self._nprobes)
-        if self._refine_factor:
-            self._vector_query.refine_factor(self._refine_factor)
-        if self._ef:
-            self._vector_query.ef(self._ef)
-        if self._bypass_vector_index:
-            self._vector_query.bypass_vector_index()
-
-        if self._reranker is None:
-            self._reranker = RRFReranker()
-
+        self._create_query_builders()
        with ThreadPoolExecutor() as executor:
            fts_future = executor.submit(
                self._fts_query.with_row_id(True).to_arrow, timeout=timeout
@@ -1991,6 +1959,112 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
        self._bypass_vector_index = True
        return self

+    def explain_plan(self, verbose: Optional[bool] = False) -> str:
+        """Return the execution plan for this query.
+
+        Examples
+        --------
+        >>> import lancedb
+        >>> db = lancedb.connect("./.lancedb")
+        >>> table = db.create_table("my_table", [{"vector": [99.0, 99]}])
+        >>> query = [100, 100]
+        >>> plan = table.search(query).explain_plan(True)
+        >>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+        ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
+        GlobalLimitExec: skip=0, fetch=10
+          FilterExec: _distance@2 IS NOT NULL
+            SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false]
+              KNNVectorDistance: metric=l2
+                LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
+
+        Parameters
+        ----------
+        verbose : bool, default False
+            Use a verbose output format.
+
+        Returns
+        -------
+        plan : str
+        """  # noqa: E501
+        self._create_query_builders()
+
+        results = ["Vector Search Plan:"]
+        results.append(
+            self._table._explain_plan(
+                self._vector_query.to_query_object(), verbose=verbose
+            )
+        )
+        results.append("FTS Search Plan:")
+        results.append(
+            self._table._explain_plan(
+                self._fts_query.to_query_object(), verbose=verbose
+            )
+        )
+        return "\n".join(results)
+
+    def analyze_plan(self):
+        """Execute the query and return the plan with runtime metrics.
+
+        Returns
+        -------
+        plan : str
+        """
+        self._create_query_builders()
+
+        results = ["Vector Search Plan:"]
+        results.append(self._table._analyze_plan(self._vector_query.to_query_object()))
+        results.append("FTS Search Plan:")
+        results.append(self._table._analyze_plan(self._fts_query.to_query_object()))
+        return "\n".join(results)
+
+    def _create_query_builders(self):
+        """Build self._vector_query / self._fts_query and apply the stored options."""
+        vector_query, fts_query = self._validate_query(
+            self._query, self._vector, self._text
+        )
+        self._fts_query = LanceFtsQueryBuilder(
+            self._table, fts_query, fts_columns=self._fts_columns
+        )
+        vector_query = self._query_to_vector(
+            self._table, vector_query, self._vector_column
+        )
+        self._vector_query = LanceVectorQueryBuilder(
+            self._table, vector_query, self._vector_column
+        )
+
+        # Options shared by both sub-queries first, then vector-/FTS-only ones.
+        if self._limit:
+            self._vector_query.limit(self._limit)
+            self._fts_query.limit(self._limit)
+        if self._columns:
+            self._vector_query.select(self._columns)
+            self._fts_query.select(self._columns)
+        if self._where:
+            self._vector_query.where(self._where, self._postfilter)
+            self._fts_query.where(self._where, self._postfilter)
+        if self._with_row_id:
+            self._vector_query.with_row_id(True)
+            self._fts_query.with_row_id(True)
+        if self._phrase_query:
+            self._fts_query.phrase_query(True)
+        if self._distance_type:
+            self._vector_query.metric(self._distance_type)
+        if self._nprobes:
+            self._vector_query.nprobes(self._nprobes)
+        if self._refine_factor:
+            self._vector_query.refine_factor(self._refine_factor)
+        if self._ef:
+            self._vector_query.ef(self._ef)
+        if self._bypass_vector_index:
+            self._vector_query.bypass_vector_index()
+        # Compare against None: a bound of 0.0 is falsy but still a real bound.
+        if self._lower_bound is not None or self._upper_bound is not None:
+            self._vector_query.distance_range(
+                lower_bound=self._lower_bound, upper_bound=self._upper_bound
+            )
+        if self._reranker is None:
+            self._reranker = RRFReranker()
+

 class AsyncQueryBase(object):
    def __init__(self, inner: Union[LanceQuery, LanceVectorQuery]):
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -7,7 +7,16 @@ from functools import cached_property
 from typing import Dict, Iterable, List, Optional, Union, Literal
 import warnings

-from lancedb._lancedb import IndexConfig
+from lancedb._lancedb import (
+    AddColumnsResult,
+    AddResult,
+    AlterColumnsResult,
+    DeleteResult,
+    DropColumnsResult,
+    IndexConfig,
+    MergeResult,
+    UpdateResult,
+)
 from lancedb.embeddings.base import EmbeddingFunctionConfig
 from lancedb.index import FTS, BTree, Bitmap, HnswPq, HnswSq, IvfFlat, IvfPq, LabelList
 from lancedb.remote.db import LOOP
@@ -18,7 +27,7 @@ from lancedb.merge import LanceMergeInsertBuilder
 from lancedb.embeddings import EmbeddingFunctionRegistry

 from ..query import LanceVectorQueryBuilder, LanceQueryBuilder
-from ..table import AsyncTable, IndexStatistics, Query, Table
+from ..table import AsyncTable, IndexStatistics, Query, Table, Tags


 class RemoteTable(Table):
@@ -38,9 +47,6 @@ class RemoteTable(Table):
    def __repr__(self) -> str:
        return f"RemoteTable({self.db_name}.{self.name})"

-    def __len__(self) -> int:
-        self.count_rows(None)
-
    @property
    def schema(self) -> pa.Schema:
        """The [Arrow Schema](https://arrow.apache.org/docs/python/api/datatypes.html#)
@@ -54,6 +60,10 @@ class RemoteTable(Table):
        """Get the current version of the table"""
        return LOOP.run(self._table.version())

+    @property
+    def tags(self) -> Tags:
+        return Tags(self._table)
+
    @cached_property
    def embedding_functions(self) -> Dict[str, EmbeddingFunctionConfig]:
        """
@@ -81,13 +91,13 @@ class RemoteTable(Table):
        """to_pandas() is not yet supported on LanceDB cloud."""
        return NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")

-    def checkout(self, version: int):
+    def checkout(self, version: Union[int, str]):
        return LOOP.run(self._table.checkout(version))

    def checkout_latest(self):
        return LOOP.run(self._table.checkout_latest())

-    def restore(self, version: Optional[int] = None):
+    def restore(self, version: Optional[Union[int, str]] = None):
        return LOOP.run(self._table.restore(version))

    def list_indices(self) -> Iterable[IndexConfig]:
@@ -104,6 +114,7 @@ class RemoteTable(Table):
        index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar",
        *,
        replace: bool = False,
+        wait_timeout: timedelta = None,
    ):
        """Creates a scalar index
        Parameters
@@ -126,13 +137,18 @@ class RemoteTable(Table):
        else:
            raise ValueError(f"Unknown index type: {index_type}")

-        LOOP.run(self._table.create_index(column, config=config, replace=replace))
+        LOOP.run(
+            self._table.create_index(
+                column, config=config, replace=replace, wait_timeout=wait_timeout
+            )
+        )

    def create_fts_index(
        self,
        column: str,
        *,
        replace: bool = False,
+        wait_timeout: timedelta = None,
        with_position: bool = True,
        # tokenizer configs:
        base_tokenizer: str = "simple",
@@ -153,7 +169,11 @@ class RemoteTable(Table):
            remove_stop_words=remove_stop_words,
            ascii_folding=ascii_folding,
        )
-        LOOP.run(self._table.create_index(column, config=config, replace=replace))
+        LOOP.run(
+            self._table.create_index(
+                column, config=config, replace=replace, wait_timeout=wait_timeout
+            )
+        )

    def create_index(
        self,
@@ -165,6 +185,7 @@ class RemoteTable(Table):
        replace: Optional[bool] = None,
        accelerator: Optional[str] = None,
        index_type="vector",
+        wait_timeout: Optional[timedelta] = None,
    ):
        """Create an index on the table.
        Currently, the only parameters that matter are
@@ -236,7 +257,11 @@ class RemoteTable(Table):
                " 'IVF_FLAT', 'IVF_PQ', 'IVF_HNSW_PQ', 'IVF_HNSW_SQ'"
            )

-        LOOP.run(self._table.create_index(vector_column_name, config=config))
+        LOOP.run(
+            self._table.create_index(
+                vector_column_name, config=config, wait_timeout=wait_timeout
+            )
+        )

    def add(
        self,
@@ -244,7 +269,7 @@ class RemoteTable(Table):
        mode: str = "append",
        on_bad_vectors: str = "error",
        fill_value: float = 0.0,
-    ) -> int:
+    ) -> AddResult:
        """Add more data to the [Table](Table). It has the same API signature as
        the OSS version.

@@ -267,8 +292,12 @@ class RemoteTable(Table):
        fill_value: float, default 0.
            The value to use when filling vectors. Only used if on_bad_vectors="fill".

+        Returns
+        -------
+        AddResult
+            An object containing the new version number of the table after adding data.
        """
-        LOOP.run(
+        return LOOP.run(
            self._table.add(
                data, mode=mode, on_bad_vectors=on_bad_vectors, fill_value=fill_value
            )
@@ -394,10 +423,12 @@ class RemoteTable(Table):
        new_data: DATA,
        on_bad_vectors: str,
        fill_value: float,
-    ):
-        LOOP.run(self._table._do_merge(merge, new_data, on_bad_vectors, fill_value))
+    ) -> MergeResult:
+        return LOOP.run(
+            self._table._do_merge(merge, new_data, on_bad_vectors, fill_value)
+        )

-    def delete(self, predicate: str):
+    def delete(self, predicate: str) -> DeleteResult:
        """Delete rows from the table.

        This can be used to delete a single row, many rows, all rows, or
@@ -412,6 +443,11 @@ class RemoteTable(Table):

            The filter must not be empty, or it will error.

+        Returns
+        -------
+        DeleteResult
+            An object containing the new version number of the table after deletion.
+
        Examples
        --------
        >>> import lancedb
@@ -444,7 +480,7 @@ class RemoteTable(Table):
           x      vector  _distance # doctest: +SKIP
        0  2  [3.0, 4.0]       85.0 # doctest: +SKIP
        """
-        LOOP.run(self._table.delete(predicate))
+        return LOOP.run(self._table.delete(predicate))

    def update(
        self,
@@ -452,7 +488,7 @@ class RemoteTable(Table):
        values: Optional[dict] = None,
        *,
        values_sql: Optional[Dict[str, str]] = None,
-    ):
+    ) -> UpdateResult:
        """
        This can be used to update zero to all rows depending on how many
        rows match the where clause.
@@ -470,6 +506,12 @@ class RemoteTable(Table):
            reference existing columns. For example, {"x": "x + 1"} will increment
            the x column by 1.

+        Returns
+        -------
+        UpdateResult
+            - rows_updated: The number of rows that were updated
+            - version: The new version number of the table after the update
+
        Examples
        --------
        >>> import lancedb
@@ -494,7 +536,7 @@ class RemoteTable(Table):
        2  2  [10.0, 10.0] # doctest: +SKIP

        """
-        LOOP.run(
+        return LOOP.run(
            self._table.update(where=where, updates=values, updates_sql=values_sql)
        )

@@ -542,18 +584,28 @@ class RemoteTable(Table):
    def count_rows(self, filter: Optional[str] = None) -> int:
        return LOOP.run(self._table.count_rows(filter))

-    def add_columns(self, transforms: Dict[str, str]):
+    def add_columns(self, transforms: Dict[str, str]) -> AddColumnsResult:
        return LOOP.run(self._table.add_columns(transforms))

-    def alter_columns(self, *alterations: Iterable[Dict[str, str]]):
+    def alter_columns(
+        self, *alterations: Iterable[Dict[str, str]]
+    ) -> AlterColumnsResult:
        return LOOP.run(self._table.alter_columns(*alterations))

-    def drop_columns(self, columns: Iterable[str]):
+    def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
        return LOOP.run(self._table.drop_columns(columns))

    def drop_index(self, index_name: str):
        return LOOP.run(self._table.drop_index(index_name))

+    def wait_for_index(
+        self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
+    ):
+        return LOOP.run(self._table.wait_for_index(index_names, timeout))
+
+    def stats(self):
+        return LOOP.run(self._table.stats())
+
    def uses_v2_manifest_paths(self) -> bool:
        raise NotImplementedError(
            "uses_v2_manifest_paths() is not supported on the LanceDB Cloud"
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
--- a/python/python/tests/docs/test_merge_insert.py
+++ b/python/python/tests/docs/test_merge_insert.py
@@ -18,15 +18,19 @@ def test_upsert(mem_db):
        {"id": 1, "name": "Bobby"},
        {"id": 2, "name": "Charlie"},
    ]
-    (
+    res = (
        table.merge_insert("id")
        .when_matched_update_all()
        .when_not_matched_insert_all()
        .execute(new_users)
    )
    table.count_rows()  # 3
+    res  # {'num_inserted_rows': 1, 'num_updated_rows': 1, 'num_deleted_rows': 0}
    # --8<-- [end:upsert_basic]
    assert table.count_rows() == 3
+    assert res.num_inserted_rows == 1
+    assert res.num_deleted_rows == 0
+    assert res.num_updated_rows == 1


@pytest.mark.asyncio
@@ -44,15 +48,22 @@ async def test_upsert_async(mem_db_async):
        {"id": 1, "name": "Bobby"},
        {"id": 2, "name": "Charlie"},
    ]
-    await (
+    res = await (
        table.merge_insert("id")
        .when_matched_update_all()
        .when_not_matched_insert_all()
        .execute(new_users)
    )
    await table.count_rows()  # 3
+    res
+    # MergeResult(version=2, num_updated_rows=1,
+    # num_inserted_rows=1, num_deleted_rows=0)
    # --8<-- [end:upsert_basic_async]
    assert await table.count_rows() == 3
+    assert res.version == 2
+    assert res.num_inserted_rows == 1
+    assert res.num_deleted_rows == 0
+    assert res.num_updated_rows == 1


 def test_insert_if_not_exists(mem_db):
@@ -69,10 +80,19 @@ def test_insert_if_not_exists(mem_db):
        {"domain": "google.com", "name": "Google"},
        {"domain": "facebook.com", "name": "Facebook"},
    ]
-    (table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains))
+    res = (
+        table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains)
+    )
    table.count_rows()  # 3
+    res
+    # MergeResult(version=2, num_updated_rows=0,
+    # num_inserted_rows=1, num_deleted_rows=0)
    # --8<-- [end:insert_if_not_exists]
    assert table.count_rows() == 3
+    assert res.version == 2
+    assert res.num_inserted_rows == 1
+    assert res.num_deleted_rows == 0
+    assert res.num_updated_rows == 0


@pytest.mark.asyncio
@@ -90,12 +110,19 @@ async def test_insert_if_not_exists_async(mem_db_async):
        {"domain": "google.com", "name": "Google"},
        {"domain": "facebook.com", "name": "Facebook"},
    ]
-    await (
+    res = await (
        table.merge_insert("domain").when_not_matched_insert_all().execute(new_domains)
    )
    await table.count_rows()  # 3
-    # --8<-- [end:insert_if_not_exists_async]
+    res
+    # MergeResult(version=2, num_updated_rows=0,
+    # num_inserted_rows=1, num_deleted_rows=0)
+    # --8<-- [end:insert_if_not_exists_async]
    assert await table.count_rows() == 3
+    assert res.version == 2
+    assert res.num_inserted_rows == 1
+    assert res.num_deleted_rows == 0
+    assert res.num_updated_rows == 0


 def test_replace_range(mem_db):
@@ -113,7 +140,7 @@ def test_replace_range(mem_db):
    new_chunks = [
        {"doc_id": 1, "chunk_id": 0, "text": "Baz"},
    ]
-    (
+    res = (
        table.merge_insert(["doc_id", "chunk_id"])
        .when_matched_update_all()
        .when_not_matched_insert_all()
@@ -121,8 +148,15 @@ def test_replace_range(mem_db):
        .execute(new_chunks)
    )
    table.count_rows("doc_id = 1")  # 1
-    # --8<-- [end:replace_range]
+    res
+    # MergeResult(version=2, num_updated_rows=1,
+    # num_inserted_rows=0, num_deleted_rows=1)
+    # --8<-- [end:replace_range]
    assert table.count_rows("doc_id = 1") == 1
+    assert res.version == 2
+    assert res.num_inserted_rows == 0
+    assert res.num_deleted_rows == 1
+    assert res.num_updated_rows == 1


@pytest.mark.asyncio
@@ -141,7 +175,7 @@ async def test_replace_range_async(mem_db_async):
    new_chunks = [
        {"doc_id": 1, "chunk_id": 0, "text": "Baz"},
    ]
-    await (
+    res = await (
        table.merge_insert(["doc_id", "chunk_id"])
        .when_matched_update_all()
        .when_not_matched_insert_all()
@@ -149,5 +183,12 @@ async def test_replace_range_async(mem_db_async):
        .execute(new_chunks)
    )
    await table.count_rows("doc_id = 1")  # 1
-    # --8<-- [end:replace_range_async]
+    res
+    # MergeResult(version=2, num_updated_rows=1,
+    # num_inserted_rows=0, num_deleted_rows=1)
+    # --8<-- [end:replace_range_async]
    assert await table.count_rows("doc_id = 1") == 1
+    assert res.version == 2
+    assert res.num_inserted_rows == 0
+    assert res.num_deleted_rows == 1
+    assert res.num_updated_rows == 1
--- a/python/python/tests/test_embeddings_slow.py
+++ b/python/python/tests/test_embeddings_slow.py
@@ -11,7 +11,7 @@ import pandas as pd
 import pyarrow as pa
 import pytest
 from lancedb.embeddings import get_registry
-from lancedb.pydantic import LanceModel, Vector
+from lancedb.pydantic import LanceModel, Vector, MultiVector
 import requests

 # These are integration tests for embedding functions.
@@ -575,3 +575,67 @@ def test_voyageai_multimodal_embedding_text_function():

    tbl.add(df)
    assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
+
+
+@pytest.mark.slow
+@pytest.mark.skipif(
+    importlib.util.find_spec("colpali_engine") is None,
+    reason="colpali_engine not installed",
+)
+def test_colpali(tmp_path):
+    import requests
+    from lancedb.pydantic import LanceModel
+
+    db = lancedb.connect(tmp_path)
+    registry = get_registry()
+    func = registry.get("colpali").create()
+
+    class MediaItems(LanceModel):
+        text: str
+        image_uri: str = func.SourceField()
+        image_bytes: bytes = func.SourceField()
+        image_vectors: MultiVector(func.ndims()) = (
+            func.VectorField()
+        )  # Multivector image embeddings
+
+    table = db.create_table("media", schema=MediaItems)
+
+    texts = [
+        "a cute cat playing with yarn",
+        "a puppy in a flower field",
+        "a red sports car on the highway",
+        "a vintage bicycle leaning against a wall",
+        "a plate of delicious pasta",
+        "fresh fruit salad in a bowl",
+    ]
+
+    uris = [
+        "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
+        "http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
+        "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
+        "http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
+        "http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
+        "http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
+    ]
+
+    # Get images as bytes
+    image_bytes = [requests.get(uri).content for uri in uris]
+
+    table.add(
+        pd.DataFrame({"text": texts, "image_uri": uris, "image_bytes": image_bytes})
+    )
+
+    # Test text-to-image search
+    image_results = (
+        table.search("fluffy companion", vector_column_name="image_vectors")
+        .limit(1)
+        .to_pydantic(MediaItems)[0]
+    )
+    assert "cat" in image_results.text.lower() or "puppy" in image_results.text.lower()
+
+    # Verify multivector dimensions
+    first_row = table.to_arrow().to_pylist()[0]
+    assert len(first_row["image_vectors"]) > 1, "Should have multiple image vectors"
+    assert len(first_row["image_vectors"][0]) == func.ndims(), (
+        "Vector dimension mismatch"
+    )
--- a/python/python/tests/test_hybrid_query.py
+++ b/python/python/tests/test_hybrid_query.py
@@ -4,13 +4,32 @@
 import lancedb

 from lancedb.query import LanceHybridQueryBuilder
+from lancedb.rerankers.rrf import RRFReranker
 import pyarrow as pa
 import pyarrow.compute as pc
 import pytest
 import pytest_asyncio

 from lancedb.index import FTS
-from lancedb.table import AsyncTable
+from lancedb.table import AsyncTable, Table
+
+
+@pytest.fixture
+def sync_table(tmpdir_factory) -> Table:
+    tmp_path = str(tmpdir_factory.mktemp("data"))
+    db = lancedb.connect(tmp_path)
+    data = pa.table(
+        {
+            "text": pa.array(["a", "b", "cat", "dog"]),
+            "vector": pa.array(
+                [[0.1, 0.1], [2, 2], [-0.1, -0.1], [0.5, -0.5]],
+                type=pa.list_(pa.float32(), list_size=2),
+            ),
+        }
+    )
+    table = db.create_table("test", data)
+    table.create_fts_index("text", with_position=False, use_tantivy=False)
+    return table


@pytest_asyncio.fixture
@@ -102,6 +121,42 @@ async def test_async_hybrid_query_default_limit(table: AsyncTable):
    assert texts.count("a") == 1


+def test_hybrid_query_distance_range(sync_table: Table):
+    reranker = RRFReranker(return_score="all")
+    result = (
+        sync_table.search(query_type="hybrid")
+        .vector([0.0, 0.4])
+        .text("cat and dog")
+        .distance_range(lower_bound=0.2, upper_bound=0.5)
+        .rerank(reranker)
+        .limit(2)
+        .to_arrow()
+    )
+    assert len(result) == 2
+    # Null distances are skipped; only rows carrying a distance are range-checked.
+    for dist in result["_distance"]:
+        if dist.is_valid:
+            assert 0.2 <= dist.as_py() <= 0.5
+
+
+@pytest.mark.asyncio
+async def test_hybrid_query_distance_range_async(table: AsyncTable):
+    reranker = RRFReranker(return_score="all")
+    result = await (
+        table.query()
+        .nearest_to([0.0, 0.4])
+        .nearest_to_text("cat and dog")
+        .distance_range(lower_bound=0.2, upper_bound=0.5)
+        .rerank(reranker)
+        .limit(2)
+        .to_arrow()
+    )
+    assert len(result) == 2
+    for dist in result["_distance"]:
+        if dist.is_valid:
+            assert 0.2 <= dist.as_py() <= 0.5
+
+
@pytest.mark.asyncio
 async def test_explain_plan(table: AsyncTable):
    plan = await (
--- a/python/python/tests/test_pydantic.py
+++ b/python/python/tests/test_pydantic.py
@@ -9,7 +9,13 @@ from typing import List, Optional, Tuple
 import pyarrow as pa
 import pydantic
 import pytest
-from lancedb.pydantic import PYDANTIC_VERSION, LanceModel, Vector, pydantic_to_schema
+from lancedb.pydantic import (
+    PYDANTIC_VERSION,
+    LanceModel,
+    Vector,
+    pydantic_to_schema,
+    MultiVector,
+)
 from pydantic import BaseModel
 from pydantic import Field

@@ -354,3 +360,55 @@ def test_optional_nested_model():
            ),
        ]
    )
+
+
+def test_multi_vector():
+    class TestModel(pydantic.BaseModel):
+        vec: MultiVector(8)
+
+    schema = pydantic_to_schema(TestModel)
+    assert schema == pa.schema(
+        [pa.field("vec", pa.list_(pa.list_(pa.float32(), 8)), True)]
+    )
+
+    with pytest.raises(pydantic.ValidationError):
+        TestModel(vec=[[1.0] * 7])
+
+    with pytest.raises(pydantic.ValidationError):
+        TestModel(vec=[[1.0] * 9])
+
+    TestModel(vec=[[1.0] * 8])
+    TestModel(vec=[[1.0] * 8, [2.0] * 8])
+
+    TestModel(vec=[])
+
+
+def test_multi_vector_nullable():
+    class NullableModel(pydantic.BaseModel):
+        vec: MultiVector(16, nullable=False)
+
+    schema = pydantic_to_schema(NullableModel)
+    assert schema == pa.schema(
+        [pa.field("vec", pa.list_(pa.list_(pa.float32(), 16)), False)]
+    )
+
+    class DefaultModel(pydantic.BaseModel):
+        vec: MultiVector(16)
+
+    schema = pydantic_to_schema(DefaultModel)
+    assert schema == pa.schema(
+        [pa.field("vec", pa.list_(pa.list_(pa.float32(), 16)), True)]
+    )
+
+
+def test_multi_vector_in_lance_model():
+    class TestModel(LanceModel):
+        id: int
+        vectors: MultiVector(16) = Field(default=[[0.0] * 16])
+
+    schema = pydantic_to_schema(TestModel)
+    assert schema == TestModel.to_arrow_schema()
+    assert TestModel.field_names() == ["id", "vectors"]
+
+    t = TestModel(id=1)
+    assert t.vectors == [[0.0] * 16]
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -257,7 +257,9 @@ async def test_distance_range_with_new_rows_async():
        }
    )
    table = await conn.create_table("test", data)
-    table.create_index("vector", config=IvfPq(num_partitions=1, num_sub_vectors=2))
+    await table.create_index(
+        "vector", config=IvfPq(num_partitions=1, num_sub_vectors=2)
+    )

    q = [0, 0]
    rs = await table.query().nearest_to(q).to_arrow()
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
+import re
 from concurrent.futures import ThreadPoolExecutor
 import contextlib
 from datetime import timedelta
@@ -149,6 +149,24 @@ async def test_async_checkout():
        assert await table.count_rows() == 300


+def test_table_len_sync():
+    def handler(request):  # mock server: answer each request exactly once
+        if request.path == "/v1/table/test/create/?mode=create":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            request.wfile.write(b"{}")
+        else:  # any other request is answered with the JSON number 1
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            request.wfile.write(json.dumps(1).encode())
+
+    with mock_lancedb_connection(handler) as db:
+        table = db.create_table("test", [{"id": 1}])
+        assert len(table) == 1  # len() must surface the count the server sent
+
+
@pytest.mark.asyncio
 async def test_http_error():
    request_id_holder = {"request_id": None}
@@ -235,6 +253,10 @@ def test_table_add_in_threadpool():

 def test_table_create_indices():
    def handler(request):
+        index_stats = dict(
+            index_type="IVF_PQ", num_indexed_rows=1000, num_unindexed_rows=0
+        )
+
        if request.path == "/v1/table/test/create_index/":
            request.send_response(200)
            request.end_headers()
@@ -258,6 +280,47 @@ def test_table_create_indices():
                )
            )
            request.wfile.write(payload.encode())
+        elif request.path == "/v1/table/test/index/list/":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            payload = json.dumps(
+                dict(
+                    indexes=[
+                        {
+                            "index_name": "id_idx",
+                            "columns": ["id"],
+                        },
+                        {
+                            "index_name": "text_idx",
+                            "columns": ["text"],
+                        },
+                        {
+                            "index_name": "vector_idx",
+                            "columns": ["vector"],
+                        },
+                    ]
+                )
+            )
+            request.wfile.write(payload.encode())
+        elif request.path == "/v1/table/test/index/id_idx/stats/":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            payload = json.dumps(index_stats)
+            request.wfile.write(payload.encode())
+        elif request.path == "/v1/table/test/index/text_idx/stats/":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            payload = json.dumps(index_stats)
+            request.wfile.write(payload.encode())
+        elif request.path == "/v1/table/test/index/vector_idx/stats/":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            payload = json.dumps(index_stats)
+            request.wfile.write(payload.encode())
        elif "/drop/" in request.path:
            request.send_response(200)
            request.end_headers()
@@ -269,14 +332,125 @@ def test_table_create_indices():
        # Parameters are well-tested through local and async tests.
        # This is a smoke-test.
        table = db.create_table("test", [{"id": 1}])
-        table.create_scalar_index("id")
-        table.create_fts_index("text")
-        table.create_scalar_index("vector")
+        table.create_scalar_index("id", wait_timeout=timedelta(seconds=2))
+        table.create_fts_index("text", wait_timeout=timedelta(seconds=2))
+        table.create_index(
+            vector_column_name="vector", wait_timeout=timedelta(seconds=10)
+        )
+        table.wait_for_index(["id_idx"], timedelta(seconds=2))
+        table.wait_for_index(["text_idx", "vector_idx"], timedelta(seconds=2))
        table.drop_index("vector_idx")
        table.drop_index("id_idx")
        table.drop_index("text_idx")


+def test_table_wait_for_index_timeout():
+    def handler(request):
+        index_stats = dict(
+            index_type="BTREE", num_indexed_rows=1000, num_unindexed_rows=1
+        )
+
+        if request.path == "/v1/table/test/create/?mode=create":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            request.wfile.write(b"{}")
+        elif request.path == "/v1/table/test/describe/":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            payload = json.dumps(
+                dict(
+                    version=1,
+                    schema=dict(
+                        fields=[
+                            dict(name="id", type={"type": "int64"}, nullable=False),
+                        ]
+                    ),
+                )
+            )
+            request.wfile.write(payload.encode())
+        elif request.path == "/v1/table/test/index/list/":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            payload = json.dumps(
+                dict(
+                    indexes=[
+                        {
+                            "index_name": "id_idx",
+                            "columns": ["id"],
+                        },
+                    ]
+                )
+            )
+            request.wfile.write(payload.encode())
+        elif request.path == "/v1/table/test/index/id_idx/stats/":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            payload = json.dumps(index_stats)
+            print(f"{index_stats=}")
+            request.wfile.write(payload.encode())
+        else:
+            request.send_response(404)
+            request.end_headers()
+
+    with mock_lancedb_connection(handler) as db:
+        table = db.create_table("test", [{"id": 1}])
+        with pytest.raises(
+            RuntimeError,
+            match=re.escape(
+                'Timeout error: timed out waiting for indices: ["id_idx"] after 1s'
+            ),
+        ):
+            table.wait_for_index(["id_idx"], timedelta(seconds=1))
+
+
+def test_stats():
+    stats = {
+        "total_bytes": 38,
+        "num_rows": 2,
+        "num_indices": 0,
+        "fragment_stats": {
+            "num_fragments": 1,
+            "num_small_fragments": 1,
+            "lengths": {
+                "min": 2,
+                "max": 2,
+                "mean": 2,
+                "p25": 2,
+                "p50": 2,
+                "p75": 2,
+                "p99": 2,
+            },
+        },
+    }
+
+    def handler(request):
+        if request.path == "/v1/table/test/create/?mode=create":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            request.wfile.write(b"{}")
+        elif request.path == "/v1/table/test/stats/":
+            request.send_response(200)
+            request.send_header("Content-Type", "application/json")
+            request.end_headers()
+            payload = json.dumps(stats)
+            request.wfile.write(payload.encode())
+        else:
+            print(request.path)
+            request.send_response(404)
+            request.end_headers()
+
+    with mock_lancedb_connection(handler) as db:
+        table = db.create_table("test", [{"id": 1}])
+        res = table.stats()
+        print(f"{res=}")
+        assert res == stats
+
+
@contextlib.contextmanager
 def query_test_table(query_handler, *, server_version=Version("0.1.0")):
    def handler(request):
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -9,9 +9,9 @@ from typing import List
 from unittest.mock import patch

 import lancedb
+from lancedb.dependencies import _PANDAS_AVAILABLE
 from lancedb.index import HnswPq, HnswSq, IvfPq
 import numpy as np
-import pandas as pd
 import polars as pl
 import pyarrow as pa
 import pyarrow.dataset
@@ -106,15 +106,22 @@ async def test_update_async(mem_db_async: AsyncConnection):
    table = await mem_db_async.create_table("some_table", data=[{"id": 0}])
    assert await table.count_rows("id == 0") == 1
    assert await table.count_rows("id == 7") == 0
-    await table.update({"id": 7})
+    update_res = await table.update({"id": 7})
+    assert update_res.rows_updated == 1
+    assert update_res.version == 2
    assert await table.count_rows("id == 7") == 1
    assert await table.count_rows("id == 0") == 0
-    await table.add([{"id": 2}])
-    await table.update(where="id % 2 == 0", updates_sql={"id": "5"})
+    add_res = await table.add([{"id": 2}])
+    assert add_res.version == 3
+    update_res = await table.update(where="id % 2 == 0", updates_sql={"id": "5"})
+    assert update_res.rows_updated == 1
+    assert update_res.version == 4
    assert await table.count_rows("id == 7") == 1
    assert await table.count_rows("id == 2") == 0
    assert await table.count_rows("id == 5") == 1
-    await table.update({"id": 10}, where="id == 5")
+    update_res = await table.update({"id": 10}, where="id == 5")
+    assert update_res.rows_updated == 1
+    assert update_res.version == 5
    assert await table.count_rows("id == 10") == 1


@@ -138,13 +145,16 @@ def test_create_table(mem_db: DBConnection):
        {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
        {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
    ]
-    df = pd.DataFrame(rows)
-    pa_table = pa.Table.from_pandas(df, schema=schema)
+    pa_table = pa.Table.from_pylist(rows, schema=schema)
    data = [
        ("Rows", rows),
-        ("pd_DataFrame", df),
        ("pa_Table", pa_table),
    ]
+    if _PANDAS_AVAILABLE:
+        import pandas as pd
+
+        df = pd.DataFrame(rows)
+        data.append(("pd_DataFrame", df))

    for name, d in data:
        tbl = mem_db.create_table(name, data=d, schema=schema).to_arrow()
@@ -296,7 +306,7 @@ def test_add_subschema(mem_db: DBConnection):

    data = {"price": 10.0, "item": "foo"}
    table.add([data])
-    data = pd.DataFrame({"price": [2.0], "vector": [[3.1, 4.1]]})
+    data = pa.Table.from_pydict({"price": [2.0], "vector": [[3.1, 4.1]]})
    table.add(data)
    data = {"price": 3.0, "vector": [5.9, 26.5], "item": "bar"}
    table.add([data])
@@ -405,6 +415,7 @@ def test_add_nullability(mem_db: DBConnection):


 def test_add_pydantic_model(mem_db: DBConnection):
+    pytest.importorskip("pandas")
    # https://github.com/lancedb/lancedb/issues/562

    class Metadata(BaseModel):
@@ -433,7 +444,8 @@ def test_add_pydantic_model(mem_db: DBConnection):
            content="foo", meta=Metadata(source="bar", timestamp=datetime.now())
        ),
    )
-    tbl.add([expected])
+    add_res = tbl.add([expected])
+    assert add_res.version == 2

    result = tbl.search([0.0, 0.0]).limit(1).to_pydantic(LanceSchema)[0]
    assert result == expected
@@ -455,11 +467,12 @@ async def test_add_async(mem_db_async: AsyncConnection):
        ],
    )
    assert await table.count_rows() == 2
-    await table.add(
+    add_res = await table.add(
        data=[
            {"vector": [10.0, 11.0], "item": "baz", "price": 30.0},
        ],
    )
+    assert add_res.version == 2
    assert await table.count_rows() == 3


@@ -473,10 +486,10 @@ def test_polars(mem_db: DBConnection):
    table = mem_db.create_table("test", data=pl.DataFrame(data))
    assert len(table) == 2

-    result = table.to_pandas()
-    assert np.allclose(result["vector"].tolist(), data["vector"])
-    assert result["item"].tolist() == data["item"]
-    assert np.allclose(result["price"].tolist(), data["price"])
+    result = table.to_arrow()
+    assert np.allclose(result["vector"].to_pylist(), data["vector"])
+    assert result["item"].to_pylist() == data["item"]
+    assert np.allclose(result["price"].to_pylist(), data["price"])

    schema = pa.schema(
        [
@@ -525,6 +538,113 @@ def test_versioning(mem_db: DBConnection):
    assert len(table) == 2


+def test_tags(mem_db: DBConnection):
+    table = mem_db.create_table(
+        "test",
+        data=[
+            {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
+            {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
+        ],
+    )
+
+    table.tags.create("tag1", 1)
+    tags = table.tags.list()
+    assert "tag1" in tags
+    assert tags["tag1"]["version"] == 1
+
+    table.add(
+        data=[
+            {"vector": [10.0, 11.0], "item": "baz", "price": 30.0},
+        ],
+    )
+
+    table.tags.create("tag2", 2)
+    tags = table.tags.list()
+    assert "tag1" in tags
+    assert "tag2" in tags
+    assert tags["tag1"]["version"] == 1
+    assert tags["tag2"]["version"] == 2
+
+    table.tags.delete("tag2")
+    table.tags.update("tag1", 2)
+    tags = table.tags.list()
+    assert "tag1" in tags
+    assert tags["tag1"]["version"] == 2
+
+    table.tags.update("tag1", 1)
+    tags = table.tags.list()
+    assert "tag1" in tags
+    assert tags["tag1"]["version"] == 1
+
+    table.checkout("tag1")
+    assert table.version == 1
+    assert table.count_rows() == 2
+    table.tags.create("tag2", 2)
+    table.checkout("tag2")
+    assert table.version == 2
+    assert table.count_rows() == 3
+    table.checkout_latest()
+    table.add(
+        data=[
+            {"vector": [12.0, 13.0], "item": "baz", "price": 40.0},
+        ],
+    )
+
+
+@pytest.mark.asyncio
+async def test_async_tags(mem_db_async: AsyncConnection):
+    table = await mem_db_async.create_table(
+        "test",
+        data=[
+            {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
+            {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
+        ],
+    )
+
+    await table.tags.create("tag1", 1)
+    tags = await table.tags.list()
+    assert "tag1" in tags
+    assert tags["tag1"]["version"] == 1
+
+    await table.add(
+        data=[
+            {"vector": [10.0, 11.0], "item": "baz", "price": 30.0},
+        ],
+    )
+
+    await table.tags.create("tag2", 2)
+    tags = await table.tags.list()
+    assert "tag1" in tags
+    assert "tag2" in tags
+    assert tags["tag1"]["version"] == 1
+    assert tags["tag2"]["version"] == 2
+
+    await table.tags.delete("tag2")
+    await table.tags.update("tag1", 2)
+    tags = await table.tags.list()
+    assert "tag1" in tags
+    assert tags["tag1"]["version"] == 2
+
+    await table.tags.update("tag1", 1)
+    tags = await table.tags.list()
+    assert "tag1" in tags
+    assert tags["tag1"]["version"] == 1
+
+    await table.checkout("tag1")
+    assert await table.version() == 1
+    assert await table.count_rows() == 2
+    await table.tags.create("tag2", 2)
+    await table.checkout("tag2")
+    assert await table.version() == 2
+    assert await table.count_rows() == 3
+    await table.checkout_latest()
+    await table.add(
+        data=[
+            {"vector": [12.0, 13.0], "item": "baz", "price": 40.0},
+        ],
+    )
+
+
@patch("lancedb.table.AsyncTable.create_index")
 def test_create_index_method(mock_create_index, mem_db: DBConnection):
    table = mem_db.create_table(
@@ -649,6 +769,29 @@ def test_restore(mem_db: DBConnection):
        table.restore(0)


+def test_restore_with_tags(mem_db: DBConnection):
+    table = mem_db.create_table(
+        "my_table",
+        data=[{"vector": [1.1, 0.9], "type": "vector"}],
+    )
+    tag = "tag1"
+    table.tags.create(tag, 1)  # tag the initial, single-row version
+    table.add([{"vector": [0.5, 0.2], "type": "vector"}])
+    table.restore(tag)  # restore by tag name rather than by version number
+    assert len(table.list_versions()) == 3  # create + add + restore
+    assert len(table) == 1  # back to the single row of the tagged version
+    expected = table.to_arrow()
+
+    table.add([{"vector": [0.3, 0.3], "type": "vector"}])
+    table.checkout("tag1")
+    table.restore()  # no argument; presumably restores the checked-out version
+    assert len(table.list_versions()) == 5  # two more: add + restore
+    assert table.to_arrow() == expected
+
+    with pytest.raises(ValueError):
+        table.restore("tag_unknown")  # a tag that was never created is rejected
+
+
 def test_merge(tmp_db: DBConnection, tmp_path):
    pytest.importorskip("lance")
    import lance
@@ -684,11 +827,12 @@ def test_delete(mem_db: DBConnection):
    )
    assert len(table) == 2
    assert len(table.list_versions()) == 1
-    table.delete("id=0")
+    delete_res = table.delete("id=0")
+    assert delete_res.version == 2
    assert len(table.list_versions()) == 2
    assert table.version == 2
    assert len(table) == 1
-    assert table.to_pandas()["id"].tolist() == [1]
+    assert table.to_arrow()["id"].to_pylist() == [1]


 def test_update(mem_db: DBConnection):
@@ -698,7 +842,9 @@ def test_update(mem_db: DBConnection):
    )
    assert len(table) == 2
    assert len(table.list_versions()) == 1
-    table.update(where="id=0", values={"vector": [1.1, 1.1]})
+    update_res = table.update(where="id=0", values={"vector": [1.1, 1.1]})
+    assert update_res.version == 2
+    assert update_res.rows_updated == 1
    assert len(table.list_versions()) == 2
    assert table.version == 2
    assert len(table) == 2
@@ -787,9 +933,16 @@ def test_merge_insert(mem_db: DBConnection):
    new_data = pa.table({"a": [2, 3, 4], "b": ["x", "y", "z"]})

    # upsert
-    table.merge_insert(
-        "a"
-    ).when_matched_update_all().when_not_matched_insert_all().execute(new_data)
+    merge_insert_res = (
+        table.merge_insert("a")
+        .when_matched_update_all()
+        .when_not_matched_insert_all()
+        .execute(new_data, timeout=timedelta(seconds=10))
+    )
+    assert merge_insert_res.version == 2
+    assert merge_insert_res.num_inserted_rows == 1
+    assert merge_insert_res.num_updated_rows == 2
+    assert merge_insert_res.num_deleted_rows == 0

    expected = pa.table({"a": [1, 2, 3, 4], "b": ["a", "x", "y", "z"]})
    assert table.to_arrow().sort_by("a") == expected
@@ -797,17 +950,28 @@ def test_merge_insert(mem_db: DBConnection):
    table.restore(version)

    # conditional update
-    table.merge_insert("a").when_matched_update_all(where="target.b = 'b'").execute(
-        new_data
+    merge_insert_res = (
+        table.merge_insert("a")
+        .when_matched_update_all(where="target.b = 'b'")
+        .execute(new_data)
    )
+    assert merge_insert_res.version == 4
+    assert merge_insert_res.num_inserted_rows == 0
+    assert merge_insert_res.num_updated_rows == 1
+    assert merge_insert_res.num_deleted_rows == 0
    expected = pa.table({"a": [1, 2, 3], "b": ["a", "x", "c"]})
    assert table.to_arrow().sort_by("a") == expected

    table.restore(version)

    # insert-if-not-exists
-    table.merge_insert("a").when_not_matched_insert_all().execute(new_data)
-
+    merge_insert_res = (
+        table.merge_insert("a").when_not_matched_insert_all().execute(new_data)
+    )
+    assert merge_insert_res.version == 6
+    assert merge_insert_res.num_inserted_rows == 1
+    assert merge_insert_res.num_updated_rows == 0
+    assert merge_insert_res.num_deleted_rows == 0
    expected = pa.table({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "z"]})
    assert table.to_arrow().sort_by("a") == expected

@@ -816,13 +980,17 @@ def test_merge_insert(mem_db: DBConnection):
    new_data = pa.table({"a": [2, 4], "b": ["x", "z"]})

    # replace-range
-    (
+    merge_insert_res = (
        table.merge_insert("a")
        .when_matched_update_all()
        .when_not_matched_insert_all()
        .when_not_matched_by_source_delete("a > 2")
        .execute(new_data)
    )
+    assert merge_insert_res.version == 8
+    assert merge_insert_res.num_inserted_rows == 1
+    assert merge_insert_res.num_updated_rows == 1
+    assert merge_insert_res.num_deleted_rows == 1

    expected = pa.table({"a": [1, 2, 4], "b": ["a", "x", "z"]})
    assert table.to_arrow().sort_by("a") == expected
@@ -830,15 +998,27 @@ def test_merge_insert(mem_db: DBConnection):
    table.restore(version)

    # replace-range no condition
-    table.merge_insert(
-        "a"
-    ).when_matched_update_all().when_not_matched_insert_all().when_not_matched_by_source_delete().execute(
-        new_data
+    merge_insert_res = (
+        table.merge_insert("a")
+        .when_matched_update_all()
+        .when_not_matched_insert_all()
+        .when_not_matched_by_source_delete()
+        .execute(new_data)
    )
+    assert merge_insert_res.version == 10
+    assert merge_insert_res.num_inserted_rows == 1
+    assert merge_insert_res.num_updated_rows == 1
+    assert merge_insert_res.num_deleted_rows == 2

    expected = pa.table({"a": [2, 4], "b": ["x", "z"]})
    assert table.to_arrow().sort_by("a") == expected

+    # timeout
+    with pytest.raises(Exception, match="merge insert timed out"):
+        table.merge_insert("a").when_matched_update_all().execute(
+            new_data, timeout=timedelta(0)
+        )
+

 # We vary the data format because there are slight differences in how
 # subschemas are handled in different formats
@@ -852,6 +1032,7 @@ def test_merge_insert(mem_db: DBConnection):
    ids=["pa.Table", "pd.DataFrame", "rows"],
 )
 def test_merge_insert_subschema(mem_db: DBConnection, data_format):
+    pytest.importorskip("pandas")
    initial_data = pa.table(
        {"id": range(3), "a": [1.0, 2.0, 3.0], "c": ["x", "x", "x"]}
    )
@@ -948,7 +1129,7 @@ def test_create_with_embedding_function(mem_db: DBConnection):

    func = MockTextEmbeddingFunction.create()
    texts = ["hello world", "goodbye world", "foo bar baz fizz buzz"]
-    df = pd.DataFrame({"text": texts, "vector": func.compute_source_embeddings(texts)})
+    df = pa.table({"text": texts, "vector": func.compute_source_embeddings(texts)})

    conf = EmbeddingFunctionConfig(
        source_column="text", vector_column="vector", function=func
@@ -973,7 +1154,7 @@ def test_create_f16_table(mem_db: DBConnection):
        text: str
        vector: Vector(32, value_type=pa.float16())

-    df = pd.DataFrame(
+    df = pa.table(
        {
            "text": [f"s-{i}" for i in range(512)],
            "vector": [np.random.randn(32).astype(np.float16) for _ in range(512)],
@@ -986,7 +1167,7 @@ def test_create_f16_table(mem_db: DBConnection):
    table.add(df)
    table.create_index(num_partitions=2, num_sub_vectors=2)

-    query = df.vector.iloc[2]
+    query = df["vector"][2].as_py()
    expected = table.search(query).limit(2).to_arrow()

    assert "s-2" in expected["text"].to_pylist()
@@ -1002,7 +1183,7 @@ def test_add_with_embedding_function(mem_db: DBConnection):
    table = mem_db.create_table("my_table", schema=MyTable)

    texts = ["hello world", "goodbye world", "foo bar baz fizz buzz"]
-    df = pd.DataFrame({"text": texts})
+    df = pa.table({"text": texts})
    table.add(df)

    texts = ["the quick brown fox", "jumped over the lazy dog"]
@@ -1033,14 +1214,14 @@ def test_multiple_vector_columns(mem_db: DBConnection):
        {"vector1": v1, "vector2": v2, "text": "foo"},
        {"vector1": v2, "vector2": v1, "text": "bar"},
    ]
-    df = pd.DataFrame(data)
+    df = pa.Table.from_pylist(data)
    table.add(df)

    q = np.random.randn(10)
-    result1 = table.search(q, vector_column_name="vector1").limit(1).to_pandas()
-    result2 = table.search(q, vector_column_name="vector2").limit(1).to_pandas()
+    result1 = table.search(q, vector_column_name="vector1").limit(1).to_arrow()
+    result2 = table.search(q, vector_column_name="vector2").limit(1).to_arrow()

-    assert result1["text"].iloc[0] != result2["text"].iloc[0]
+    assert result1["text"][0] != result2["text"][0]


 def test_create_scalar_index(mem_db: DBConnection):
@@ -1078,22 +1259,22 @@ def test_empty_query(mem_db: DBConnection):
        "my_table",
        data=[{"text": "foo", "id": 0}, {"text": "bar", "id": 1}],
    )
-    df = table.search().select(["id"]).where("text='bar'").limit(1).to_pandas()
-    val = df.id.iloc[0]
+    df = table.search().select(["id"]).where("text='bar'").limit(1).to_arrow()
+    val = df["id"][0].as_py()
    assert val == 1

    table = mem_db.create_table("my_table2", data=[{"id": i} for i in range(100)])
-    df = table.search().select(["id"]).to_pandas()
-    assert len(df) == 100
+    df = table.search().select(["id"]).to_arrow()
+    assert df.num_rows == 100
    # None is the same as default
-    df = table.search().select(["id"]).limit(None).to_pandas()
-    assert len(df) == 100
+    df = table.search().select(["id"]).limit(None).to_arrow()
+    assert df.num_rows == 100
    # invalid limit is the same as None, which is the same as default
-    df = table.search().select(["id"]).limit(-1).to_pandas()
-    assert len(df) == 100
+    df = table.search().select(["id"]).limit(-1).to_arrow()
+    assert df.num_rows == 100
    # valid limit should work
-    df = table.search().select(["id"]).limit(42).to_pandas()
-    assert len(df) == 42
+    df = table.search().select(["id"]).limit(42).to_arrow()
+    assert df.num_rows == 42


 def test_search_with_schema_inf_single_vector(mem_db: DBConnection):
@@ -1112,14 +1293,14 @@ def test_search_with_schema_inf_single_vector(mem_db: DBConnection):
        {"vector_col": v1, "text": "foo"},
        {"vector_col": v2, "text": "bar"},
    ]
-    df = pd.DataFrame(data)
+    df = pa.Table.from_pylist(data)
    table.add(df)

    q = np.random.randn(10)
-    result1 = table.search(q, vector_column_name="vector_col").limit(1).to_pandas()
-    result2 = table.search(q).limit(1).to_pandas()
+    result1 = table.search(q, vector_column_name="vector_col").limit(1).to_arrow()
+    result2 = table.search(q).limit(1).to_arrow()

-    assert result1["text"].iloc[0] == result2["text"].iloc[0]
+    assert result1["text"][0].as_py() == result2["text"][0].as_py()


 def test_search_with_schema_inf_multiple_vector(mem_db: DBConnection):
@@ -1139,12 +1320,12 @@ def test_search_with_schema_inf_multiple_vector(mem_db: DBConnection):
        {"vector1": v1, "vector2": v2, "text": "foo"},
        {"vector1": v2, "vector2": v1, "text": "bar"},
    ]
-    df = pd.DataFrame(data)
+    df = pa.Table.from_pylist(data)
    table.add(df)

    q = np.random.randn(10)
    with pytest.raises(ValueError):
-        table.search(q).limit(1).to_pandas()
+        table.search(q).limit(1).to_arrow()


 def test_compact_cleanup(tmp_db: DBConnection):
@@ -1366,11 +1547,13 @@ def test_restore_consistency(tmp_path):
 def test_add_columns(mem_db: DBConnection):
    data = pa.table({"id": [0, 1]})
    table = LanceTable.create(mem_db, "my_table", data=data)
-    table.add_columns({"new_col": "id + 2"})
+    add_columns_res = table.add_columns({"new_col": "id + 2"})
+    assert add_columns_res.version == 2
    assert table.to_arrow().column_names == ["id", "new_col"]
    assert table.to_arrow()["new_col"].to_pylist() == [2, 3]

-    table.add_columns({"null_int": "cast(null as bigint)"})
+    add_columns_res = table.add_columns({"null_int": "cast(null as bigint)"})
+    assert add_columns_res.version == 3
    assert table.schema.field("null_int").type == pa.int64()


@@ -1378,7 +1561,8 @@ def test_add_columns(mem_db: DBConnection):
 async def test_add_columns_async(mem_db_async: AsyncConnection):
    data = pa.table({"id": [0, 1]})
    table = await mem_db_async.create_table("my_table", data=data)
-    await table.add_columns({"new_col": "id + 2"})
+    add_columns_res = await table.add_columns({"new_col": "id + 2"})
+    assert add_columns_res.version == 2
    data = await table.to_arrow()
    assert data.column_names == ["id", "new_col"]
    assert data["new_col"].to_pylist() == [2, 3]
@@ -1388,9 +1572,10 @@ async def test_add_columns_async(mem_db_async: AsyncConnection):
 async def test_add_columns_with_schema(mem_db_async: AsyncConnection):
    data = pa.table({"id": [0, 1]})
    table = await mem_db_async.create_table("my_table", data=data)
-    await table.add_columns(
+    add_columns_res = await table.add_columns(
        [pa.field("x", pa.int64()), pa.field("vector", pa.list_(pa.float32(), 8))]
    )
+    assert add_columns_res.version == 2

    assert await table.schema() == pa.schema(
        [
@@ -1401,11 +1586,12 @@ async def test_add_columns_with_schema(mem_db_async: AsyncConnection):
    )

    table = await mem_db_async.create_table("table2", data=data)
-    await table.add_columns(
+    add_columns_res = await table.add_columns(
        pa.schema(
            [pa.field("y", pa.int64()), pa.field("emb", pa.list_(pa.float32(), 8))]
        )
    )
+    assert add_columns_res.version == 2
    assert await table.schema() == pa.schema(
        [
            pa.field("id", pa.int64()),
@@ -1418,7 +1604,8 @@ async def test_add_columns_with_schema(mem_db_async: AsyncConnection):
 def test_alter_columns(mem_db: DBConnection):
    data = pa.table({"id": [0, 1]})
    table = mem_db.create_table("my_table", data=data)
-    table.alter_columns({"path": "id", "rename": "new_id"})
+    alter_columns_res = table.alter_columns({"path": "id", "rename": "new_id"})
+    assert alter_columns_res.version == 2
    assert table.to_arrow().column_names == ["new_id"]


@@ -1426,9 +1613,13 @@ def test_alter_columns(mem_db: DBConnection):
 async def test_alter_columns_async(mem_db_async: AsyncConnection):
    data = pa.table({"id": [0, 1]})
    table = await mem_db_async.create_table("my_table", data=data)
-    await table.alter_columns({"path": "id", "rename": "new_id"})
+    alter_columns_res = await table.alter_columns({"path": "id", "rename": "new_id"})
+    assert alter_columns_res.version == 2
    assert (await table.to_arrow()).column_names == ["new_id"]
-    await table.alter_columns(dict(path="new_id", data_type=pa.int16(), nullable=True))
+    alter_columns_res = await table.alter_columns(
+        dict(path="new_id", data_type=pa.int16(), nullable=True)
+    )
+    assert alter_columns_res.version == 3
    data = await table.to_arrow()
    assert data.column(0).type == pa.int16()
    assert data.schema.field(0).nullable
@@ -1437,7 +1628,8 @@ async def test_alter_columns_async(mem_db_async: AsyncConnection):
 def test_drop_columns(mem_db: DBConnection):
    data = pa.table({"id": [0, 1], "category": ["a", "b"]})
    table = mem_db.create_table("my_table", data=data)
-    table.drop_columns(["category"])
+    drop_columns_res = table.drop_columns(["category"])
+    assert drop_columns_res.version == 2
    assert table.to_arrow().column_names == ["id"]


@@ -1445,7 +1637,8 @@ def test_drop_columns(mem_db: DBConnection):
 async def test_drop_columns_async(mem_db_async: AsyncConnection):
    data = pa.table({"id": [0, 1], "category": ["a", "b"]})
    table = await mem_db_async.create_table("my_table", data=data)
-    await table.drop_columns(["category"])
+    drop_columns_res = await table.drop_columns(["category"])
+    assert drop_columns_res.version == 2
    assert (await table.to_arrow()).column_names == ["id"]


@@ -1583,3 +1776,31 @@ def test_replace_field_metadata(tmp_path):
    schema = table.schema
    field = schema[0].metadata
    assert field == {b"foo": b"bar"}
+
+
+def test_stats(mem_db: DBConnection):
+    """table.stats() reports byte, row, index and fragment-length statistics."""
+    table = mem_db.create_table(
+        "my_table",
+        data=[{"text": "foo", "id": 0}, {"text": "bar", "id": 1}],
+    )
+    assert len(table) == 2
+    stats = table.stats()
+    assert stats == {
+        "total_bytes": 38,
+        "num_rows": 2,
+        "num_indices": 0,
+        "fragment_stats": {
+            "num_fragments": 1,
+            "num_small_fragments": 1,
+            "lengths": {
+                "min": 2,
+                "max": 2,
+                "mean": 2,
+                "p25": 2,
+                "p50": 2,
+                "p75": 2,
+                "p99": 2,
+            },
+        },
+    }
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -11,7 +11,10 @@ use pyo3::{
    wrap_pyfunction, Bound, PyResult, Python,
 };
 use query::{FTSQuery, HybridQuery, Query, VectorQuery};
-use table::Table;
+use table::{
+    AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
+    Table, UpdateResult,
+};

 pub mod arrow;
 pub mod connection;
@@ -35,6 +38,13 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<HybridQuery>()?;
    m.add_class::<VectorQuery>()?;
    m.add_class::<RecordBatchStream>()?;
+    m.add_class::<AddColumnsResult>()?;
+    m.add_class::<AlterColumnsResult>()?;
+    m.add_class::<AddResult>()?;
+    m.add_class::<MergeResult>()?;
+    m.add_class::<DeleteResult>()?;
+    m.add_class::<DropColumnsResult>()?;
+    m.add_class::<UpdateResult>()?;
    m.add_function(wrap_pyfunction!(connect, m)?)?;
    m.add_function(wrap_pyfunction!(util::validate_table_name, m)?)?;
    m.add("__version__", env!("CARGO_PKG_VERSION"))?;
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -652,6 +652,11 @@ impl HybridQuery {
        self.inner_vec.bypass_vector_index();
    }

+    #[pyo3(signature = (lower_bound=None, upper_bound=None))]
+    pub fn distance_range(&mut self, lower_bound: Option<f32>, upper_bound: Option<f32>) {
+        self.inner_vec.distance_range(lower_bound, upper_bound);
+    }
+
    pub fn to_vector_query(&mut self) -> PyResult<VectorQuery> {
        Ok(VectorQuery {
            inner: self.inner_vec.inner.clone(),
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -2,6 +2,11 @@
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors
 use std::{collections::HashMap, sync::Arc};

+use crate::{
+    error::PythonErrorExt,
+    index::{extract_index_params, IndexConfig},
+    query::Query,
+};
 use arrow::{
    datatypes::{DataType, Schema},
    ffi_stream::ArrowArrayStreamReader,
@@ -19,12 +24,6 @@ use pyo3::{
 };
 use pyo3_async_runtimes::tokio::future_into_py;

-use crate::{
-    error::PythonErrorExt,
-    index::{extract_index_params, IndexConfig},
-    query::Query,
-};
-
 /// Statistics about a compaction operation.
 #[pyclass(get_all)]
 #[derive(Clone, Debug)]
@@ -59,6 +58,198 @@ pub struct OptimizeStats {
     pub prune: RemovalStats,
 }

+/// Python-facing copy of `lancedb::table::UpdateResult`.
+/// `get_all` exposes every field as a read-only attribute.
+#[pyclass(get_all)]
+#[derive(Clone, Debug)]
+pub struct UpdateResult {
+    pub rows_updated: u64,
+    pub version: u64,
+}
+
+#[pymethods]
+impl UpdateResult {
+    /// Python `repr()`: `UpdateResult(rows_updated=N, version=N)`.
+    pub fn __repr__(&self) -> String {
+        format!(
+            "UpdateResult(rows_updated={}, version={})",
+            self.rows_updated, self.version
+        )
+    }
+}
+
+impl From<lancedb::table::UpdateResult> for UpdateResult {
+    /// Copies each field of the core result unchanged.
+    fn from(result: lancedb::table::UpdateResult) -> Self {
+        Self {
+            rows_updated: result.rows_updated,
+            version: result.version,
+        }
+    }
+}
+
+/// Python-facing copy of `lancedb::table::AddResult`.
+/// `get_all` exposes every field as a read-only attribute.
+#[pyclass(get_all)]
+#[derive(Clone, Debug)]
+pub struct AddResult {
+    pub version: u64,
+}
+
+#[pymethods]
+impl AddResult {
+    /// Python `repr()`: `AddResult(version=N)`.
+    pub fn __repr__(&self) -> String {
+        format!("AddResult(version={})", self.version)
+    }
+}
+
+impl From<lancedb::table::AddResult> for AddResult {
+    /// Copies each field of the core result unchanged.
+    fn from(result: lancedb::table::AddResult) -> Self {
+        Self {
+            version: result.version,
+        }
+    }
+}
+
+/// Python-facing copy of `lancedb::table::DeleteResult`.
+/// `get_all` exposes every field as a read-only attribute.
+#[pyclass(get_all)]
+#[derive(Clone, Debug)]
+pub struct DeleteResult {
+    pub version: u64,
+}
+
+#[pymethods]
+impl DeleteResult {
+    /// Python `repr()`: `DeleteResult(version=N)`.
+    pub fn __repr__(&self) -> String {
+        format!("DeleteResult(version={})", self.version)
+    }
+}
+
+impl From<lancedb::table::DeleteResult> for DeleteResult {
+    /// Copies each field of the core result unchanged.
+    fn from(result: lancedb::table::DeleteResult) -> Self {
+        Self {
+            version: result.version,
+        }
+    }
+}
+
+/// Python-facing copy of `lancedb::table::MergeResult`.
+/// `get_all` exposes every field as a read-only attribute.
+#[pyclass(get_all)]
+#[derive(Clone, Debug)]
+pub struct MergeResult {
+    pub version: u64,
+    pub num_updated_rows: u64,
+    pub num_inserted_rows: u64,
+    pub num_deleted_rows: u64,
+}
+
+#[pymethods]
+impl MergeResult {
+    /// Python `repr()`: lists `version` and the three row counters.
+    pub fn __repr__(&self) -> String {
+        format!(
+            "MergeResult(version={}, num_updated_rows={}, num_inserted_rows={}, num_deleted_rows={})",
+            self.version,
+            self.num_updated_rows,
+            self.num_inserted_rows,
+            self.num_deleted_rows
+        )
+    }
+}
+
+impl From<lancedb::table::MergeResult> for MergeResult {
+    /// Copies each field of the core result unchanged.
+    fn from(result: lancedb::table::MergeResult) -> Self {
+        Self {
+            version: result.version,
+            num_updated_rows: result.num_updated_rows,
+            num_inserted_rows: result.num_inserted_rows,
+            num_deleted_rows: result.num_deleted_rows,
+        }
+    }
+}
+
+/// Python-facing copy of `lancedb::table::AddColumnsResult`.
+/// `get_all` exposes every field as a read-only attribute.
+#[pyclass(get_all)]
+#[derive(Clone, Debug)]
+pub struct AddColumnsResult {
+    pub version: u64,
+}
+
+#[pymethods]
+impl AddColumnsResult {
+    /// Python `repr()`: `AddColumnsResult(version=N)`.
+    pub fn __repr__(&self) -> String {
+        format!("AddColumnsResult(version={})", self.version)
+    }
+}
+
+impl From<lancedb::table::AddColumnsResult> for AddColumnsResult {
+    /// Copies each field of the core result unchanged.
+    fn from(result: lancedb::table::AddColumnsResult) -> Self {
+        Self {
+            version: result.version,
+        }
+    }
+}
+
+/// Python-facing copy of `lancedb::table::AlterColumnsResult`.
+/// `get_all` exposes every field as a read-only attribute.
+#[pyclass(get_all)]
+#[derive(Clone, Debug)]
+pub struct AlterColumnsResult {
+    pub version: u64,
+}
+
+#[pymethods]
+impl AlterColumnsResult {
+    /// Python `repr()`: `AlterColumnsResult(version=N)`.
+    pub fn __repr__(&self) -> String {
+        format!("AlterColumnsResult(version={})", self.version)
+    }
+}
+
+impl From<lancedb::table::AlterColumnsResult> for AlterColumnsResult {
+    /// Copies each field of the core result unchanged.
+    fn from(result: lancedb::table::AlterColumnsResult) -> Self {
+        Self {
+            version: result.version,
+        }
+    }
+}
+
+/// Python-facing copy of `lancedb::table::DropColumnsResult`.
+/// `get_all` exposes every field as a read-only attribute.
+#[pyclass(get_all)]
+#[derive(Clone, Debug)]
+pub struct DropColumnsResult {
+    pub version: u64,
+}
+
+#[pymethods]
+impl DropColumnsResult {
+    /// Python `repr()`: `DropColumnsResult(version=N)`.
+    pub fn __repr__(&self) -> String {
+        format!("DropColumnsResult(version={})", self.version)
+    }
+}
+
+impl From<lancedb::table::DropColumnsResult> for DropColumnsResult {
+    /// Copies each field of the core result unchanged.
+    fn from(result: lancedb::table::DropColumnsResult) -> Self {
+        Self {
+            version: result.version,
+        }
+    }
+}
+
 #[pyclass]
 pub struct Table {
     // We keep a copy of the name to use if the inner table is dropped
@@ -133,15 +296,16 @@ impl Table {
        }

        future_into_py(self_.py(), async move {
-            op.execute().await.infer_error()?;
-            Ok(())
+            let result = op.execute().await.infer_error()?;
+            Ok(AddResult::from(result))
        })
    }

    pub fn delete(self_: PyRef<'_, Self>, condition: String) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
-            inner.delete(&condition).await.infer_error()
+            let result = inner.delete(&condition).await.infer_error()?;
+            Ok(DeleteResult::from(result))
        })
    }

@@ -161,8 +325,8 @@ impl Table {
            op = op.column(column_name, value);
        }
        future_into_py(self_.py(), async move {
-            op.execute().await.infer_error()?;
-            Ok(())
+            let result = op.execute().await.infer_error()?;
+            Ok(UpdateResult::from(result))
        })
    }

@@ -177,15 +341,24 @@ impl Table {
         })
     }

-    #[pyo3(signature = (column, index=None, replace=None))]
+    /// `wait_timeout`, when given, must convert to a `std::time::Duration`; a value that
+    /// does not is reported to the caller as a Python exception.
+    #[pyo3(signature = (column, index=None, replace=None, wait_timeout=None))]
     pub fn create_index<'a>(
         self_: PyRef<'a, Self>,
         column: String,
         index: Option<Bound<'_, PyAny>>,
         replace: Option<bool>,
+        wait_timeout: Option<Bound<'_, PyAny>>,
     ) -> PyResult<Bound<'a, PyAny>> {
         let index = extract_index_params(&index)?;
-        let mut op = self_.inner_ref()?.create_index(&[column], index);
+        // Propagate a failed conversion with `?` rather than `unwrap()`, which would panic.
+        let timeout = wait_timeout
+            .map(|t| t.extract::<std::time::Duration>())
+            .transpose()?;
+        let mut op = self_
+            .inner_ref()?
+            .create_index_with_timeout(&[column], index, timeout);
         if let Some(replace) = replace {
             op = op.replace(replace);
         }
@@ -204,6 +372,26 @@ impl Table {
        })
    }

+    pub fn wait_for_index<'a>(
+        self_: PyRef<'a, Self>,
+        index_names: Vec<String>,
+        timeout: Bound<'_, PyAny>,
+    ) -> PyResult<Bound<'a, PyAny>> {
+        let inner = self_.inner_ref()?.clone();
+        let timeout = timeout.extract::<std::time::Duration>()?;
+        future_into_py(self_.py(), async move {
+            let index_refs = index_names
+                .iter()
+                .map(String::as_str)
+                .collect::<Vec<&str>>();
+            inner
+                .wait_for_index(&index_refs, timeout)
+                .await
+                .infer_error()?;
+            Ok(())
+        })
+    }
+
    pub fn prewarm_index(self_: PyRef<'_, Self>, index_name: String) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
@@ -256,6 +444,47 @@ impl Table {
         })
     }

+    /// Fetch the table statistics and hand them to Python as a nested `dict`.
+    ///
+    /// The awaited value has the keys `total_bytes`, `num_rows`, `num_indices` and
+    /// `fragment_stats`; the latter holds `num_fragments`, `num_small_fragments` and a
+    /// `lengths` dict with `min`, `max`, `mean`, `p25`, `p50`, `p75` and `p99`.
+    pub fn stats(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner_ref()?.clone();
+        future_into_py(self_.py(), async move {
+            let stats = inner.stats().await.infer_error()?;
+            // The GIL is acquired only to build the Python objects.
+            Python::with_gil(|py| {
+                let dict = PyDict::new(py);
+                dict.set_item("total_bytes", stats.total_bytes)?;
+                dict.set_item("num_rows", stats.num_rows)?;
+                dict.set_item("num_indices", stats.num_indices)?;
+
+                let fragment_stats = PyDict::new(py);
+                fragment_stats.set_item("num_fragments", stats.fragment_stats.num_fragments)?;
+                fragment_stats.set_item(
+                    "num_small_fragments",
+                    stats.fragment_stats.num_small_fragments,
+                )?;
+
+                let fragment_lengths = PyDict::new(py);
+                fragment_lengths.set_item("min", stats.fragment_stats.lengths.min)?;
+                fragment_lengths.set_item("max", stats.fragment_stats.lengths.max)?;
+                fragment_lengths.set_item("mean", stats.fragment_stats.lengths.mean)?;
+                fragment_lengths.set_item("p25", stats.fragment_stats.lengths.p25)?;
+                fragment_lengths.set_item("p50", stats.fragment_stats.lengths.p50)?;
+                fragment_lengths.set_item("p75", stats.fragment_stats.lengths.p75)?;
+                fragment_lengths.set_item("p99", stats.fragment_stats.lengths.p99)?;
+
+                fragment_stats.set_item("lengths", fragment_lengths)?;
+                dict.set_item("fragment_stats", fragment_stats)?;
+
+                // The dict is always present, so no `Option` wrapper is needed.
+                Ok(dict.unbind())
+            })
+        })
+    }
+
     pub fn __repr__(&self) -> String {
         match &self.inner {
             None => format!("ClosedTable({})", self.name),
@@ -298,10 +520,16 @@ impl Table {
        })
    }

-    pub fn checkout(self_: PyRef<'_, Self>, version: u64) -> PyResult<Bound<'_, PyAny>> {
+    pub fn checkout(self_: PyRef<'_, Self>, version: LanceVersion) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
-        future_into_py(self_.py(), async move {
-            inner.checkout(version).await.infer_error()
+        let py = self_.py();
+        future_into_py(py, async move {
+            match version {
+                LanceVersion::Version(version_num) => {
+                    inner.checkout(version_num).await.infer_error()
+                }
+                LanceVersion::Tag(tag) => inner.checkout_tag(&tag).await.infer_error(),
+            }
        })
    }

@@ -313,12 +541,19 @@ impl Table {
    }

    #[pyo3(signature = (version=None))]
-    pub fn restore(self_: PyRef<'_, Self>, version: Option<u64>) -> PyResult<Bound<'_, PyAny>> {
+    pub fn restore(
+        self_: PyRef<'_, Self>,
+        version: Option<LanceVersion>,
+    ) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
+        let py = self_.py();

-        future_into_py(self_.py(), async move {
+        future_into_py(py, async move {
            if let Some(version) = version {
-                inner.checkout(version).await.infer_error()?;
+                match version {
+                    LanceVersion::Version(num) => inner.checkout(num).await.infer_error()?,
+                    LanceVersion::Tag(tag) => inner.checkout_tag(&tag).await.infer_error()?,
+                }
            }
            inner.restore().await.infer_error()
        })
@@ -328,6 +563,11 @@ impl Table {
        Query::new(self.inner_ref().unwrap().query())
    }

+    #[getter]
+    pub fn tags(&self) -> PyResult<Tags> {
+        Ok(Tags::new(self.inner_ref()?.clone()))
+    }
+
    /// Optimize the on-disk data by compacting and pruning old data, for better performance.
    #[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None, retrain=None))]
    pub fn optimize(
@@ -409,10 +649,13 @@ impl Table {
            builder
                .when_not_matched_by_source_delete(parameters.when_not_matched_by_source_condition);
        }
+        if let Some(timeout) = parameters.timeout {
+            builder.timeout(timeout);
+        }

        future_into_py(self_.py(), async move {
-            builder.execute(Box::new(batches)).await.infer_error()?;
-            Ok(())
+            let res = builder.execute(Box::new(batches)).await.infer_error()?;
+            Ok(MergeResult::from(res))
        })
    }

@@ -448,8 +691,8 @@ impl Table {

        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
-            inner.add_columns(definitions, None).await.infer_error()?;
-            Ok(())
+            let result = inner.add_columns(definitions, None).await.infer_error()?;
+            Ok(AddColumnsResult::from(result))
        })
    }

@@ -462,8 +705,8 @@ impl Table {

        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
-            inner.add_columns(transform, None).await.infer_error()?;
-            Ok(())
+            let result = inner.add_columns(transform, None).await.infer_error()?;
+            Ok(AddColumnsResult::from(result))
        })
    }

@@ -506,8 +749,8 @@ impl Table {

        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
-            inner.alter_columns(&alterations).await.infer_error()?;
-            Ok(())
+            let result = inner.alter_columns(&alterations).await.infer_error()?;
+            Ok(AlterColumnsResult::from(result))
        })
    }

@@ -515,8 +758,8 @@ impl Table {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
            let column_refs = columns.iter().map(String::as_str).collect::<Vec<&str>>();
-            inner.drop_columns(&column_refs).await.infer_error()?;
-            Ok(())
+            let result = inner.drop_columns(&column_refs).await.infer_error()?;
+            Ok(DropColumnsResult::from(result))
        })
    }

@@ -552,6 +795,12 @@ impl Table {
    }
 }

+#[derive(FromPyObject)]
+pub enum LanceVersion {
+    Version(u64),
+    Tag(String),
+}
+
 #[derive(FromPyObject)]
 #[pyo3(from_item_all)]
 pub struct MergeInsertParams {
@@ -561,4 +810,82 @@ pub struct MergeInsertParams {
     when_not_matched_insert_all: bool,
     when_not_matched_by_source_delete: bool,
     when_not_matched_by_source_condition: Option<String>,
+    timeout: Option<std::time::Duration>,
+}
+
+/// Python handle for the tags of one table; it keeps its own copy of the table handle.
+#[pyclass]
+pub struct Tags {
+    inner: LanceDbTable,
+}
+
+impl Tags {
+    /// Wrap `table` so its tags can be managed through this object.
+    pub fn new(table: LanceDbTable) -> Self {
+        Self { inner: table }
+    }
+}
+
+#[pymethods]
+impl Tags {
+    /// Resolve to a dict mapping each tag name to a dict with the keys
+    /// `version` and `manifest_size`.
+    pub fn list(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner.clone();
+        future_into_py(self_.py(), async move {
+            let tags = inner.tags().await.infer_error()?;
+            let res = tags.list().await.infer_error()?;
+
+            Python::with_gil(|py| {
+                let py_dict = PyDict::new(py);
+                for (key, contents) in res {
+                    let value_dict = PyDict::new(py);
+                    value_dict.set_item("version", contents.version)?;
+                    value_dict.set_item("manifest_size", contents.manifest_size)?;
+                    py_dict.set_item(key, value_dict)?;
+                }
+                Ok(py_dict.unbind())
+            })
+        })
+    }
+
+    /// Resolve to whatever the underlying tags API returns from `get_version(tag)`.
+    pub fn get_version(self_: PyRef<'_, Self>, tag: String) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner.clone();
+        future_into_py(self_.py(), async move {
+            let tags = inner.tags().await.infer_error()?;
+            let res = tags.get_version(tag.as_str()).await.infer_error()?;
+            Ok(res)
+        })
+    }
+
+    /// Create `tag` for `version` through the underlying tags API; resolves to `None`.
+    pub fn create(self_: PyRef<Self>, tag: String, version: u64) -> PyResult<Bound<PyAny>> {
+        let inner = self_.inner.clone();
+        future_into_py(self_.py(), async move {
+            let mut tags = inner.tags().await.infer_error()?;
+            tags.create(tag.as_str(), version).await.infer_error()?;
+            Ok(())
+        })
+    }
+
+    /// Delete `tag` through the underlying tags API; resolves to `None`.
+    pub fn delete(self_: PyRef<Self>, tag: String) -> PyResult<Bound<PyAny>> {
+        let inner = self_.inner.clone();
+        future_into_py(self_.py(), async move {
+            let mut tags = inner.tags().await.infer_error()?;
+            tags.delete(tag.as_str()).await.infer_error()?;
+            Ok(())
+        })
+    }
+
+    /// Update `tag` with `version` (presumably re-pointing it); resolves to `None`.
+    pub fn update(self_: PyRef<Self>, tag: String, version: u64) -> PyResult<Bound<PyAny>> {
+        let inner = self_.inner.clone();
+        future_into_py(self_.py(), async move {
+            let mut tags = inner.tags().await.infer_error()?;
+            tags.update(tag.as_str(), version).await.infer_error()?;
+            Ok(())
+        })
+    }
 }
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,2 +1,2 @@
 [toolchain]
-channel = "1.83.0"
+channel = "1.86.0"
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-node"
-version = "0.19.0-beta.7"
+version = "0.19.1-beta.5"
 description = "Serverless, low-latency vector database for AI applications"
 license.workspace = true
 edition.workspace = true
--- a/rust/lancedb/Cargo.toml
+++ b/rust/lancedb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.19.0-beta.7"
+version = "0.19.1-beta.5"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
--- a/rust/lancedb/src/catalog/listing.rs
+++ b/rust/lancedb/src/catalog/listing.rs
@@ -81,7 +81,7 @@ impl ListingCatalogOptionsBuilder {
 /// [`crate::database::listing::ListingDatabase`]
 #[derive(Debug)]
 pub struct ListingCatalog {
-    object_store: ObjectStore,
+    object_store: Arc<ObjectStore>,

    uri: String,

@@ -105,7 +105,7 @@ impl ListingCatalog {
    }

    async fn open_path(path: &str) -> Result<Self> {
-        let (object_store, base_path) = ObjectStore::from_path(path).unwrap();
+        let (object_store, base_path) = ObjectStore::from_uri(path).await.unwrap();
        if object_store.is_local() {
            Self::try_create_dir(path).context(CreateDirSnafu { path })?;
        }
--- a/rust/lancedb/src/database/listing.rs
+++ b/rust/lancedb/src/database/listing.rs
@@ -201,7 +201,7 @@ impl ListingDatabaseOptionsBuilder {
 /// We will have two tables named `table1` and `table2`.
 #[derive(Debug)]
 pub struct ListingDatabase {
-    object_store: ObjectStore,
+    object_store: Arc<ObjectStore>,
    query_string: Option<String>,

    pub(crate) uri: String,
--- a/rust/lancedb/src/error.rs
+++ b/rust/lancedb/src/error.rs
@@ -35,6 +35,8 @@ pub enum Error {
    Schema { message: String },
    #[snafu(display("Runtime error: {message}"))]
    Runtime { message: String },
+    #[snafu(display("Timeout error: {message}"))]
+    Timeout { message: String },

    // 3rd party / external errors
    #[snafu(display("object_store error: {source}"))]
--- a/rust/lancedb/src/index.rs
+++ b/rust/lancedb/src/index.rs
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use std::sync::Arc;
-
 use scalar::FtsIndexBuilder;
 use serde::Deserialize;
 use serde_with::skip_serializing_none;
+use std::sync::Arc;
+use std::time::Duration;
 use vector::IvfFlatIndexBuilder;

 use crate::{table::BaseTable, DistanceType, Error, Result};
@@ -17,6 +17,7 @@ use self::{

 pub mod scalar;
 pub mod vector;
+pub mod waiter;

 /// Supported index types.
 #[derive(Debug, Clone)]
@@ -69,6 +70,7 @@ pub struct IndexBuilder {
    pub(crate) index: Index,
    pub(crate) columns: Vec<String>,
    pub(crate) replace: bool,
+    pub(crate) wait_timeout: Option<Duration>,
 }

 impl IndexBuilder {
@@ -78,6 +80,7 @@ impl IndexBuilder {
            index,
            columns,
            replace: true,
+            wait_timeout: None,
        }
    }

@@ -91,6 +94,15 @@ impl IndexBuilder {
        self
    }

+    /// Duration of time to wait for asynchronous indexing to complete. If not set,
+    /// `create_index()` will not wait.
+    ///
+    /// This is not supported for `NativeTable` since indexing is synchronous.
+    pub fn wait_timeout(mut self, d: Duration) -> Self {
+        self.wait_timeout = Some(d);
+        self
+    }
+
    pub async fn execute(self) -> Result<()> {
        self.parent.clone().create_index(self).await
    }
--- a/rust/lancedb/src/index/waiter.rs
+++ b/rust/lancedb/src/index/waiter.rs
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+use crate::error::Result;
+use crate::table::BaseTable;
+use crate::Error;
+use log::debug;
+use std::time::{Duration, Instant};
+use tokio::time::sleep;
+
+/// Pause between two consecutive polls of the table, in milliseconds.
+const DEFAULT_SLEEP_MS: u64 = 1000;
+/// Largest `timeout` accepted by `wait_for_index` (two hours).
+const MAX_WAIT: Duration = Duration::from_secs(2 * 60 * 60);
+
+/// Poll the table using list_indices() and index_stats() until all of the indices have 0 un-indexed rows.
+///
+/// The table is always polled at least once, so indices that are already complete are
+/// reported as such even with a zero `timeout`; sleeps between polls never extend past
+/// the deadline. An empty `index_names` returns `Ok(())` without polling.
+///
+/// # Errors
+///
+/// * `Error::InvalidInput` if `timeout` is greater than `MAX_WAIT`.
+/// * `Error::Timeout` if some index is still missing or not fully indexed after `timeout`.
+/// * Any error returned by `list_indices()` or `index_stats()` while polling.
+pub async fn wait_for_index(
+    table: &dyn BaseTable,
+    index_names: &[&str],
+    timeout: Duration,
+) -> Result<()> {
+    if timeout > MAX_WAIT {
+        return Err(Error::InvalidInput {
+            message: format!("timeout must be less than {:?}", MAX_WAIT),
+        });
+    }
+    // Nothing to wait for: succeed without touching the table.
+    if index_names.is_empty() {
+        return Ok(());
+    }
+
+    let poll_interval = Duration::from_millis(DEFAULT_SLEEP_MS);
+    let start = Instant::now();
+    let mut remaining = index_names.to_vec();
+
+    // poll via list_indices() and index_stats() until all indices are created and fully indexed
+    loop {
+        let mut completed = vec![];
+        let indices = table.list_indices().await?;
+
+        for &idx in &remaining {
+            if !indices.iter().any(|i| i.name == *idx) {
+                debug!("still waiting for new index '{}'", idx);
+                continue;
+            }
+
+            match table.index_stats(idx).await? {
+                None => debug!("still waiting for new index '{}'", idx),
+                Some(s) if s.num_unindexed_rows == 0 => {
+                    // note: this may never stabilize under constant writes.
+                    // we should later replace this with a status/job model
+                    completed.push(idx);
+                    debug!(
+                        "fully indexed '{}'. indexed rows: {}",
+                        idx, s.num_indexed_rows
+                    );
+                }
+                Some(s) => debug!(
+                    "still waiting for index '{}'. unindexed rows: {}",
+                    idx, s.num_unindexed_rows
+                ),
+            }
+        }
+        remaining.retain(|idx| !completed.contains(idx));
+        if remaining.is_empty() {
+            return Ok(());
+        }
+
+        // Stop once the deadline has passed, and never sleep beyond it.
+        let time_left = timeout.saturating_sub(start.elapsed());
+        if time_left.is_zero() {
+            break;
+        }
+        sleep(poll_interval.min(time_left)).await;
+    }
+
+    // debug log index diagnostics. This is best effort: it is skipped when debug logging
+    // is off, and a failure here must not replace the timeout error below.
+    if log::log_enabled!(log::Level::Debug) {
+        for &r in &remaining {
+            match table.index_stats(r).await {
+                Ok(Some(s)) => debug!(
+                    "index '{}' not fully indexed after {:?}. stats: {:?}",
+                    r, timeout, s
+                ),
+                Ok(None) => debug!("index '{}' not found after {:?}", r, timeout),
+                Err(e) => debug!("failed to read stats for index '{}': {}", r, e),
+            }
+        }
+    }
+
+    Err(Error::Timeout {
+        message: format!(
+            "timed out waiting for indices: {:?} after {:?}",
+            remaining, timeout
+        ),
+    })
+}
--- a/rust/lancedb/src/remote.rs
+++ b/rust/lancedb/src/remote.rs
@@ -8,6 +8,7 @@

 pub(crate) mod client;
 pub(crate) mod db;
+mod retry;
 pub(crate) mod table;
 pub(crate) mod util;

--- a/rust/lancedb/src/remote/client.rs
+++ b/rust/lancedb/src/remote/client.rs
@@ -1,17 +1,17 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use std::{collections::HashMap, future::Future, str::FromStr, time::Duration};
-
 use http::HeaderName;
 use log::debug;
 use reqwest::{
    header::{HeaderMap, HeaderValue},
-    Request, RequestBuilder, Response,
+    Body, Request, RequestBuilder, Response,
 };
+use std::{collections::HashMap, future::Future, str::FromStr, time::Duration};

 use crate::error::{Error, Result};
 use crate::remote::db::RemoteOptions;
+use crate::remote::retry::{ResolvedRetryConfig, RetryCounter};

 const REQUEST_ID_HEADER: HeaderName = HeaderName::from_static("x-request-id");

@@ -118,41 +118,14 @@ pub struct RetryConfig {
    /// You can also set the `LANCE_CLIENT_RETRY_STATUSES` environment variable
    /// to set this value. Use a comma-separated list of integer values.
    ///
-    /// The default is 429, 500, 502, 503.
+    /// Note that write operations will never be retried on 5xx errors as this may
+    /// result in duplicated writes.
+    ///
+    /// The default is 409, 429, 500, 502, 503, 504.
    pub statuses: Option<Vec<u16>>,
    // TODO: should we allow customizing methods?
 }

-#[derive(Debug, Clone)]
-struct ResolvedRetryConfig {
-    retries: u8,
-    connect_retries: u8,
-    read_retries: u8,
-    backoff_factor: f32,
-    backoff_jitter: f32,
-    statuses: Vec<reqwest::StatusCode>,
-}
-
-impl TryFrom<RetryConfig> for ResolvedRetryConfig {
-    type Error = Error;
-
-    fn try_from(retry_config: RetryConfig) -> Result<Self> {
-        Ok(Self {
-            retries: retry_config.retries.unwrap_or(3),
-            connect_retries: retry_config.connect_retries.unwrap_or(3),
-            read_retries: retry_config.read_retries.unwrap_or(3),
-            backoff_factor: retry_config.backoff_factor.unwrap_or(0.25),
-            backoff_jitter: retry_config.backoff_jitter.unwrap_or(0.25),
-            statuses: retry_config
-                .statuses
-                .unwrap_or_else(|| vec![429, 500, 502, 503])
-                .into_iter()
-                .map(|status| reqwest::StatusCode::from_u16(status).unwrap())
-                .collect(),
-        })
-    }
-}
-
 // We use the `HttpSend` trait to abstract over the `reqwest::Client` so that
 // we can mock responses in tests. Based on the patterns from this blog post:
 // https://write.as/balrogboogie/testing-reqwest-based-clients
@@ -160,8 +133,8 @@ impl TryFrom<RetryConfig> for ResolvedRetryConfig {
 pub struct RestfulLanceDbClient<S: HttpSend = Sender> {
    client: reqwest::Client,
    host: String,
-    retry_config: ResolvedRetryConfig,
-    sender: S,
+    pub(crate) retry_config: ResolvedRetryConfig,
+    pub(crate) sender: S,
 }

 pub trait HttpSend: Clone + Send + Sync + std::fmt::Debug + 'static {
@@ -375,74 +348,69 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
        self.client.post(full_uri)
    }

-    pub async fn send(&self, req: RequestBuilder, with_retry: bool) -> Result<(String, Response)> {
+    pub async fn send(&self, req: RequestBuilder) -> Result<(String, Response)> {
        let (client, request) = req.build_split();
        let mut request = request.unwrap();
+        let request_id = self.extract_request_id(&mut request);
+        self.log_request(&request, &request_id);

-        // Set a request id.
-        // TODO: allow the user to supply this, through middleware?
-        let request_id = if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) {
-            request_id.to_str().unwrap().to_string()
-        } else {
-            let request_id = uuid::Uuid::new_v4().to_string();
-            let header = HeaderValue::from_str(&request_id).unwrap();
-            request.headers_mut().insert(REQUEST_ID_HEADER, header);
-            request_id
-        };
-
-        if log::log_enabled!(log::Level::Debug) {
-            let content_type = request
-                .headers()
-                .get("content-type")
-                .map(|v| v.to_str().unwrap());
-            if content_type == Some("application/json") {
-                let body = request.body().as_ref().unwrap().as_bytes().unwrap();
-                let body = String::from_utf8_lossy(body);
-                debug!(
-                    "Sending request_id={}: {:?} with body {}",
-                    request_id, request, body
-                );
-            } else {
-                debug!("Sending request_id={}: {:?}", request_id, request);
-            }
-        }
-
-        if with_retry {
-            self.send_with_retry_impl(client, request, request_id).await
-        } else {
-            let response = self
-                .sender
-                .send(&client, request)
-                .await
-                .err_to_http(request_id.clone())?;
-            debug!(
-                "Received response for request_id={}: {:?}",
-                request_id, &response
-            );
-            Ok((request_id, response))
-        }
+        let response = self
+            .sender
+            .send(&client, request)
+            .await
+            .err_to_http(request_id.clone())?;
+        debug!(
+            "Received response for request_id={}: {:?}",
+            request_id, &response
+        );
+        Ok((request_id, response))
    }

-    async fn send_with_retry_impl(
+    /// Send the request, retrying according to the client's resolved `RetryConfig`.
+    /// If `retry_5xx` is false, 5xx responses are never retried, regardless of the statuses
+    /// configured in the `RetryConfig`. If `make_body` is provided, it is called before every
+    /// attempt to produce a fresh request body (needed for bodies that cannot be cloned).
+    pub async fn send_with_retry(
        &self,
-        client: reqwest::Client,
-        req: Request,
-        request_id: String,
+        req_builder: RequestBuilder,
+        mut make_body: Option<Box<dyn FnMut() -> Result<Body> + Send + 'static>>,
+        retry_5xx: bool,
    ) -> Result<(String, Response)> {
-        let mut retry_counter = RetryCounter::new(&self.retry_config, request_id);
+        let retry_config = &self.retry_config;
+        let non_5xx_statuses = retry_config
+            .statuses
+            .iter()
+            .filter(|s| !s.is_server_error())
+            .cloned()
+            .collect::<Vec<_>>();
+
+        // clone and build the request to extract the request id
+        let tmp_req = req_builder.try_clone().ok_or_else(|| Error::Runtime {
+            message: "Attempted to retry a request that cannot be cloned".to_string(),
+        })?;
+        let (_, r) = tmp_req.build_split();
+        let mut r = r.unwrap();
+        let request_id = self.extract_request_id(&mut r);
+        let mut retry_counter = RetryCounter::new(retry_config, request_id.clone());

        loop {
-            // This only works if the request body is not a stream. If it is
-            // a stream, we can't use the retry path. We would need to implement
-            // an outer retry.
-            let request = req.try_clone().ok_or_else(|| Error::Runtime {
+            let mut req_builder = req_builder.try_clone().ok_or_else(|| Error::Runtime {
                message: "Attempted to retry a request that cannot be cloned".to_string(),
            })?;
-            let response = self
-                .sender
-                .send(&client, request)
-                .await
-                .map(|r| (r.status(), r));
+
+            // set the streaming body on the request builder after clone
+            if let Some(body_gen) = make_body.as_mut() {
+                let body = body_gen()?;
+                req_builder = req_builder.body(body);
+            }
+
+            let (c, request) = req_builder.build_split();
+            let mut request = request.unwrap();
+            self.set_request_id(&mut request, &request_id.clone());
+            self.log_request(&request, &request_id);
+
+            let response = self.sender.send(&c, request).await.map(|r| (r.status(), r));
+
            match response {
                Ok((status, response)) if status.is_success() => {
                    debug!(
@@ -451,7 +419,10 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
                    );
                    return Ok((retry_counter.request_id, response));
                }
-                Ok((status, response)) if self.retry_config.statuses.contains(&status) => {
+                Ok((status, response))
+                    if (retry_5xx && retry_config.statuses.contains(&status))
+                        || non_5xx_statuses.contains(&status) =>
+                {
                    let source = self
                        .check_response(&retry_counter.request_id, response)
                        .await
@@ -480,6 +451,47 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
        }
    }

+    /// Log the outgoing request at debug level, including the body when it is
+    /// buffered JSON. A missing or streaming body is logged without its payload.
+    fn log_request(&self, request: &Request, request_id: &str) {
+        if !log::log_enabled!(log::Level::Debug) {
+            return;
+        }
+        let content_type = request.headers().get("content-type");
+        let is_json = content_type.and_then(|v| v.to_str().ok()) == Some("application/json");
+        // `as_bytes` is `None` for streaming bodies, so those fall through to the plain log line.
+        let json_body = request.body().filter(|_| is_json).and_then(|b| b.as_bytes());
+        match json_body {
+            Some(bytes) => debug!(
+                "Sending request_id={}: {:?} with body {}",
+                request_id, request, String::from_utf8_lossy(bytes)
+            ),
+            None => debug!("Sending request_id={}: {:?}", request_id, request),
+        }
+    }
+
+    /// Return the request ID carried in the `REQUEST_ID_HEADER` header.
+    /// When the header is missing, a new UUIDv4 is generated, written to the
+    /// request headers through `set_request_id`, and returned instead.
+    ///
+    /// # Panics
+    /// Panics if an existing header value cannot be converted with `to_str`.
+    pub fn extract_request_id(&self, request: &mut Request) -> String {
+        // TODO: allow the user to supply this, through middleware?
+        if let Some(existing) = request.headers().get(REQUEST_ID_HEADER) {
+            return existing.to_str().unwrap().to_string();
+        }
+        let generated = uuid::Uuid::new_v4().to_string();
+        self.set_request_id(request, &generated);
+        generated
+    }
+
+    /// Write `request_id` into the request's `REQUEST_ID_HEADER` header.
+    pub fn set_request_id(&self, request: &mut Request, request_id: &str) {
+        let headers = request.headers_mut();
+        headers.insert(REQUEST_ID_HEADER, HeaderValue::from_str(request_id).unwrap());
+    }
+
    pub async fn check_response(&self, request_id: &str, response: Response) -> Result<Response> {
        // Try to get the response text, but if that fails, just return the status code
        let status = response.status();
@@ -501,91 +513,6 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
    }
 }

-struct RetryCounter<'a> {
-    request_failures: u8,
-    connect_failures: u8,
-    read_failures: u8,
-    config: &'a ResolvedRetryConfig,
-    request_id: String,
-}
-
-impl<'a> RetryCounter<'a> {
-    fn new(config: &'a ResolvedRetryConfig, request_id: String) -> Self {
-        Self {
-            request_failures: 0,
-            connect_failures: 0,
-            read_failures: 0,
-            config,
-            request_id,
-        }
-    }
-
-    fn check_out_of_retries(
-        &self,
-        source: Box<dyn std::error::Error + Send + Sync>,
-        status_code: Option<reqwest::StatusCode>,
-    ) -> Result<()> {
-        if self.request_failures >= self.config.retries
-            || self.connect_failures >= self.config.connect_retries
-            || self.read_failures >= self.config.read_retries
-        {
-            Err(Error::Retry {
-                request_id: self.request_id.clone(),
-                request_failures: self.request_failures,
-                max_request_failures: self.config.retries,
-                connect_failures: self.connect_failures,
-                max_connect_failures: self.config.connect_retries,
-                read_failures: self.read_failures,
-                max_read_failures: self.config.read_retries,
-                source,
-                status_code,
-            })
-        } else {
-            Ok(())
-        }
-    }
-
-    fn increment_request_failures(&mut self, source: crate::Error) -> Result<()> {
-        self.request_failures += 1;
-        let status_code = if let crate::Error::Http { status_code, .. } = &source {
-            *status_code
-        } else {
-            None
-        };
-        self.check_out_of_retries(Box::new(source), status_code)
-    }
-
-    fn increment_connect_failures(&mut self, source: reqwest::Error) -> Result<()> {
-        self.connect_failures += 1;
-        let status_code = source.status();
-        self.check_out_of_retries(Box::new(source), status_code)
-    }
-
-    fn increment_read_failures(&mut self, source: reqwest::Error) -> Result<()> {
-        self.read_failures += 1;
-        let status_code = source.status();
-        self.check_out_of_retries(Box::new(source), status_code)
-    }
-
-    fn next_sleep_time(&self) -> Duration {
-        let backoff = self.config.backoff_factor * (2.0f32.powi(self.request_failures as i32));
-        let jitter = rand::random::<f32>() * self.config.backoff_jitter;
-        let sleep_time = Duration::from_secs_f32(backoff + jitter);
-        debug!(
-            "Retrying request {:?} ({}/{} connect, {}/{} read, {}/{} read) in {:?}",
-            self.request_id,
-            self.connect_failures,
-            self.config.connect_retries,
-            self.request_failures,
-            self.config.retries,
-            self.read_failures,
-            self.config.read_retries,
-            sleep_time
-        );
-        sleep_time
-    }
-}
-
 pub trait RequestResultExt {
    type Output;
    fn err_to_http(self, request_id: String) -> Result<Self::Output>;
--- a/rust/lancedb/src/remote/db.rs
+++ b/rust/lancedb/src/remote/db.rs
@@ -255,7 +255,7 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
        if let Some(start_after) = request.start_after {
            req = req.query(&[("page_token", start_after)]);
        }
-        let (request_id, rsp) = self.client.send(req, true).await?;
+        let (request_id, rsp) = self.client.send_with_retry(req, None, true).await?;
        let rsp = self.client.check_response(&request_id, rsp).await?;
        let version = parse_server_version(&request_id, &rsp)?;
        let tables = rsp
@@ -302,7 +302,7 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
            .body(data_buffer)
            .header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);

-        let (request_id, rsp) = self.client.send(req, false).await?;
+        let (request_id, rsp) = self.client.send(req).await?;

        if rsp.status() == StatusCode::BAD_REQUEST {
            let body = rsp.text().await.err_to_http(request_id.clone())?;
@@ -362,7 +362,7 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
            let req = self
                .client
                .post(&format!("/v1/table/{}/describe/", request.name));
-            let (request_id, rsp) = self.client.send(req, true).await?;
+            let (request_id, rsp) = self.client.send_with_retry(req, None, true).await?;
            if rsp.status() == StatusCode::NOT_FOUND {
                return Err(crate::Error::TableNotFound { name: request.name });
            }
@@ -383,7 +383,7 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
            .client
            .post(&format!("/v1/table/{}/rename/", current_name));
        let req = req.json(&serde_json::json!({ "new_table_name": new_name }));
-        let (request_id, resp) = self.client.send(req, false).await?;
+        let (request_id, resp) = self.client.send(req).await?;
        self.client.check_response(&request_id, resp).await?;
        let table = self.table_cache.remove(current_name).await;
        if let Some(table) = table {
@@ -394,7 +394,7 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {

    async fn drop_table(&self, name: &str) -> Result<()> {
        let req = self.client.post(&format!("/v1/table/{}/drop/", name));
-        let (request_id, resp) = self.client.send(req, true).await?;
+        let (request_id, resp) = self.client.send(req).await?;
        self.client.check_response(&request_id, resp).await?;
        self.table_cache.remove(name).await;
        Ok(())
--- a/rust/lancedb/src/remote/retry.rs
+++ b/rust/lancedb/src/remote/retry.rs
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright The LanceDB Authors
+
+use crate::remote::RetryConfig;
+use crate::Error;
+use log::debug;
+use std::time::Duration;
+
+/// Tracks the failures seen so far for one logical request and decides, against a
+/// [`ResolvedRetryConfig`], when the retry budget is exhausted.
+pub struct RetryCounter<'a> {
+    /// Failures recorded through [`RetryCounter::increment_request_failures`].
+    pub request_failures: u8,
+    /// Failures recorded through [`RetryCounter::increment_connect_failures`].
+    pub connect_failures: u8,
+    /// Failures recorded through [`RetryCounter::increment_read_failures`].
+    pub read_failures: u8,
+    /// Limits and backoff parameters the counters are checked against.
+    pub config: &'a ResolvedRetryConfig,
+    /// Request ID used in log lines and in the resulting [`Error::Retry`].
+    pub request_id: String,
+}
+
+impl<'a> RetryCounter<'a> {
+    /// Create a counter for `request_id` with every failure count at zero.
+    pub(crate) fn new(config: &'a ResolvedRetryConfig, request_id: String) -> Self {
+        Self {
+            request_failures: 0,
+            connect_failures: 0,
+            read_failures: 0,
+            config,
+            request_id,
+        }
+    }
+
+    /// Return [`Error::Retry`] wrapping `source` once any failure counter has reached
+    /// its configured limit; otherwise return `Ok(())` so the caller may try again.
+    fn check_out_of_retries(
+        &self,
+        source: Box<dyn std::error::Error + Send + Sync>,
+        status_code: Option<reqwest::StatusCode>,
+    ) -> crate::Result<()> {
+        if self.request_failures >= self.config.retries
+            || self.connect_failures >= self.config.connect_retries
+            || self.read_failures >= self.config.read_retries
+        {
+            Err(Error::Retry {
+                request_id: self.request_id.clone(),
+                request_failures: self.request_failures,
+                max_request_failures: self.config.retries,
+                connect_failures: self.connect_failures,
+                max_connect_failures: self.config.connect_retries,
+                read_failures: self.read_failures,
+                max_read_failures: self.config.read_retries,
+                source,
+                status_code,
+            })
+        } else {
+            Ok(())
+        }
+    }
+
+    /// Record one request failure caused by `source`.
+    ///
+    /// The status code is taken from `source` when it is an [`Error::Http`].
+    ///
+    /// # Errors
+    /// Returns [`Error::Retry`] when the retry budget is exhausted.
+    pub fn increment_request_failures(&mut self, source: crate::Error) -> crate::Result<()> {
+        self.request_failures += 1;
+        let status_code = if let crate::Error::Http { status_code, .. } = &source {
+            *status_code
+        } else {
+            None
+        };
+        self.check_out_of_retries(Box::new(source), status_code)
+    }
+
+    /// Record one connect failure caused by `source`.
+    ///
+    /// # Errors
+    /// Returns [`Error::Retry`] when the retry budget is exhausted.
+    pub fn increment_connect_failures(&mut self, source: reqwest::Error) -> crate::Result<()> {
+        self.connect_failures += 1;
+        let status_code = source.status();
+        self.check_out_of_retries(Box::new(source), status_code)
+    }
+
+    /// Record one read failure caused by `source`.
+    ///
+    /// # Errors
+    /// Returns [`Error::Retry`] when the retry budget is exhausted.
+    pub fn increment_read_failures(&mut self, source: reqwest::Error) -> crate::Result<()> {
+        self.read_failures += 1;
+        let status_code = source.status();
+        self.check_out_of_retries(Box::new(source), status_code)
+    }
+
+    /// Compute how long to sleep before the next attempt and log the decision.
+    ///
+    /// The delay is `backoff_factor * 2^request_failures` seconds plus a random jitter
+    /// scaled by `backoff_jitter`.
+    pub fn next_sleep_time(&self) -> Duration {
+        let backoff = self.config.backoff_factor * (2.0f32.powi(self.request_failures as i32));
+        let jitter = rand::random::<f32>() * self.config.backoff_jitter;
+        let sleep_time = Duration::from_secs_f32(backoff + jitter);
+        // The argument pairs below must stay in the same order as the labels.
+        debug!(
+            "Retrying request {:?} ({}/{} connect, {}/{} request, {}/{} read) in {:?}",
+            self.request_id,
+            self.connect_failures,
+            self.config.connect_retries,
+            self.request_failures,
+            self.config.retries,
+            self.read_failures,
+            self.config.read_retries,
+            sleep_time
+        );
+        sleep_time
+    }
+}
+
+/// A [`RetryConfig`] with every optional field resolved to a concrete value.
+#[derive(Debug, Clone)]
+pub struct ResolvedRetryConfig {
+    /// Limit compared against `RetryCounter::request_failures`.
+    pub retries: u8,
+    /// Limit compared against `RetryCounter::connect_failures`.
+    pub connect_retries: u8,
+    /// Limit compared against `RetryCounter::read_failures`.
+    pub read_retries: u8,
+    /// Base delay in seconds, doubled for each recorded request failure.
+    pub backoff_factor: f32,
+    /// Scale, in seconds, of the random jitter added to each delay.
+    pub backoff_jitter: f32,
+    /// HTTP status codes that are eligible for a retry.
+    pub statuses: Vec<reqwest::StatusCode>,
+}
+
+impl TryFrom<RetryConfig> for ResolvedRetryConfig {
+    type Error = Error;
+
+    /// Fill in the default for every unset field and validate the status codes.
+    ///
+    /// # Errors
+    /// Returns [`Error::InvalidInput`] when a configured status is rejected by
+    /// `StatusCode::from_u16`, instead of panicking on user-supplied configuration.
+    fn try_from(retry_config: RetryConfig) -> crate::Result<Self> {
+        let statuses = retry_config
+            .statuses
+            .unwrap_or_else(|| vec![409, 429, 500, 502, 503, 504])
+            .into_iter()
+            .map(|status| {
+                reqwest::StatusCode::from_u16(status).map_err(|_| Error::InvalidInput {
+                    message: format!("invalid HTTP status code in retry config: {}", status),
+                })
+            })
+            .collect::<crate::Result<Vec<_>>>()?;
+
+        Ok(Self {
+            retries: retry_config.retries.unwrap_or(3),
+            connect_retries: retry_config.connect_retries.unwrap_or(3),
+            read_retries: retry_config.read_retries.unwrap_or(3),
+            backoff_factor: retry_config.backoff_factor.unwrap_or(0.25),
+            backoff_jitter: retry_config.backoff_jitter.unwrap_or(0.25),
+            statuses,
+        })
+    }
+}
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -3,10 +3,6 @@

 //! LanceDB Table APIs

-use std::collections::HashMap;
-use std::path::Path;
-use std::sync::Arc;
-
 use arrow::array::{AsArray, FixedSizeListBuilder, Float32Builder};
 use arrow::datatypes::{Float32Type, UInt8Type};
 use arrow_array::{RecordBatchIterator, RecordBatchReader};
@@ -18,7 +14,7 @@ use datafusion_physical_plan::projection::ProjectionExec;
 use datafusion_physical_plan::repartition::RepartitionExec;
 use datafusion_physical_plan::union::UnionExec;
 use datafusion_physical_plan::ExecutionPlan;
-use futures::{StreamExt, TryStreamExt};
+use futures::{FutureExt, StreamExt, TryFutureExt, TryStreamExt};
 use lance::dataset::builder::DatasetBuilder;
 use lance::dataset::cleanup::RemovalStats;
 use lance::dataset::optimize::{compact_files, CompactionMetrics, IndexRemapperOptions};
@@ -45,6 +41,10 @@ use lance_table::format::Manifest;
 use lance_table::io::commit::ManifestNamingScheme;
 use log::info;
 use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::format;
+use std::path::Path;
+use std::sync::Arc;

 use crate::arrow::IntoArrow;
 use crate::connection::NoData;
@@ -78,10 +78,15 @@ pub mod datafusion;
 pub(crate) mod dataset;
 pub mod merge;

+use crate::index::waiter::wait_for_index;
 pub use chrono::Duration;
+use futures::future::{join_all, Either};
 pub use lance::dataset::optimize::CompactionOptions;
+pub use lance::dataset::refs::{TagContents, Tags as LanceTags};
 pub use lance::dataset::scanner::DatasetRecordBatchStream;
+use lance::dataset::statistics::DatasetStatisticsExt;
 pub use lance_index::optimize::OptimizeOptions;
+use serde_with::skip_serializing_none;

 /// Defines the type of column
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -306,7 +311,7 @@ impl<T: IntoArrow> AddDataBuilder<T> {
        self
    }

-    pub async fn execute(self) -> Result<()> {
+    pub async fn execute(self) -> Result<AddResult> {
        let parent = self.parent.clone();
        let data = self.data.into_arrow()?;
        let without_data = AddDataBuilder::<NoData> {
@@ -374,8 +379,8 @@ impl UpdateBuilder {
    }

    /// Executes the update operation.
-    /// Returns the number of rows that were updated.
-    pub async fn execute(self) -> Result<u64> {
+    /// Returns the update result
+    pub async fn execute(self) -> Result<UpdateResult> {
        if self.columns.is_empty() {
            Err(Error::InvalidInput {
                message: "at least one column must be specified in an update operation".to_string(),
@@ -400,6 +405,100 @@ pub enum AnyQuery {
    VectorQuery(VectorQueryRequest),
 }

+#[async_trait]
+pub trait Tags: Send + Sync {
+    /// List the tags of the table.
+    async fn list(&self) -> Result<HashMap<String, TagContents>>;
+
+    /// Get the version of the table referenced by a tag.
+    async fn get_version(&self, tag: &str) -> Result<u64>;
+
+    /// Create a new tag for the given version of the table.
+    async fn create(&mut self, tag: &str, version: u64) -> Result<()>;
+
+    /// Delete a tag from the table.
+    async fn delete(&mut self, tag: &str) -> Result<()>;
+
+    /// Update an existing tag to point to a new version of the table.
+    async fn update(&mut self, tag: &str, version: u64) -> Result<()>;
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub struct UpdateResult {
+    #[serde(default)]
+    pub rows_updated: u64,
+    /// The commit version associated with the operation.
+    /// A version of `0` indicates compatibility with legacy servers that do not return
+    /// a commit version.
+    #[serde(default)]
+    pub version: u64,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub struct AddResult {
+    /// The commit version associated with the operation.
+    /// A version of `0` indicates compatibility with legacy servers that do not return
+    /// a commit version.
+    #[serde(default)]
+    pub version: u64,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub struct DeleteResult {
+    /// The commit version associated with the operation.
+    /// A version of `0` indicates compatibility with legacy servers that do not return
+    /// a commit version.
+    #[serde(default)]
+    pub version: u64,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub struct MergeResult {
+    /// The commit version associated with the operation.
+    /// A version of `0` indicates compatibility with legacy servers that do not return
+    /// a commit version.
+    #[serde(default)]
+    pub version: u64,
+    /// Number of inserted rows (for user statistics)
+    #[serde(default)]
+    pub num_inserted_rows: u64,
+    /// Number of updated rows (for user statistics)
+    #[serde(default)]
+    pub num_updated_rows: u64,
+    /// Number of deleted rows (for user statistics)
+    /// Note: This is different from internal references to 'deleted_rows', since we technically "delete" updated rows during processing.
+    /// However those rows are not shared with the user.
+    #[serde(default)]
+    pub num_deleted_rows: u64,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub struct AddColumnsResult {
+    /// The commit version associated with the operation.
+    /// A version of `0` indicates compatibility with legacy servers that do not return
+    /// a commit version.
+    #[serde(default)]
+    pub version: u64,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub struct AlterColumnsResult {
+    /// The commit version associated with the operation.
+    /// A version of `0` indicates compatibility with legacy servers that do not return
+    /// a commit version.
+    #[serde(default)]
+    pub version: u64,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+pub struct DropColumnsResult {
+    /// The commit version associated with the operation.
+    /// A version of `0` indicates compatibility with legacy servers that do not return
+    /// a commit version.
+    #[serde(default)]
+    pub version: u64,
+}
+
 /// A trait for anything "table-like".  This is used for both native tables (which target
 /// Lance datasets) and remote tables (which target LanceDB cloud)
 ///
@@ -444,11 +543,11 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
        &self,
        add: AddDataBuilder<NoData>,
        data: Box<dyn arrow_array::RecordBatchReader + Send>,
-    ) -> Result<()>;
+    ) -> Result<AddResult>;
    /// Delete rows from the table.
-    async fn delete(&self, predicate: &str) -> Result<()>;
+    async fn delete(&self, predicate: &str) -> Result<DeleteResult>;
    /// Update rows in the table.
-    async fn update(&self, update: UpdateBuilder) -> Result<u64>;
+    async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult>;
    /// Create an index on the provided column(s).
    async fn create_index(&self, index: IndexBuilder) -> Result<()>;
    /// List the indices on the table.
@@ -464,7 +563,9 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
        &self,
        params: MergeInsertBuilder,
        new_data: Box<dyn RecordBatchReader + Send>,
-    ) -> Result<()>;
+    ) -> Result<MergeResult>;
+    /// Gets the table tag manager.
+    async fn tags(&self) -> Result<Box<dyn Tags + '_>>;
    /// Optimize the dataset.
    async fn optimize(&self, action: OptimizeAction) -> Result<OptimizeStats>;
    /// Add columns to the table.
@@ -472,15 +573,18 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
        &self,
        transforms: NewColumnTransform,
        read_columns: Option<Vec<String>>,
-    ) -> Result<()>;
+    ) -> Result<AddColumnsResult>;
    /// Alter columns in the table.
-    async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<()>;
+    async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<AlterColumnsResult>;
    /// Drop columns from the table.
-    async fn drop_columns(&self, columns: &[&str]) -> Result<()>;
+    async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult>;
    /// Get the version of the table.
    async fn version(&self) -> Result<u64>;
    /// Checkout a specific version of the table.
    async fn checkout(&self, version: u64) -> Result<()>;
+    /// Checkout a table version referenced by a tag.
+    /// Tags provide a human-readable way to reference specific versions of the table.
+    async fn checkout_tag(&self, tag: &str) -> Result<()>;
    /// Checkout the latest version of the table.
    async fn checkout_latest(&self) -> Result<()>;
    /// Restore the table to the currently checked out version.
@@ -491,6 +595,15 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
    async fn table_definition(&self) -> Result<TableDefinition>;
    /// Get the table URI
    fn dataset_uri(&self) -> &str;
+    /// Poll until the columns are fully indexed. Will return Error::Timeout if the columns
+    /// are not fully indexed within the timeout.
+    async fn wait_for_index(
+        &self,
+        index_names: &[&str],
+        timeout: std::time::Duration,
+    ) -> Result<()>;
+    /// Get statistics on the table
+    async fn stats(&self) -> Result<TableStatistics>;
 }

 /// A Table is a collection of strong typed Rows.
@@ -693,7 +806,7 @@ impl Table {
    /// tbl.delete("id > 5").await.unwrap();
    /// # });
    /// ```
-    pub async fn delete(&self, predicate: &str) -> Result<()> {
+    pub async fn delete(&self, predicate: &str) -> Result<DeleteResult> {
        self.inner.delete(predicate).await
    }

@@ -769,6 +882,28 @@ impl Table {
        )
    }

+    /// See [Table::create_index]
+    /// For remote tables, this allows an optional wait_timeout to poll until asynchronous indexing is complete
+    pub fn create_index_with_timeout(
+        &self,
+        columns: &[impl AsRef<str>],
+        index: Index,
+        wait_timeout: Option<std::time::Duration>,
+    ) -> IndexBuilder {
+        let mut builder = IndexBuilder::new(
+            self.inner.clone(),
+            columns
+                .iter()
+                .map(|val| val.as_ref().to_string())
+                .collect::<Vec<_>>(),
+            index,
+        );
+        if let Some(timeout) = wait_timeout {
+            builder = builder.wait_timeout(timeout);
+        }
+        builder
+    }
+
    /// Create a builder for a merge insert operation
    ///
    /// This operation can add rows, update rows, and remove rows all in a single
@@ -986,17 +1121,20 @@ impl Table {
        &self,
        transforms: NewColumnTransform,
        read_columns: Option<Vec<String>>,
-    ) -> Result<()> {
+    ) -> Result<AddColumnsResult> {
        self.inner.add_columns(transforms, read_columns).await
    }

    /// Change a column's name or nullability.
-    pub async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<()> {
+    pub async fn alter_columns(
+        &self,
+        alterations: &[ColumnAlteration],
+    ) -> Result<AlterColumnsResult> {
        self.inner.alter_columns(alterations).await
    }

    /// Remove columns from the table.
-    pub async fn drop_columns(&self, columns: &[&str]) -> Result<()> {
+    pub async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
        self.inner.drop_columns(columns).await
    }

@@ -1028,6 +1166,24 @@ impl Table {
        self.inner.checkout(version).await
    }

+    /// Checks out a specific version of the Table by tag
+    ///
+    /// Any read operation on the table will now access the data at the version referenced by the tag.
+    /// As a consequence, calling this method will disable any read consistency interval
+    /// that was previously set.
+    ///
+    /// This is a read-only operation that turns the table into a sort of "view"
+    /// or "detached head".  Other table instances will not be affected.  To make the change
+    /// permanent you can use the [`Self::restore`] method.
+    ///
+    /// Any operation that modifies the table will fail while the table is in a checked
+    /// out state.
+    ///
+    /// To return the table to a normal state use [`Self::checkout_latest`]
+    pub async fn checkout_tag(&self, tag: &str) -> Result<()> {
+        self.inner.checkout_tag(tag).await
+    }
+
    /// Ensures the table is pointing at the latest version
    ///
    /// This can be used to manually update a table when the read_consistency_interval is None
@@ -1104,6 +1260,21 @@ impl Table {
        self.inner.prewarm_index(name).await
    }

+    /// Poll until the columns are fully indexed. Will return Error::Timeout if the columns
+    /// are not fully indexed within the timeout.
+    pub async fn wait_for_index(
+        &self,
+        index_names: &[&str],
+        timeout: std::time::Duration,
+    ) -> Result<()> {
+        self.inner.wait_for_index(index_names, timeout).await
+    }
+
+    /// Get the tags manager.
+    pub async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
+        self.inner.tags().await
+    }
+
    // Take many execution plans and map them into a single plan that adds
    // a query_index column and unions them.
    pub(crate) fn multi_vector_plan(
@@ -1154,6 +1325,40 @@ impl Table {
        .unwrap();
        Ok(Arc::new(repartitioned))
    }
+
+    /// Retrieve statistics on the table
+    pub async fn stats(&self) -> Result<TableStatistics> {
+        self.inner.stats().await
+    }
+}
+
+pub struct NativeTags {
+    inner: LanceTags,
+}
+#[async_trait]
+impl Tags for NativeTags {
+    async fn list(&self) -> Result<HashMap<String, TagContents>> {
+        Ok(self.inner.list().await?)
+    }
+
+    async fn get_version(&self, tag: &str) -> Result<u64> {
+        Ok(self.inner.get_version(tag).await?)
+    }
+
+    async fn create(&mut self, tag: &str, version: u64) -> Result<()> {
+        self.inner.create(tag, version).await?;
+        Ok(())
+    }
+
+    async fn delete(&mut self, tag: &str) -> Result<()> {
+        self.inner.delete(tag).await?;
+        Ok(())
+    }
+
+    async fn update(&mut self, tag: &str, version: u64) -> Result<()> {
+        self.inner.update(tag, version).await?;
+        Ok(())
+    }
 }

 impl From<NativeTable> for Table {
@@ -1809,7 +2014,7 @@ impl NativeTable {
    /// more information.
    pub async fn uses_v2_manifest_paths(&self) -> Result<bool> {
        let dataset = self.dataset.get().await?;
-        Ok(dataset.manifest_naming_scheme == ManifestNamingScheme::V2)
+        Ok(dataset.manifest_location().naming_scheme == ManifestNamingScheme::V2) // scheme is now read via `manifest_location()`
    }

    /// Migrate the table to use the new manifest path scheme.
@@ -1900,6 +2105,10 @@ impl BaseTable for NativeTable {
        self.dataset.as_time_travel(version).await
    }

+    async fn checkout_tag(&self, tag: &str) -> Result<()> {
+        self.dataset.as_time_travel(tag).await // same call as a numeric version checkout, given a tag name
+    }
+
    async fn checkout_latest(&self) -> Result<()> {
        self.dataset
            .as_latest(self.read_consistency_interval)
@@ -1958,7 +2167,7 @@ impl BaseTable for NativeTable {
        &self,
        add: AddDataBuilder<NoData>,
        data: Box<dyn RecordBatchReader + Send>,
-    ) -> Result<()> {
+    ) -> Result<AddResult> {
        let data = Box::new(MaybeEmbedded::try_new(
            data,
            self.table_definition().await?,
@@ -1981,9 +2190,9 @@ impl BaseTable for NativeTable {
                .execute_stream(data)
                .await?
        };
-
+        let version = dataset.manifest().version;
        self.dataset.set_latest(dataset).await;
-        Ok(())
+        Ok(AddResult { version })
    }

    async fn create_index(&self, opts: IndexBuilder) -> Result<()> {
@@ -2029,7 +2238,7 @@ impl BaseTable for NativeTable {
        Ok(dataset.prewarm_index(index_name).await?)
    }

-    async fn update(&self, update: UpdateBuilder) -> Result<u64> {
+    async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult> {
        let dataset = self.dataset.get().await?.clone();
        let mut builder = LanceUpdateBuilder::new(Arc::new(dataset));
        if let Some(predicate) = update.filter {
@@ -2045,7 +2254,10 @@ impl BaseTable for NativeTable {
        self.dataset
            .set_latest(res.new_dataset.as_ref().clone())
            .await;
-        Ok(res.rows_updated)
+        Ok(UpdateResult {
+            rows_updated: res.rows_updated,
+            version: res.new_dataset.version().version,
+        })
    }

    async fn create_plan(
@@ -2237,7 +2449,7 @@ impl BaseTable for NativeTable {
        &self,
        params: MergeInsertBuilder,
        new_data: Box<dyn RecordBatchReader + Send>,
-    ) -> Result<()> {
+    ) -> Result<MergeResult> {
        let dataset = Arc::new(self.dataset.get().await?.clone());
        let mut builder = LanceMergeInsertBuilder::try_new(dataset.clone(), params.on)?;
        match (
@@ -2263,16 +2475,51 @@ impl BaseTable for NativeTable {
        } else {
            builder.when_not_matched_by_source(WhenNotMatchedBySource::Keep);
        }
-        let job = builder.try_build()?;
-        let (new_dataset, _stats) = job.execute_reader(new_data).await?;
+
+        let future = if let Some(timeout) = params.timeout {
+            // The default retry timeout is 30s, so we pass the full timeout down
+            // as well in case it is longer than that.
+            let future = builder
+                .retry_timeout(timeout)
+                .try_build()?
+                .execute_reader(new_data);
+            Either::Left(tokio::time::timeout(timeout, future).map(|res| match res {
+                Ok(Ok((new_dataset, stats))) => Ok((new_dataset, stats)),
+                Ok(Err(e)) => Err(e.into()),
+                Err(_) => Err(Error::Runtime {
+                    message: "merge insert timed out".to_string(),
+                }),
+            }))
+        } else {
+            let job = builder.try_build()?;
+            Either::Right(job.execute_reader(new_data).map_err(|e| e.into()))
+        };
+        let (new_dataset, stats) = future.await?;
+        let version = new_dataset.manifest().version;
        self.dataset.set_latest(new_dataset.as_ref().clone()).await;
-        Ok(())
+        Ok(MergeResult {
+            version,
+            num_updated_rows: stats.num_updated_rows,
+            num_inserted_rows: stats.num_inserted_rows,
+            num_deleted_rows: stats.num_deleted_rows,
+        })
    }

    /// Delete rows from the table
-    async fn delete(&self, predicate: &str) -> Result<()> {
-        self.dataset.get_mut().await?.delete(predicate).await?;
-        Ok(())
+    async fn delete(&self, predicate: &str) -> Result<DeleteResult> {
+        let mut dataset = self.dataset.get_mut().await?; // bound to a local so the version can be read below
+        dataset.delete(predicate).await?; // an error returns here, before any result is built
+        Ok(DeleteResult {
+            version: dataset.version().version, // version read after the delete succeeded
+        })
+    }
+
+    async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
+        let dataset = self.dataset.get().await?;
+
+        Ok(Box::new(NativeTags {
+            inner: dataset.tags.clone(), // the manager holds its own clone of the dataset's `tags` field
+        }))
    }

    async fn optimize(&self, action: OptimizeAction) -> Result<OptimizeStats> {
@@ -2331,27 +2578,28 @@ impl BaseTable for NativeTable {
        &self,
        transforms: NewColumnTransform,
        read_columns: Option<Vec<String>>,
-    ) -> Result<()> {
-        self.dataset
-            .get_mut()
-            .await?
-            .add_columns(transforms, read_columns, None)
-            .await?;
-        Ok(())
+    ) -> Result<AddColumnsResult> {
+        let mut dataset = self.dataset.get_mut().await?;
+        dataset.add_columns(transforms, read_columns, None).await?;
+        Ok(AddColumnsResult {
+            version: dataset.version().version,
+        })
    }

-    async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<()> {
-        self.dataset
-            .get_mut()
-            .await?
-            .alter_columns(alterations)
-            .await?;
-        Ok(())
+    async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<AlterColumnsResult> {
+        let mut dataset = self.dataset.get_mut().await?; // kept in a local so the version can be read below
+        dataset.alter_columns(alterations).await?; // an error returns here, before any result is built
+        Ok(AlterColumnsResult {
+            version: dataset.version().version, // version read after the alteration succeeded
+        })
    }

-    async fn drop_columns(&self, columns: &[&str]) -> Result<()> {
-        self.dataset.get_mut().await?.drop_columns(columns).await?;
-        Ok(())
+    async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
+        let mut dataset = self.dataset.get_mut().await?; // kept in a local so the version can be read below
+        dataset.drop_columns(columns).await?; // an error returns here, before any result is built
+        Ok(DropColumnsResult {
+            version: dataset.version().version, // version read after the columns were dropped
+        })
    }

    async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
@@ -2430,6 +2678,118 @@ impl BaseTable for NativeTable {
            loss,
        }))
    }
+
+    /// Poll until the indices named in `index_names` are fully built. Will return Error::Timeout
+    /// if they are not fully built within `timeout`.
+    async fn wait_for_index(
+        &self,
+        index_names: &[&str],
+        timeout: std::time::Duration,
+    ) -> Result<()> {
+        wait_for_index(self, index_names, timeout).await // free function of the same name, not a recursive call
+    }
+
+    async fn stats(&self) -> Result<TableStatistics> { // builds row, index, byte and fragment statistics
+        let num_rows = self.count_rows(None).await?; // `None` = no filter argument
+        let num_indices = self.list_indices().await?.len();
+        let ds = self.dataset.get().await?;
+        let ds_clone = (*ds).clone(); // owned copy, wrapped in `Arc` on the next line
+        let ds_stats = Arc::new(ds_clone).calculate_data_stats().await?; // per-field sizes, summed below
+        let total_bytes = ds_stats.fields.iter().map(|f| f.bytes_on_disk).sum::<u64>() as usize;
+
+        let frags = ds.get_fragments();
+        let mut sorted_sizes = join_all(
+            frags
+                .iter()
+                .map(|frag| async move { frag.physical_rows().await.unwrap_or(0) }), // a failed row count is treated as 0
+        )
+        .await;
+        sorted_sizes.sort(); // ascending, so the index lookups below act as percentiles
+
+        let small_frag_threshold = 100000; // fragments with fewer rows than this count as "small"
+        let num_fragments = sorted_sizes.len();
+        let num_small_fragments = sorted_sizes
+            .iter()
+            .filter(|&&size| size < small_frag_threshold)
+            .count();
+
+        let p25 = *sorted_sizes.get(num_fragments / 4).unwrap_or(&0); // `get` is `None` only when there are no fragments
+        let p50 = *sorted_sizes.get(num_fragments / 2).unwrap_or(&0);
+        let p75 = *sorted_sizes.get(num_fragments * 3 / 4).unwrap_or(&0);
+        let p99 = *sorted_sizes.get(num_fragments * 99 / 100).unwrap_or(&0);
+        let min = sorted_sizes.first().copied().unwrap_or(0);
+        let max = sorted_sizes.last().copied().unwrap_or(0);
+        let mean = if num_fragments == 0 { // guard against dividing by zero
+            0
+        } else {
+            sorted_sizes.iter().copied().sum::<usize>() / num_fragments // integer division: the mean is truncated
+        };
+
+        let frag_stats = FragmentStatistics {
+            num_fragments,
+            num_small_fragments,
+            lengths: FragmentSummaryStats {
+                min,
+                max,
+                mean,
+                p25,
+                p50,
+                p75,
+                p99,
+            },
+        };
+        let stats = TableStatistics {
+            total_bytes,
+            num_rows,
+            num_indices,
+            fragment_stats: frag_stats,
+        };
+        Ok(stats)
+    }
+}
+
+#[skip_serializing_none] // NOTE(review): only `Deserialize` is derived and no field is an `Option`; looks like a no-op — confirm
+#[derive(Debug, Deserialize, PartialEq)]
+pub struct TableStatistics {
+    /// The total number of bytes in the table
+    pub total_bytes: usize,
+
+    /// The number of rows in the table
+    pub num_rows: usize,
+
+    /// The number of indices in the table
+    pub num_indices: usize,
+
+    /// Statistics on table fragments
+    pub fragment_stats: FragmentStatistics,
+}
+
+#[skip_serializing_none] // NOTE(review): no `Option` fields and no `Serialize` derive here — confirm this is needed
+#[derive(Debug, Deserialize, PartialEq)]
+pub struct FragmentStatistics {
+    /// The number of fragments in the table
+    pub num_fragments: usize,
+
+    /// The number of uncompacted fragments in the table (native tables count those under 100000 rows)
+    pub num_small_fragments: usize,
+
+    /// Statistics on the number of rows in the table fragments
+    pub lengths: FragmentSummaryStats,
+    // todo: add size statistics
+    // /// Statistics on the number of bytes in the table fragments
+    // sizes: FragmentStats,
+}
+
+#[skip_serializing_none] // NOTE(review): no `Option` fields and no `Serialize` derive here — confirm this is needed
+#[derive(Debug, Deserialize, PartialEq)]
+pub struct FragmentSummaryStats { // summary of per-fragment row counts
+    pub min: usize, // fewest rows in any fragment
+    pub max: usize, // most rows in any fragment
+    pub mean: usize, // average rows per fragment, stored as an integer
+    pub p25: usize, // 25th percentile of per-fragment row counts
+    pub p50: usize, // 50th percentile (median)
+    pub p75: usize, // 75th percentile
+    pub p99: usize, // 99th percentile
 }

 #[cfg(test)]
@@ -3031,6 +3391,60 @@ mod tests {
        )
    }

+    #[tokio::test]
+    async fn test_tags() { // covers tag create/list/update/delete and checkout by tag
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+
+        let conn = ConnectBuilder::new(uri)
+            .read_consistency_interval(Duration::from_secs(0)) // NOTE(review): presumably forces a freshness check on every read — confirm
+            .execute()
+            .await
+            .unwrap();
+        let table = conn
+            .create_table("my_table", some_sample_data())
+            .execute()
+            .await
+            .unwrap();
+        assert_eq!(table.version().await.unwrap(), 1);
+        table.add(some_sample_data()).execute().await.unwrap();
+        assert_eq!(table.version().await.unwrap(), 2);
+        let mut tags_manager = table.tags().await.unwrap(); // `mut`: create/update/delete take `&mut self`
+        let tags = tags_manager.list().await.unwrap();
+        assert!(tags.is_empty(), "Tags should be empty initially");
+        let tag1 = "tag1";
+        tags_manager.create(tag1, 1).await.unwrap();
+        assert_eq!(tags_manager.get_version(tag1).await.unwrap(), 1);
+        let tags = tags_manager.list().await.unwrap();
+        assert_eq!(tags.len(), 1);
+        assert!(tags.contains_key(tag1));
+        assert_eq!(tags.get(tag1).unwrap().version, 1);
+        tags_manager.create("tag2", 2).await.unwrap();
+        assert_eq!(tags_manager.get_version("tag2").await.unwrap(), 2);
+        let tags = tags_manager.list().await.unwrap();
+        assert_eq!(tags.len(), 2);
+        assert!(tags.contains_key(tag1));
+        assert_eq!(tags.get(tag1).unwrap().version, 1);
+        assert!(tags.contains_key("tag2"));
+        assert_eq!(tags.get("tag2").unwrap().version, 2);
+        // Test update and delete
+        table.add(some_sample_data()).execute().await.unwrap(); // third write, so version 3 exists for the update below
+        tags_manager.update(tag1, 3).await.unwrap();
+        assert_eq!(tags_manager.get_version(tag1).await.unwrap(), 3);
+        tags_manager.delete("tag2").await.unwrap();
+        let tags = tags_manager.list().await.unwrap();
+        assert_eq!(tags.len(), 1);
+        assert!(tags.contains_key(tag1));
+        assert_eq!(tags.get(tag1).unwrap().version, 3);
+        // Test checkout tag
+        table.add(some_sample_data()).execute().await.unwrap();
+        assert_eq!(table.version().await.unwrap(), 4);
+        table.checkout_tag(tag1).await.unwrap();
+        assert_eq!(table.version().await.unwrap(), 3); // tag1 was moved to version 3 above
+        table.checkout_latest().await.unwrap();
+        assert_eq!(table.version().await.unwrap(), 4);
+    }
+
    #[tokio::test]
    async fn test_create_index() {
        use arrow_array::RecordBatch;
@@ -3213,7 +3627,10 @@ mod tests {
            .execute()
            .await
            .unwrap();
-
+        table
+            .wait_for_index(&["embeddings_idx"], Duration::from_millis(10))
+            .await
+            .unwrap();
        let index_configs = table.list_indices().await.unwrap();
        assert_eq!(index_configs.len(), 1);
        let index = index_configs.into_iter().next().unwrap();
@@ -3281,7 +3698,10 @@ mod tests {
            .execute()
            .await
            .unwrap();
-
+        table
+            .wait_for_index(&["i_idx"], Duration::from_millis(10))
+            .await
+            .unwrap();
        let index_configs = table.list_indices().await.unwrap();
        assert_eq!(index_configs.len(), 1);
        let index = index_configs.into_iter().next().unwrap();
@@ -3747,4 +4167,108 @@ mod tests {
            Some(&"test_field_val1".to_string())
        );
    }
+
+    #[tokio::test]
+    pub async fn test_stats() { // checks `stats()` on a table with 11 writes and on an empty table
+        let tmp_dir = tempdir().unwrap();
+        let uri = tmp_dir.path().to_str().unwrap();
+
+        let conn = ConnectBuilder::new(uri).execute().await.unwrap();
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("foo", DataType::Int32, true),
+        ]));
+        let batch = RecordBatch::try_new(
+            schema.clone(),
+            vec![
+                Arc::new(Int32Array::from_iter_values(0..100)),
+                Arc::new(Int32Array::from_iter_values(0..100)),
+            ],
+        )
+        .unwrap();
+
+        let table = conn
+            .create_table(
+                "test_stats",
+                RecordBatchIterator::new(vec![Ok(batch.clone())], batch.schema()),
+            )
+            .execute()
+            .await
+            .unwrap();
+        for _ in 0..10 { // ten appends of 15 rows each
+            let batch = RecordBatch::try_new(
+                schema.clone(),
+                vec![
+                    Arc::new(Int32Array::from_iter_values(0..15)),
+                    Arc::new(Int32Array::from_iter_values(0..15)),
+                ],
+            )
+            .unwrap();
+            table
+                .add(RecordBatchIterator::new(
+                    vec![Ok(batch.clone())],
+                    batch.schema(),
+                ))
+                .execute()
+                .await
+                .unwrap();
+        }
+
+        let empty_table = conn
+            .create_table(
+                "test_stats_empty",
+                RecordBatchIterator::new(vec![], batch.schema()), // no batches: only the schema is supplied
+            )
+            .execute()
+            .await
+            .unwrap();
+
+        let res = table.stats().await.unwrap();
+        println!("{:#?}", res);
+        assert_eq!(
+            res,
+            TableStatistics {
+                num_rows: 250, // 100 initial rows + 10 appends of 15
+                num_indices: 0,
+                total_bytes: 2000, // equals 250 rows x 2 Int32 columns x 4 bytes
+                fragment_stats: FragmentStatistics {
+                    num_fragments: 11, // the create plus the ten adds
+                    num_small_fragments: 11, // every fragment is below the 100000-row threshold
+                    lengths: FragmentSummaryStats {
+                        min: 15,
+                        max: 100,
+                        mean: 22, // 250 / 11, truncated
+                        p25: 15,
+                        p50: 15,
+                        p75: 15,
+                        p99: 100, // sorted index 11 * 99 / 100 = 10, the largest fragment
+                    },
+                },
+            }
+        );
+        let res = empty_table.stats().await.unwrap();
+        println!("{:#?}", res);
+        assert_eq!(
+            res,
+            TableStatistics {
+                num_rows: 0,
+                num_indices: 0,
+                total_bytes: 0,
+                fragment_stats: FragmentStatistics {
+                    num_fragments: 0,
+                    num_small_fragments: 0,
+                    lengths: FragmentSummaryStats { // with no fragments every summary value is expected to be 0
+                        min: 0,
+                        max: 0,
+                        mean: 0,
+                        p25: 0,
+                        p50: 0,
+                        p75: 0,
+                        p99: 0,
+                    },
+                },
+            }
+        )
+    }
 }
--- a/rust/lancedb/src/table/dataset.rs
+++ b/rust/lancedb/src/table/dataset.rs
@@ -7,7 +7,7 @@ use std::{
    time::{self, Duration, Instant},
 };

-use lance::Dataset;
+use lance::{dataset::refs, Dataset};
 use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};

 use crate::error::Result;
@@ -83,19 +83,32 @@ impl DatasetRef {
        }
    }

-    async fn as_time_travel(&mut self, target_version: u64) -> Result<()> {
+    async fn as_time_travel(&mut self, target_version: impl Into<refs::Ref>) -> Result<()> {
+        let target_ref = target_version.into();
+
        match self {
            Self::Latest { dataset, .. } => {
+                let new_dataset = dataset.checkout_version(target_ref.clone()).await?;
+                let version_value = new_dataset.version().version;
+
                *self = Self::TimeTravel {
-                    dataset: dataset.checkout_version(target_version).await?,
-                    version: target_version,
+                    dataset: new_dataset,
+                    version: version_value,
                };
            }
            Self::TimeTravel { dataset, version } => {
-                if *version != target_version {
+                let should_checkout = match &target_ref {
+                    refs::Ref::Version(target_ver) => version != target_ver,
+                    refs::Ref::Tag(_) => true, // Always checkout for tags
+                };
+
+                if should_checkout {
+                    let new_dataset = dataset.checkout_version(target_ref).await?;
+                    let version_value = new_dataset.version().version;
+
                    *self = Self::TimeTravel {
-                        dataset: dataset.checkout_version(target_version).await?,
-                        version: target_version,
+                        dataset: new_dataset,
+                        version: version_value,
                    };
                }
            }
@@ -175,7 +188,7 @@ impl DatasetConsistencyWrapper {
        write_guard.as_latest(read_consistency_interval).await
    }

-    pub async fn as_time_travel(&self, target_version: u64) -> Result<()> {
+    pub async fn as_time_travel(&self, target_version: impl Into<refs::Ref>) -> Result<()> { // callers pass a version number or a tag name
        self.0.write().await.as_time_travel(target_version).await
    }

--- a/rust/lancedb/src/table/merge.rs
+++ b/rust/lancedb/src/table/merge.rs
@@ -1,13 +1,13 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright The LanceDB Authors

-use std::sync::Arc;
+use std::{sync::Arc, time::Duration};

 use arrow_array::RecordBatchReader;

 use crate::Result;

-use super::BaseTable;
+use super::{BaseTable, MergeResult};

 /// A builder used to create and run a merge insert operation
 ///
@@ -21,6 +21,7 @@ pub struct MergeInsertBuilder {
    pub(crate) when_not_matched_insert_all: bool,
    pub(crate) when_not_matched_by_source_delete: bool,
    pub(crate) when_not_matched_by_source_delete_filt: Option<String>,
+    pub(crate) timeout: Option<Duration>,
 }

 impl MergeInsertBuilder {
@@ -33,6 +34,7 @@ impl MergeInsertBuilder {
            when_not_matched_insert_all: false,
            when_not_matched_by_source_delete: false,
            when_not_matched_by_source_delete_filt: None,
+            timeout: None,
        }
    }

@@ -84,10 +86,26 @@ impl MergeInsertBuilder {
        self
    }

+    /// Maximum time to run the operation before cancelling it.
+    ///
+    /// By default, there is a 30-second timeout that is only enforced after the
+    /// first attempt. This is to prevent spending too long retrying to resolve
+    /// conflicts. For example, if a write attempt takes 20 seconds and fails,
+    /// the second attempt will be cancelled after 10 seconds, hitting the
+    /// 30-second timeout. However, a write that takes one hour and succeeds on the
+    /// first attempt will not be cancelled.
+    ///
+    /// When this is set, the timeout is enforced on all attempts, including the first.
+    pub fn timeout(&mut self, timeout: Duration) -> &mut Self {
+        self.timeout = Some(timeout); // the builder starts with `None` (see `new`)
+        self // returned so calls can be chained
+    }
+
    /// Executes the merge insert operation
    ///
-    /// Nothing is returned but the [`super::Table`] is updated
-    pub async fn execute(self, new_data: Box<dyn RecordBatchReader + Send>) -> Result<()> {
+    /// Returns a [`MergeResult`] holding the resulting version and the number of rows
+    /// inserted, updated, and deleted.
+    pub async fn execute(self, new_data: Box<dyn RecordBatchReader + Send>) -> Result<MergeResult> {
        self.table.clone().merge_insert(self, new_data).await
    }
 }