chore: update lance dependency to v4.0.0-beta.12

feat(nodejs): support field/data type input in add_columns() method (#3114 )
Add support for passing field/data type information into add_columns() method, bringing parity with Python bindings. The method now accepts: - AddColumnsSql[] - SQL expressions (existing functionality) - Field - single Arrow field with explicit data type - Field[] - array of Arrow fields with explicit data types - Schema - Arrow schema with explicit data types New columns added via Field/Schema are initialized with null values. All field-based columns must be nullable due to null initialization. Resolves #3107 --------- Signed-off-by: Pratik <pratikrocks.dey11@gmail.com> Co-authored-by: Claude <noreply@anthropic.com>
2026-06-05 13:20:39 +00:00 · 2026-03-13 21:56:59 +00:00 · 2026-03-13 12:57:14 -07:00 · 2026-03-13 12:54:26 -07:00 · 2026-03-12 14:37:42 +08:00 · 2026-03-10 21:43:51 -07:00
19 changed files with 649 additions and 151 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3070,8 +3070,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"

 [[package]]
 name = "fsst"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow-array",
 "rand 0.9.2",
@@ -4241,8 +4241,8 @@ dependencies = [

 [[package]]
 name = "lance"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4308,8 +4308,8 @@ dependencies = [

 [[package]]
 name = "lance-arrow"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4329,8 +4329,8 @@ dependencies = [

 [[package]]
 name = "lance-bitpacking"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrayref",
 "paste",
@@ -4339,8 +4339,8 @@ dependencies = [

 [[package]]
 name = "lance-core"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4377,8 +4377,8 @@ dependencies = [

 [[package]]
 name = "lance-datafusion"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4408,8 +4408,8 @@ dependencies = [

 [[package]]
 name = "lance-datagen"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4427,8 +4427,8 @@ dependencies = [

 [[package]]
 name = "lance-encoding"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow-arith",
 "arrow-array",
@@ -4465,8 +4465,8 @@ dependencies = [

 [[package]]
 name = "lance-file"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow-arith",
 "arrow-array",
@@ -4498,8 +4498,8 @@ dependencies = [

 [[package]]
 name = "lance-index"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4562,8 +4562,8 @@ dependencies = [

 [[package]]
 name = "lance-io"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4604,8 +4604,8 @@ dependencies = [

 [[package]]
 name = "lance-linalg"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4621,8 +4621,8 @@ dependencies = [

 [[package]]
 name = "lance-namespace"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow",
 "async-trait",
@@ -4634,8 +4634,8 @@ dependencies = [

 [[package]]
 name = "lance-namespace-impls"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow",
 "arrow-ipc",
@@ -4679,8 +4679,8 @@ dependencies = [

 [[package]]
 name = "lance-table"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4719,8 +4719,8 @@ dependencies = [

 [[package]]
 name = "lance-testing"
-version = "4.0.0-beta.8"
-source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.8#e7eb014de4a8d9d958ed9eaa8d62eadb9cae3f40"
+version = "4.0.0-beta.12"
+source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.12#1e7b7252881261f290740d8d5a487c053f05039e"
 dependencies = [
 "arrow-array",
 "arrow-schema",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
 rust-version = "1.91.0"

 [workspace.dependencies]
-lance = { "version" = "=4.0.0-beta.8", default-features = false, "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-core = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-datagen = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-file = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-io = { "version" = "=4.0.0-beta.8", default-features = false, "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-index = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-linalg = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace-impls = { "version" = "=4.0.0-beta.8", default-features = false, "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-table = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-testing = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-datafusion = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-encoding = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
-lance-arrow = { "version" = "=4.0.0-beta.8", "tag" = "v4.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" }
+lance = { "version" = "=4.0.0-beta.12", default-features = false, "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-core = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-datagen = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-file = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-io = { "version" = "=4.0.0-beta.12", default-features = false, "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-index = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-linalg = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-namespace-impls = { "version" = "=4.0.0-beta.12", default-features = false, "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-table = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-testing = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-datafusion = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-encoding = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
+lance-arrow = { "version" = "=4.0.0-beta.12", "tag" = "v4.0.0-beta.12", "git" = "https://github.com/lance-format/lance.git" }
 ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "57.2", optional = false }
--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -71,11 +71,12 @@ Add new columns with defined values.

 #### Parameters

-* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
-    pairs of column names and
-    the SQL expression to use to calculate the value of the new column. These
-    expressions will be evaluated for each row in the table, and can
-    reference existing columns in the table.
+* **newColumnTransforms**: `Field`&lt;`any`&gt; \| `Field`&lt;`any`&gt;[] \| `Schema`&lt;`any`&gt; \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
+    Either:
+    - An array of objects with column names and SQL expressions to calculate values
+    - A single Arrow Field defining one column with its data type (column will be initialized with null values)
+    - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
+    - An Arrow Schema defining columns with their data types (columns will be initialized with null values)

 #### Returns

--- a/java/pom.xml
+++ b/java/pom.xml
@@ -28,7 +28,7 @@
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <arrow.version>15.0.0</arrow.version>
-        <lance-core.version>4.0.0-beta.8</lance-core.version>
+        <lance-core.version>4.0.0-beta.12</lance-core.version>
        <spotless.skip>false</spotless.skip>
        <spotless.version>2.30.0</spotless.version>
        <spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -1259,6 +1259,98 @@ describe("schema evolution", function () {
    expect(await table.schema()).toEqual(expectedSchema);
  });

+  it("can add columns with schema for explicit data types", async function () {
+    const con = await connect(tmpDir.name);
+    const table = await con.createTable("vectors", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+
+    // Define schema for new columns with explicit data types
+    // Note: All columns must be nullable when using addColumns with Schema
+    // because they are initially populated with null values
+    const newColumnsSchema = new Schema([
+      new Field("price", new Float64(), true),
+      new Field("category", new Utf8(), true),
+      new Field("rating", new Int32(), true),
+    ]);
+
+    const result = await table.addColumns(newColumnsSchema);
+    expect(result).toHaveProperty("version");
+    expect(result.version).toBe(2);
+
+    const expectedSchema = new Schema([
+      new Field("id", new Int64(), true),
+      new Field(
+        "vector",
+        new FixedSizeList(2, new Field("item", new Float32(), true)),
+        true,
+      ),
+      new Field("price", new Float64(), true),
+      new Field("category", new Utf8(), true),
+      new Field("rating", new Int32(), true),
+    ]);
+    expect(await table.schema()).toEqual(expectedSchema);
+
+    // Verify that new columns are populated with null values
+    const results = await table.query().toArray();
+    expect(results).toHaveLength(1);
+    expect(results[0].price).toBeNull();
+    expect(results[0].category).toBeNull();
+    expect(results[0].rating).toBeNull();
+  });
+
+  it("can add a single column using Field", async function () {
+    const con = await connect(tmpDir.name);
+    const table = await con.createTable("vectors", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+
+    // Add a single field
+    const priceField = new Field("price", new Float64(), true);
+    const result = await table.addColumns(priceField);
+    expect(result).toHaveProperty("version");
+    expect(result.version).toBe(2);
+
+    const expectedSchema = new Schema([
+      new Field("id", new Int64(), true),
+      new Field(
+        "vector",
+        new FixedSizeList(2, new Field("item", new Float32(), true)),
+        true,
+      ),
+      new Field("price", new Float64(), true),
+    ]);
+    expect(await table.schema()).toEqual(expectedSchema);
+  });
+
+  it("can add multiple columns using array of Fields", async function () {
+    const con = await connect(tmpDir.name);
+    const table = await con.createTable("vectors", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+
+    // Add multiple fields as array
+    const fields = [
+      new Field("price", new Float64(), true),
+      new Field("category", new Utf8(), true),
+    ];
+    const result = await table.addColumns(fields);
+    expect(result).toHaveProperty("version");
+    expect(result.version).toBe(2);
+
+    const expectedSchema = new Schema([
+      new Field("id", new Int64(), true),
+      new Field(
+        "vector",
+        new FixedSizeList(2, new Field("item", new Float32(), true)),
+        true,
+      ),
+      new Field("price", new Float64(), true),
+      new Field("category", new Utf8(), true),
+    ]);
+    expect(await table.schema()).toEqual(expectedSchema);
+  });
+
  it("can alter the columns in the schema", async function () {
    const con = await connect(tmpDir.name);
    const schema = new Schema([
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -5,12 +5,15 @@ import {
  Table as ArrowTable,
  Data,
  DataType,
+  Field,
  IntoVector,
  MultiVector,
  Schema,
  dataTypeToJson,
  fromDataToBuffer,
+  fromTableToBuffer,
  isMultiVector,
+  makeEmptyTable,
  tableFromIPC,
 } from "./arrow";

@@ -84,6 +87,16 @@ export interface OptimizeOptions {
   * tbl.optimize({cleanupOlderThan: new Date()});
   */
  cleanupOlderThan: Date;
+  /**
+   * Because they may be part of an in-progress transaction, files newer than
+   * 7 days old are not deleted by default. If you are sure that there are no
+   * in-progress transactions, then you can set this to true to delete all
+   * files older than `cleanupOlderThan`.
+   *
+   * **WARNING**: This should only be set to true if you can guarantee that
+   * no other process is currently working on this dataset. Otherwise the
+   * dataset could be put into a corrupted state.
+   */
  deleteUnverified: boolean;
 }

@@ -381,15 +394,16 @@ export abstract class Table {
  abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
  /**
   * Add new columns with defined values.
-   * @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
-   * the SQL expression to use to calculate the value of the new column. These
-   * expressions will be evaluated for each row in the table, and can
-   * reference existing columns in the table.
+   * @param {AddColumnsSql[] | Field | Field[] | Schema} newColumnTransforms Either:
+   *   - An array of objects with column names and SQL expressions to calculate values
+   *   - A single Arrow Field defining one column with its data type (column will be initialized with null values)
+   *   - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
+   *   - An Arrow Schema defining columns with their data types (columns will be initialized with null values)
   * @returns {Promise<AddColumnsResult>} A promise that resolves to an object
   * containing the new version number of the table after adding the columns.
   */
  abstract addColumns(
-    newColumnTransforms: AddColumnsSql[],
+    newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
  ): Promise<AddColumnsResult>;

  /**
@@ -501,19 +515,7 @@ export abstract class Table {
   *  - Index: Optimizes the indices, adding new data to existing indices
   *
   *
-   *  Experimental API
-   *  ----------------
-   *
-   *  The optimization process is undergoing active development and may change.
-   *  Our goal with these changes is to improve the performance of optimization and
-   *  reduce the complexity.
-   *
-   *  That being said, it is essential today to run optimize if you want the best
-   *  performance.  It should be stable and safe to use in production, but it our
-   *  hope that the API may be simplified (or not even need to be called) in the
-   *  future.
-   *
-   *  The frequency an application shoudl call optimize is based on the frequency of
+   *  The frequency an application should call optimize is based on the frequency of
   *  data modifications.  If data is frequently added, deleted, or updated then
   *  optimize should be run frequently.  A good rule of thumb is to run optimize if
   *  you have added or modified 100,000 or more records or run more than 20 data
@@ -806,9 +808,40 @@ export class LocalTable extends Table {
  // TODO: Support BatchUDF

  async addColumns(
-    newColumnTransforms: AddColumnsSql[],
+    newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
  ): Promise<AddColumnsResult> {
-    return await this.inner.addColumns(newColumnTransforms);
+    // Handle single Field -> convert to array of Fields
+    if (newColumnTransforms instanceof Field) {
+      newColumnTransforms = [newColumnTransforms];
+    }
+
+    // Handle array of Fields -> convert to Schema
+    if (
+      Array.isArray(newColumnTransforms) &&
+      newColumnTransforms.length > 0 &&
+      newColumnTransforms[0] instanceof Field
+    ) {
+      const fields = newColumnTransforms as Field[];
+      newColumnTransforms = new Schema(fields);
+    }
+
+    // Handle Schema -> use schema-based approach
+    if (newColumnTransforms instanceof Schema) {
+      const schema = newColumnTransforms;
+      // Convert schema to buffer using Arrow IPC format
+      const emptyTable = makeEmptyTable(schema);
+      const schemaBuf = await fromTableToBuffer(emptyTable);
+      return await this.inner.addColumnsWithSchema(schemaBuf);
+    }
+
+    // Handle SQL expressions (existing functionality)
+    if (Array.isArray(newColumnTransforms)) {
+      return await this.inner.addColumns(
+        newColumnTransforms as AddColumnsSql[],
+      );
+    }
+
+    throw new Error("Invalid input type for addColumns");
  }

  async alterColumns(
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -3,7 +3,7 @@

 use std::collections::HashMap;

-use lancedb::ipc::ipc_file_to_batches;
+use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
 use lancedb::table::{
    AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
    OptimizeAction, OptimizeOptions, Table as LanceDbTable,
@@ -279,6 +279,23 @@ impl Table {
        Ok(res.into())
    }

+    #[napi(catch_unwind)]
+    pub async fn add_columns_with_schema(
+        &self,
+        schema_buf: Buffer,
+    ) -> napi::Result<AddColumnsResult> {
+        let schema = ipc_file_to_schema(schema_buf.to_vec())
+            .map_err(|e| napi::Error::from_reason(format!("Failed to read IPC schema: {}", e)))?;
+
+        let transforms = NewColumnTransform::AllNulls(schema);
+        let res = self
+            .inner_ref()?
+            .add_columns(transforms, None)
+            .await
+            .default_error()?;
+        Ok(res.into())
+    }
+
    #[napi(catch_unwind)]
    pub async fn alter_columns(
        &self,
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -166,6 +166,8 @@ class Table:
    async def checkout(self, version: Union[int, str]): ...
    async def checkout_latest(self): ...
    async def restore(self, version: Optional[Union[int, str]] = None): ...
+    async def prewarm_index(self, index_name: str) -> None: ...
+    async def prewarm_data(self, columns: Optional[List[str]] = None) -> None: ...
    async def list_indices(self) -> list[IndexConfig]: ...
    async def delete(self, filter: str) -> DeleteResult: ...
    async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -640,6 +640,45 @@ class RemoteTable(Table):
    def drop_index(self, index_name: str):
        return LOOP.run(self._table.drop_index(index_name))

+    def prewarm_index(self, name: str) -> None:
+        """Prewarm an index in the table.
+
+        This is a hint to the database that the index will be accessed in the
+        future and should be loaded into memory if possible.  This can reduce
+        cold-start latency for subsequent queries.
+
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        Parameters
+        ----------
+        name: str
+            The name of the index to prewarm
+        """
+        return LOOP.run(self._table.prewarm_index(name))
+
+    def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
+        """Prewarm data for the table.
+
+        This is a hint to the database that the given columns will be accessed
+        in the future and the database should prefetch the data if possible.
+        Currently only supported on remote tables.
+
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        This operation has a large upfront cost but can speed up future queries
+        that need to fetch the given columns.  Large columns such as embeddings
+        or binary data may not be practical to prewarm.  This feature is intended
+        for workloads that issue many queries against the same columns.
+
+        Parameters
+        ----------
+        columns: list of str, optional
+            The columns to prewarm. If None, all columns are prewarmed.
+        """
+        return LOOP.run(self._table.prewarm_data(columns))
+
    def wait_for_index(
        self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
    ):
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -1506,22 +1506,17 @@ class Table(ABC):
            in-progress operation (e.g. appending new data) and these files will not
            be deleted unless they are at least 7 days old. If delete_unverified is True
            then these files will be deleted regardless of their age.
+
+            .. warning::
+
+                This should only be set to True if you can guarantee that no other
+                process is currently working on this dataset. Otherwise the dataset
+                could be put into a corrupted state.
+
        retrain: bool, default False
            This parameter is no longer used and is deprecated.

-        Experimental API
-        ----------------
-
-        The optimization process is undergoing active development and may change.
-        Our goal with these changes is to improve the performance of optimization and
-        reduce the complexity.
-
-        That being said, it is essential today to run optimize if you want the best
-        performance.  It should be stable and safe to use in production, but it our
-        hope that the API may be simplified (or not even need to be called) in the
-        future.
-
-        The frequency an application shoudl call optimize is based on the frequency of
+        The frequency an application should call optimize is based on the frequency of
        data modifications.  If data is frequently added, deleted, or updated then
        optimize should be run frequently.  A good rule of thumb is to run optimize if
        you have added or modified 100,000 or more records or run more than 20 data
@@ -2219,12 +2214,18 @@ class LanceTable(Table):

    def prewarm_index(self, name: str) -> None:
        """
-        Prewarms an index in the table
+        Prewarm an index in the table.

-        This loads the entire index into memory
+        This is a hint to the database that the index will be accessed in the
+        future and should be loaded into memory if possible.  This can reduce
+        cold-start latency for subsequent queries.

-        If the index does not fit into the available cache this call
-        may be wasteful
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        It is generally wasteful to call this if the index does not fit into the
+        available cache.  Not all index types support prewarming; unsupported
+        indices will silently ignore the request.

        Parameters
        ----------
@@ -2233,6 +2234,29 @@ class LanceTable(Table):
        """
        return LOOP.run(self._table.prewarm_index(name))

+    def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
+        """
+        Prewarm data for the table.
+
+        This is a hint to the database that the given columns will be accessed
+        in the future and the database should prefetch the data if possible.
+        Currently only supported on remote tables.
+
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        This operation has a large upfront cost but can speed up future queries
+        that need to fetch the given columns.  Large columns such as embeddings
+        or binary data may not be practical to prewarm.  This feature is intended
+        for workloads that issue many queries against the same columns.
+
+        Parameters
+        ----------
+        columns: list of str, optional
+            The columns to prewarm. If None, all columns are prewarmed.
+        """
+        return LOOP.run(self._table.prewarm_data(columns))
+
    def wait_for_index(
        self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
    ) -> None:
@@ -3018,22 +3042,17 @@ class LanceTable(Table):
            in-progress operation (e.g. appending new data) and these files will not
            be deleted unless they are at least 7 days old. If delete_unverified is True
            then these files will be deleted regardless of their age.
+
+            .. warning::
+
+                This should only be set to True if you can guarantee that no other
+                process is currently working on this dataset. Otherwise the dataset
+                could be put into a corrupted state.
+
        retrain: bool, default False
            This parameter is no longer used and is deprecated.

-        Experimental API
-        ----------------
-
-        The optimization process is undergoing active development and may change.
-        Our goal with these changes is to improve the performance of optimization and
-        reduce the complexity.
-
-        That being said, it is essential today to run optimize if you want the best
-        performance.  It should be stable and safe to use in production, but it our
-        hope that the API may be simplified (or not even need to be called) in the
-        future.
-
-        The frequency an application shoudl call optimize is based on the frequency of
+        The frequency an application should call optimize is based on the frequency of
        data modifications.  If data is frequently added, deleted, or updated then
        optimize should be run frequently.  A good rule of thumb is to run optimize if
        you have added or modified 100,000 or more records or run more than 20 data
@@ -3634,19 +3653,47 @@ class AsyncTable:
        """
        Prewarm an index in the table.

+        This is a hint to the database that the index will be accessed in the
+        future and should be loaded into memory if possible.  This can reduce
+        cold-start latency for subsequent queries.
+
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        It is generally wasteful to call this if the index does not fit into the
+        available cache.  Not all index types support prewarming; unsupported
+        indices will silently ignore the request.
+
        Parameters
        ----------
        name: str
            The name of the index to prewarm
-
-        Notes
-        -----
-        This will load the index into memory.  This may reduce the cold-start time for
-        future queries.  If the index does not fit in the cache then this call may be
-        wasteful.
        """
        await self._inner.prewarm_index(name)

+    async def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
+        """
+        Prewarm data for the table.
+
+        This is a hint to the database that the given columns will be accessed
+        in the future and the database should prefetch the data if possible.
+        Currently only supported on remote tables.
+
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        This operation has a large upfront cost but can speed up future queries
+        that need to fetch the given columns.  Large columns such as embeddings
+        or binary data may not be practical to prewarm.  This feature is intended
+        for workloads that issue many queries against the same columns.
+
+        Parameters
+        ----------
+        columns: list of str, optional
+            The columns to prewarm. If None, all columns are prewarmed.
+        """
+        await self._inner.prewarm_data(columns)
+
    async def wait_for_index(
        self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
    ) -> None:
@@ -4573,22 +4620,17 @@ class AsyncTable:
            in-progress operation (e.g. appending new data) and these files will not
            be deleted unless they are at least 7 days old. If delete_unverified is True
            then these files will be deleted regardless of their age.
+
+            .. warning::
+
+                This should only be set to True if you can guarantee that no other
+                process is currently working on this dataset. Otherwise the dataset
+                could be put into a corrupted state.
+
        retrain: bool, default False
            This parameter is no longer used and is deprecated.

-        Experimental API
-        ----------------
-
-        The optimization process is undergoing active development and may change.
-        Our goal with these changes is to improve the performance of optimization and
-        reduce the complexity.
-
-        That being said, it is essential today to run optimize if you want the best
-        performance.  It should be stable and safe to use in production, but it our
-        hope that the API may be simplified (or not even need to be called) in the
-        future.
-
-        The frequency an application shoudl call optimize is based on the frequency of
+        The frequency an application should call optimize is based on the frequency of
        data modifications.  If data is frequently added, deleted, or updated then
        optimize should be run frequently.  A good rule of thumb is to run optimize if
        you have added or modified 100,000 or more records or run more than 20 data
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -316,6 +316,19 @@ impl<'py> IntoPyObject<'py> for PySelect {
            Select::All => Ok(py.None().into_bound(py).into_any()),
            Select::Columns(columns) => Ok(columns.into_pyobject(py)?.into_any()),
            Select::Dynamic(columns) => Ok(columns.into_pyobject(py)?.into_any()),
+            Select::Expr(pairs) => {
+                // Serialize DataFusion Expr -> SQL string so Python sees the same
+                // format as Select::Dynamic: a list of (name, sql_string) tuples.
+                let sql_pairs: PyResult<Vec<(String, String)>> = pairs
+                    .into_iter()
+                    .map(|(name, expr)| {
+                        lancedb::expr::expr_to_sql_string(&expr)
+                            .map(|sql| (name, sql))
+                            .map_err(|e| PyRuntimeError::new_err(e.to_string()))
+                    })
+                    .collect();
+                Ok(sql_pairs?.into_pyobject(py)?.into_any())
+            }
        }
    }
 }
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -426,6 +426,17 @@ impl Table {
        })
    }

+    pub fn prewarm_data(
+        self_: PyRef<'_, Self>,
+        columns: Option<Vec<String>>,
+    ) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner_ref()?.clone();
+        future_into_py(self_.py(), async move {
+            inner.prewarm_data(columns).await.infer_error()?;
+            Ok(())
+        })
+    }
+
    pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
--- a/rust/lancedb/src/dataloader/permutation/reader.rs
+++ b/rust/lancedb/src/dataloader/permutation/reader.rs
@@ -339,6 +339,12 @@ impl PermutationReader {
                }
                Ok(false)
            }
+            Select::Expr(columns) => {
+                // For Expr projections, we check if any alias is _rowid.
+                // We can't validate the expression itself (it may differ from _rowid)
+                // but we allow it through; the column will be included.
+                Ok(columns.iter().any(|(alias, _)| alias == ROW_ID))
+            }
        }
    }

--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -47,6 +47,25 @@ pub enum Select {
    ///
    /// See [`Query::select`] for more details and examples
    Dynamic(Vec<(String, String)>),
+    /// Advanced selection using type-safe DataFusion expressions
+    ///
+    /// Similar to [`Select::Dynamic`] but uses [`datafusion_expr::Expr`] instead of
+    /// raw SQL strings. Use [`crate::expr`] helpers to build expressions:
+    ///
+    /// ```
+    /// use lancedb::expr::{col, lit};
+    /// use lancedb::query::Select;
+    ///
+    /// // SELECT id, id * 2 AS id2 FROM ...
+    /// let selection = Select::expr_projection(&[
+    ///     ("id", col("id")),
+    ///     ("id2", col("id") * lit(2)),
+    /// ]);
+    /// ```
+    ///
+    /// Note: For remote/server-side queries the expressions are serialized to SQL strings
+    /// automatically (same as [`Select::Dynamic`]).
+    Expr(Vec<(String, datafusion_expr::Expr)>),
 }

 impl Select {
@@ -69,6 +88,29 @@ impl Select {
                .collect(),
        )
    }
+    /// Create a typed-expression projection.
+    ///
+    /// This is a convenience method for creating a [`Select::Expr`] variant from
+    /// a slice of `(name, expr)` pairs where each `expr` is a [`datafusion_expr::Expr`].
+    ///
+    /// # Example
+    /// ```
+    /// use lancedb::expr::{col, lit};
+    /// use lancedb::query::Select;
+    ///
+    /// let selection = Select::expr_projection(&[
+    ///     ("id", col("id")),
+    ///     ("id2", col("id") * lit(2)),
+    /// ]);
+    /// ```
+    pub fn expr_projection(columns: &[(impl AsRef<str>, datafusion_expr::Expr)]) -> Self {
+        Self::Expr(
+            columns
+                .iter()
+                .map(|(name, expr)| (name.as_ref().to_string(), expr.clone()))
+                .collect(),
+        )
+    }
 }

 /// A trait for converting a type to a query vector
@@ -1591,6 +1633,58 @@ mod tests {
        });
    }

+    #[tokio::test]
+    async fn test_select_with_expr_projection() {
+        // Mirrors test_select_with_transform but uses Select::Expr instead of Select::Dynamic
+        let tmp_dir = tempdir().unwrap();
+        let dataset_path = tmp_dir.path().join("test_expr.lance");
+        let uri = dataset_path.to_str().unwrap();
+
+        let batches = make_non_empty_batches();
+        let conn = connect(uri).execute().await.unwrap();
+        let table = conn
+            .create_table("my_table", batches)
+            .execute()
+            .await
+            .unwrap();
+
+        use crate::expr::{col, lit};
+        let query = table.query().limit(10).select(Select::expr_projection(&[
+            ("id2", col("id") * lit(2i32)),
+            ("id", col("id")),
+        ]));
+
+        let schema = query.output_schema().await.unwrap();
+        assert_eq!(
+            schema,
+            Arc::new(ArrowSchema::new(vec![
+                ArrowField::new("id2", DataType::Int32, true),
+                ArrowField::new("id", DataType::Int32, true),
+            ]))
+        );
+
+        let result = query.execute().await;
+        let mut batches = result
+            .expect("should have result")
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+        assert_eq!(batches.len(), 1);
+        let batch = batches.pop().unwrap();
+
+        // id and id2
+        assert_eq!(batch.num_columns(), 2);
+
+        let id: &Int32Array = batch.column_by_name("id").unwrap().as_primitive();
+        let id2: &Int32Array = batch.column_by_name("id2").unwrap().as_primitive();
+
+        id.iter().zip(id2.iter()).for_each(|(id, id2)| {
+            let id = id.unwrap();
+            let id2 = id2.unwrap();
+            assert_eq!(id * 2, id2);
+        });
+    }
+
    #[tokio::test]
    async fn test_execute_no_vector() {
        // TODO: Switch back to memory://foo after https://github.com/lancedb/lancedb/issues/1051
--- a/rust/lancedb/src/remote/client.rs
+++ b/rust/lancedb/src/remote/client.rs
@@ -426,14 +426,11 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
                })?,
            );
        }
-        if db_prefix.is_some() {
+        if let Some(prefix) = db_prefix {
            headers.insert(
                HeaderName::from_static("x-lancedb-database-prefix"),
-                HeaderValue::from_str(db_prefix.unwrap()).map_err(|_| Error::InvalidInput {
-                    message: format!(
-                        "non-ascii database prefix '{}' provided",
-                        db_prefix.unwrap()
-                    ),
+                HeaderValue::from_str(prefix).map_err(|_| Error::InvalidInput {
+                    message: format!("non-ascii database prefix '{}' provided", prefix),
                })?,
            );
        }
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -477,6 +477,16 @@ impl<S: HttpSend> RemoteTable<S> {
                    }));
                body["columns"] = alias_map.into();
            }
+            Select::Expr(pairs) => {
+                let alias_map: Result<serde_json::Map<String, serde_json::Value>> = pairs
+                    .iter()
+                    .map(|(name, expr)| {
+                        expr_to_sql_string(expr)
+                            .map(|sql| (name.clone(), serde_json::Value::String(sql)))
+                    })
+                    .collect();
+                body["columns"] = alias_map?.into();
+            }
        }

        if params.fast_search {
@@ -1645,10 +1655,33 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
        Ok(())
    }

-    async fn prewarm_index(&self, _index_name: &str) -> Result<()> {
-        Err(Error::NotSupported {
-            message: "prewarm_index is not yet supported on LanceDB cloud.".into(),
-        })
+    async fn prewarm_index(&self, index_name: &str) -> Result<()> {
+        let request = self.client.post(&format!(
+            "/v1/table/{}/index/{}/prewarm/",
+            self.identifier, index_name
+        ));
+        let (request_id, response) = self.send(request, true).await?;
+        if response.status() == StatusCode::NOT_FOUND {
+            return Err(Error::IndexNotFound {
+                name: index_name.to_string(),
+            });
+        }
+        self.check_table_response(&request_id, response).await?;
+        Ok(())
+    }
+
+    async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()> {
+        let mut request = self.client.post(&format!(
+            "/v1/table/{}/page_cache/prewarm/",
+            self.identifier
+        ));
+        let body = serde_json::json!({
+            "columns": columns.unwrap_or_default(),
+        });
+        request = request.json(&body);
+        let (request_id, response) = self.send(request, true).await?;
+        self.check_table_response(&request_id, response).await?;
+        Ok(())
    }

    async fn table_definition(&self) -> Result<TableDefinition> {
@@ -3529,6 +3562,64 @@ mod tests {
        assert_eq!(result.version, if old_server { 0 } else { 43 });
    }

+    #[tokio::test]
+    async fn test_prewarm_index() {
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(request.method(), "POST");
+            assert_eq!(
+                request.url().path(),
+                "/v1/table/my_table/index/my_index/prewarm/"
+            );
+            http::Response::builder().status(200).body("{}").unwrap()
+        });
+        table.prewarm_index("my_index").await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_prewarm_index_not_found() {
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(
+                request.url().path(),
+                "/v1/table/my_table/index/my_index/prewarm/"
+            );
+            http::Response::builder().status(404).body("{}").unwrap()
+        });
+        let e = table.prewarm_index("my_index").await.unwrap_err();
+        assert!(matches!(e, Error::IndexNotFound { .. }));
+    }
+
+    #[tokio::test]
+    async fn test_prewarm_data() {
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(request.method(), "POST");
+            assert_eq!(
+                request.url().path(),
+                "/v1/table/my_table/page_cache/prewarm/"
+            );
+            http::Response::builder().status(200).body("{}").unwrap()
+        });
+        table.prewarm_data(None).await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_prewarm_data_with_columns() {
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(request.method(), "POST");
+            assert_eq!(
+                request.url().path(),
+                "/v1/table/my_table/page_cache/prewarm/"
+            );
+            let body = request.body().unwrap().as_bytes().unwrap();
+            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
+            assert_eq!(body["columns"], serde_json::json!(["col_a", "col_b"]));
+            http::Response::builder().status(200).body("{}").unwrap()
+        });
+        table
+            .prewarm_data(Some(vec!["col_a".into(), "col_b".into()]))
+            .await
+            .unwrap();
+    }
+
    #[tokio::test]
    async fn test_drop_index() {
        let table = Table::new_with_handler("my_table", |request| {
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -277,8 +277,13 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
    async fn list_indices(&self) -> Result<Vec<IndexConfig>>;
    /// Drop an index from the table.
    async fn drop_index(&self, name: &str) -> Result<()>;
-    /// Prewarm an index in the table
+    /// Prewarm an index in the table.
    async fn prewarm_index(&self, name: &str) -> Result<()>;
+    /// Prewarm data for the table.
+    ///
+    /// Currently only supported on remote tables.
+    /// If `columns` is `None`, all columns are prewarmed.
+    async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()>;
    /// Get statistics about the index.
    async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>>;
    /// Merge insert new records into the table.
@@ -946,17 +951,7 @@ impl Table {
    ///  * Prune: Removes old versions of the dataset
    ///  * Index: Optimizes the indices, adding new data to existing indices
    ///
-    /// <section class="warning">Experimental API</section>
-    ///
-    /// The optimization process is undergoing active development and may change.
-    /// Our goal with these changes is to improve the performance of optimization and
-    /// reduce the complexity.
-    ///
-    /// That being said, it is essential today to run optimize if you want the best
-    /// performance.  It should be stable and safe to use in production, but it our
-    /// hope that the API may be simplified (or not even need to be called) in the future.
-    ///
-    /// The frequency an application shoudl call optimize is based on the frequency of
+    /// The frequency an application should call optimize is based on the frequency of
    /// data modifications.  If data is frequently added, deleted, or updated then
    /// optimize should be run frequently.  A good rule of thumb is to run optimize if
    /// you have added or modified 100,000 or more records or run more than 20 data
@@ -1123,22 +1118,45 @@ impl Table {
        self.inner.drop_index(name).await
    }

-    /// Prewarm an index in the table
+    /// Prewarm an index in the table.
    ///
-    /// This is a hint to fully load the index into memory.  It can be used to
-    /// avoid cold starts
+    /// This is a hint to the database that the index will be accessed in the
+    /// future and should be loaded into memory if possible.  This can reduce
+    /// cold-start latency for subsequent queries.
+    ///
+    /// This call initiates prewarming and returns once the request is accepted.
+    /// It is idempotent and safe to call from multiple clients concurrently.
    ///
    /// It is generally wasteful to call this if the index does not fit into the
-    /// available cache.
-    ///
-    /// Note: This function is not yet supported on all indices, in which case it
-    /// may do nothing.
+    /// available cache.  Not all index types support prewarming; unsupported
+    /// indices will silently ignore the request.
    ///
    /// Use [`Self::list_indices()`] to find the names of the indices.
    pub async fn prewarm_index(&self, name: &str) -> Result<()> {
        self.inner.prewarm_index(name).await
    }

+    /// Prewarm data for the table.
+    ///
+    /// This is a hint to the database that the given columns will be accessed in
+    /// the future and the database should prefetch the data if possible.  This
+    /// can reduce cold-start latency for subsequent queries.  Currently only
+    /// supported on remote tables.
+    ///
+    /// This call initiates prewarming and returns once the request is accepted.
+    /// It is idempotent and safe to call from multiple clients concurrently —
+    /// calling it on already-prewarmed columns is a no-op on the server.
+    ///
+    /// This operation has a large upfront cost but can speed up future queries
+    /// that need to fetch the given columns.  Large columns such as embeddings
+    /// or binary data may not be practical to prewarm.  This feature is intended
+    /// for workloads that issue many queries against the same columns.
+    ///
+    /// If `columns` is `None`, all columns are prewarmed.
+    pub async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()> {
+        self.inner.prewarm_data(columns).await
+    }
+
    /// Poll until the columns are fully indexed. Will return Error::Timeout if the columns
    /// are not fully indexed within the timeout.
    pub async fn wait_for_index(
@@ -2290,6 +2308,12 @@ impl BaseTable for NativeTable {
        Ok(dataset.prewarm_index(index_name).await?)
    }

+    async fn prewarm_data(&self, _columns: Option<Vec<String>>) -> Result<()> {
+        Err(Error::NotSupported {
+            message: "prewarm_data is currently only supported on remote tables.".into(),
+        })
+    }
+
    async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult> {
        // Delegate to the submodule implementation
        update::execute_update(self, update).await
--- a/rust/lancedb/src/table/optimize.rs
+++ b/rust/lancedb/src/table/optimize.rs
@@ -64,6 +64,9 @@ pub enum OptimizeAction {
        older_than: Option<Duration>,
        /// Because they may be part of an in-progress transaction, files newer than 7 days old are not deleted by default.
        /// If you are sure that there are no in-progress transactions, then you can set this to True to delete all files older than `older_than`.
+        ///
+        /// **WARNING**: This should only be set to true if you can guarantee that no other process is
+        /// currently working on this dataset.  Otherwise the dataset could be put into a corrupted state.
        delete_unverified: Option<bool>,
        /// If true, an error will be returned if there are any old versions that are still tagged.
        error_if_tagged_old_versions: Option<bool>,
@@ -117,6 +120,10 @@ pub(crate) async fn optimize_indices(table: &NativeTable, options: &OptimizeOpti
 ///   If you are sure that there are no in-progress transactions, then you
 ///   can set this to True to delete all files older than `older_than`.
 ///
+///   **WARNING**: This should only be set to true if you can guarantee that
+///   no other process is currently working on this dataset.  Otherwise the
+///   dataset could be put into a corrupted state.
+///
 /// This calls into [lance::dataset::Dataset::cleanup_old_versions] and
 /// returns the result.
 pub(crate) async fn cleanup_old_versions(
--- a/rust/lancedb/src/table/query.rs
+++ b/rust/lancedb/src/table/query.rs
@@ -186,6 +186,13 @@ pub async fn create_plan(
        Select::Dynamic(ref select_with_transform) => {
            scanner.project_with_transform(select_with_transform.as_slice())?;
        }
+        Select::Expr(ref expr_pairs) => {
+            let sql_pairs: crate::Result<Vec<(String, String)>> = expr_pairs
+                .iter()
+                .map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
+                .collect();
+            scanner.project_with_transform(sql_pairs?.as_slice())?;
+        }
        Select::All => {}
    }

@@ -340,6 +347,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
                                .to_string(),
                    });
                }
+                Select::Expr(pairs) => {
+                    let sql_pairs: crate::Result<Vec<(String, String)>> = pairs
+                        .iter()
+                        .map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
+                        .collect();
+                    let sql_pairs = sql_pairs?;
+                    Some(Box::new(QueryTableRequestColumns {
+                        column_names: None,
+                        column_aliases: Some(sql_pairs.into_iter().collect()),
+                    }))
+                }
            };

            // Check for unsupported features
@@ -411,6 +429,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
                            .to_string(),
                    });
                }
+                Select::Expr(pairs) => {
+                    let sql_pairs: crate::Result<Vec<(String, String)>> = pairs
+                        .iter()
+                        .map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
+                        .collect();
+                    let sql_pairs = sql_pairs?;
+                    Some(Box::new(QueryTableRequestColumns {
+                        column_names: None,
+                        column_aliases: Some(sql_pairs.into_iter().collect()),
+                    }))
+                }
            };

            // Handle full text search if present
Author	SHA1	Message	Date
lancedb automation	ef46e49876	chore: update lance dependency to v4.0.0-beta.12	2026-03-13 21:56:59 +00:00
Pratik Dey	d1d720d08a	feat(nodejs): support field/data type input in add_columns() method (#3114 ) Add support for passing field/data type information into add_columns() method, bringing parity with Python bindings. The method now accepts: - AddColumnsSql[] - SQL expressions (existing functionality) - Field - single Arrow field with explicit data type - Field[] - array of Arrow fields with explicit data types - Schema - Arrow schema with explicit data types New columns added via Field/Schema are initialized with null values. All field-based columns must be nullable due to null initialization. Resolves #3107 --------- Signed-off-by: Pratik <pratikrocks.dey11@gmail.com> Co-authored-by: Claude <noreply@anthropic.com>	2026-03-13 12:57:14 -07:00
Mesut-Doner	c2e543f1b7	feat(rust): support Expr in projection query (#3069 ) Referred and followed [`Select::Dynamic`] implementation. Closes #3039	2026-03-13 12:54:26 -07:00
Weston Pace	216c1b5f77	docs: remove experimental label from optimize and warn about delete_unverified (#3128 ) ## Summary - Removes the "Experimental API" section from `optimize` method documentation across Rust, Python, and TypeScript - Adds a warning to `delete_unverified` documentation in all bindings: this should only be set to true if you can guarantee no other process is working on the dataset, otherwise it could be corrupted - Fixes a typo ("shoudl" → "should") Closes #3125 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-12 14:37:42 +08:00
Xin Sun	fc1867da83	chore: remove the duplicate snafu-derive dependency in the lockfile (#3124 )	2026-03-10 21:43:51 -07:00
Esteban Gutierrez	f951da2b00	feat: support prewarm_index and prewarm_data on remote tables (#3110 ) ## Summary - Implement `RemoteTable.prewarm_data(columns)` calling `POST /v1/table/{id}/page_cache/prewarm/` - Implement `RemoteTable.prewarm_index(name)` calling `POST /v1/table/{id}/index/{name}/prewarm/` (previously returned `NotSupported`) - Add `BaseTable::prewarm_data(columns)` trait method and `Table` public API in Rust core - Add PyO3 bindings and Python API (`AsyncTable`, `LanceTable`, `RemoteTable`) for `prewarm_data` - Add type stubs for `prewarm_index` and `prewarm_data` in `_lancedb.pyi` - Upgrade Lance to 3.0.0-rc.3 with breaking change fixes Co-authored-by: Will Jones <willjones127@gmail.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-10 15:39:39 -05:00