Bump version: 0.30.0-beta.6 → 0.30.0

Bump version: 0.30.0-beta.5 → 0.30.0-beta.6
feat: update lance dependency to 3.0.0 release (#3137 )
2026-03-26 02:20:40 +00:00 · 2026-03-16 22:46:19 +00:00 · 2026-03-16 22:46:17 +00:00 · 2026-03-16 15:29:18 -07:00 · 2026-03-16 14:02:19 -07:00 · 2026-03-16 09:02:11 -07:00
28 changed files with 705 additions and 178 deletions
--- a/.github/workflows/nodejs.yml
+++ b/.github/workflows/nodejs.yml
@@ -7,6 +7,7 @@ on:
  pull_request:
    paths:
      - Cargo.toml
+      - Cargo.lock
      - nodejs/**
      - rust/**
      - docs/src/js/**
--- a/.github/workflows/npm-publish.yml
+++ b/.github/workflows/npm-publish.yml
@@ -19,6 +19,7 @@ on:
    paths:
      - .github/workflows/npm-publish.yml
      - Cargo.toml # Change in dependency frequently breaks builds
+      - Cargo.lock

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -124,7 +125,12 @@ jobs:
            pre_build: |-
              set -e &&
              apt-get update &&
-              apt-get install -y protobuf-compiler pkg-config
+              apt-get install -y protobuf-compiler pkg-config &&
+              # The base image (manylinux2014-cross) sets TARGET_CC to the old
+              # GCC 4.8 cross-compiler. aws-lc-sys checks TARGET_CC before CC,
+              # so it picks up GCC even though the napi-rs image sets CC=clang.
+              # Override to use the image's clang-18 which supports -fuse-ld=lld.
+              export TARGET_CC=clang TARGET_CXX=clang++
          - target: x86_64-unknown-linux-musl
            # This one seems to need some extra memory
            host: ubuntu-2404-8x-x64
@@ -144,9 +150,10 @@ jobs:
              set -e &&
              apt-get update &&
              apt-get install -y protobuf-compiler pkg-config &&
-              # https://github.com/aws/aws-lc-rs/issues/737#issuecomment-2725918627
-              ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc/aarch64-unknown-linux-gnu/4.8.5/crtbeginS.o /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/crtbeginS.o &&
-              ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/gcc &&
+              export TARGET_CC=clang TARGET_CXX=clang++ &&
+              # The manylinux2014 sysroot has glibc 2.17 headers which lack
+              # AT_HWCAP2 (added in Linux 3.17). Define it for aws-lc-sys.
+              export CFLAGS="$CFLAGS -DAT_HWCAP2=26" &&
              rustup target add aarch64-unknown-linux-gnu
          - target: aarch64-unknown-linux-musl
            host: ubuntu-2404-8x-x64
@@ -266,7 +273,7 @@ jobs:
          - target: x86_64-unknown-linux-gnu
            host: ubuntu-latest
          - target: aarch64-unknown-linux-gnu
-            host: buildjet-16vcpu-ubuntu-2204-arm
+            host: ubuntu-2404-8x-arm64
        node:
          - '20'
    runs-on: ${{ matrix.settings.host }}
--- a/.github/workflows/pypi-publish.yml
+++ b/.github/workflows/pypi-publish.yml
@@ -9,6 +9,7 @@ on:
    paths:
      - .github/workflows/pypi-publish.yml
      - Cargo.toml # Change in dependency frequently breaks builds
+      - Cargo.lock

 env:
  PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -7,6 +7,7 @@ on:
  pull_request:
    paths:
      - Cargo.toml
+      - Cargo.lock
      - python/**
      - rust/**
      - .github/workflows/python.yml
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -7,6 +7,7 @@ on:
  pull_request:
    paths:
      - Cargo.toml
+      - Cargo.lock
      - rust/**
      - .github/workflows/rust.yml

--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3070,8 +3070,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"

 [[package]]
 name = "fsst"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ae4126c38f86d37d5479295c135a1b81688b6c799d6c39d44b1855f9a0e712c"
 dependencies = [
 "arrow-array",
 "rand 0.9.2",
@@ -3852,7 +3853,7 @@ dependencies = [
 "libc",
 "percent-encoding",
 "pin-project-lite",
- "socket2 0.5.10",
+ "socket2 0.6.0",
 "system-configuration",
 "tokio",
 "tower-service",
@@ -4241,8 +4242,9 @@ dependencies = [

 [[package]]
 name = "lance"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "45c38e7c7c448a77203b50c05f5bf308cadb3fe074e7dde22e4302206ea44d7a"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4308,8 +4310,9 @@ dependencies = [

 [[package]]
 name = "lance-arrow"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "174bae71821e5535a594f9ecd64b07e6ffe729498a5d27a0ca926b4ff2714664"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4329,8 +4332,9 @@ dependencies = [

 [[package]]
 name = "lance-bitpacking"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4494187b4244fa56c8cf911d7358e5322fa1cf7d8f6a213b3155a4139eb556b1"
 dependencies = [
 "arrayref",
 "paste",
@@ -4339,8 +4343,9 @@ dependencies = [

 [[package]]
 name = "lance-core"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce0a5d4427c42f7d9302771bb2aa474b09316a8919633abad0030c4d58013e89"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4377,8 +4382,9 @@ dependencies = [

 [[package]]
 name = "lance-datafusion"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61c6151ee46a35886ac6c804f870de2992c36de4198da1c64d806529f246d2d7"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4408,8 +4414,9 @@ dependencies = [

 [[package]]
 name = "lance-datagen"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d130ec0426173daeb14a6032d18a1eb8c1f8858dca3f42735f0d2c7dde3c47f"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4427,8 +4434,9 @@ dependencies = [

 [[package]]
 name = "lance-encoding"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "965281449dc6b47d4669e11572f7b2a5e0e6b206dc1f21bce0f3b5f7dc533ece"
 dependencies = [
 "arrow-arith",
 "arrow-array",
@@ -4465,8 +4473,9 @@ dependencies = [

 [[package]]
 name = "lance-file"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c8230f9a2e63170eef3d4f8f7576593a73ff8a2f8d5a75400e511e74d4d308f"
 dependencies = [
 "arrow-arith",
 "arrow-array",
@@ -4498,8 +4507,9 @@ dependencies = [

 [[package]]
 name = "lance-index"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "950a0bc24e01044fc86260c54e810f2d051660d89caa727234f09b252446c425"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4562,8 +4572,9 @@ dependencies = [

 [[package]]
 name = "lance-io"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "023188a98822bc87c87726893fbd6c1deea2dccf5c4214051b789b6047bdc2a4"
 dependencies = [
 "arrow",
 "arrow-arith",
@@ -4604,8 +4615,9 @@ dependencies = [

 [[package]]
 name = "lance-linalg"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bc4b0c9892f985dac4d27c058d21b6adfda5c73a7aaf697a92f300f36995ded"
 dependencies = [
 "arrow-array",
 "arrow-buffer",
@@ -4621,8 +4633,9 @@ dependencies = [

 [[package]]
 name = "lance-namespace"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c8aab88d6c91b045ac3d3967c73b547d9fed5559a91a907f3f4586d0f2d6cc6"
 dependencies = [
 "arrow",
 "async-trait",
@@ -4634,8 +4647,9 @@ dependencies = [

 [[package]]
 name = "lance-namespace-impls"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0beb555ea8ac3bfc220519585ffaea34caa61cc24edce533b98f48474042e3f0"
 dependencies = [
 "arrow",
 "arrow-ipc",
@@ -4679,8 +4693,9 @@ dependencies = [

 [[package]]
 name = "lance-table"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7511e4e951d3938316b16f1a64360cfae0b44d22d9c5aca971ed7b5bc83caea7"
 dependencies = [
 "arrow",
 "arrow-array",
@@ -4719,8 +4734,9 @@ dependencies = [

 [[package]]
 name = "lance-testing"
-version = "3.0.0-rc.3"
-source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3de153d46ba1c1fc6dbd4c93ed8b0b99b9001902d5df1159d855ef9fd2613591"
 dependencies = [
 "arrow-array",
 "arrow-schema",
@@ -6443,7 +6459,7 @@ version = "0.14.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
 dependencies = [
- "heck 0.4.1",
+ "heck 0.5.0",
 "itertools 0.14.0",
 "log",
 "multimap",
@@ -6632,7 +6648,7 @@ dependencies = [
 "quinn-udp",
 "rustc-hash",
 "rustls 0.23.31",
- "socket2 0.5.10",
+ "socket2 0.6.0",
 "thiserror 2.0.17",
 "tokio",
 "tracing",
@@ -6669,7 +6685,7 @@ dependencies = [
 "cfg_aliases",
 "libc",
 "once_cell",
- "socket2 0.5.10",
+ "socket2 0.6.0",
 "tracing",
 "windows-sys 0.60.2",
 ]
@@ -7735,7 +7751,7 @@ version = "0.8.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451"
 dependencies = [
- "heck 0.4.1",
+ "heck 0.5.0",
 "proc-macro2",
 "quote",
 "syn 2.0.114",
@@ -7747,7 +7763,7 @@ version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40"
 dependencies = [
- "heck 0.4.1",
+ "heck 0.5.0",
 "proc-macro2",
 "quote",
 "syn 2.0.114",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
 rust-version = "1.91.0"

 [workspace.dependencies]
-lance = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-core = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-datagen = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-file = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-io = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-index = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-linalg = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-namespace-impls = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-table = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-testing = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-datafusion = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-encoding = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
-lance-arrow = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
+lance = { version = "=3.0.0", default-features = false }
+lance-core = { version = "=3.0.0" }
+lance-datagen = { version = "=3.0.0" }
+lance-file = { version = "=3.0.0" }
+lance-io = { version = "=3.0.0", default-features = false }
+lance-index = { version = "=3.0.0" }
+lance-linalg = { version = "=3.0.0" }
+lance-namespace = { version = "=3.0.0" }
+lance-namespace-impls = { version = "=3.0.0", default-features = false }
+lance-table = { version = "=3.0.0" }
+lance-testing = { version = "=3.0.0" }
+lance-datafusion = { version = "=3.0.0" }
+lance-encoding = { version = "=3.0.0" }
+lance-arrow = { version = "=3.0.0" }
 ahash = "0.8"
 # Note that this one does not include pyarrow
 arrow = { version = "57.2", optional = false }
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,7 +1,7 @@
 version: "3.9"
 services:
  localstack:
-    image: localstack/localstack:3.3
+    image: localstack/localstack:4.0
    ports:
      - 4566:4566
    environment:
--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -71,11 +71,12 @@ Add new columns with defined values.

 #### Parameters

-* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
-    pairs of column names and
-    the SQL expression to use to calculate the value of the new column. These
-    expressions will be evaluated for each row in the table, and can
-    reference existing columns in the table.
+* **newColumnTransforms**: `Field`&lt;`any`&gt; \| `Field`&lt;`any`&gt;[] \| `Schema`&lt;`any`&gt; \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
+    Either:
+    - An array of objects with column names and SQL expressions to calculate values
+    - A single Arrow Field defining one column with its data type (column will be initialized with null values)
+    - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
+    - An Arrow Schema defining columns with their data types (columns will be initialized with null values)

 #### Returns

@@ -484,19 +485,7 @@ Modeled after ``VACUUM`` in PostgreSQL.
 - Prune: Removes old versions of the dataset
 - Index: Optimizes the indices, adding new data to existing indices

- Experimental API
- ----------------
-
- The optimization process is undergoing active development and may change.
- Our goal with these changes is to improve the performance of optimization and
- reduce the complexity.
-
- That being said, it is essential today to run optimize if you want the best
- performance.  It should be stable and safe to use in production, but it our
- hope that the API may be simplified (or not even need to be called) in the
- future.
-
- The frequency an application shoudl call optimize is based on the frequency of
+ The frequency an application should call optimize is based on the frequency of
 data modifications.  If data is frequently added, deleted, or updated then
 optimize should be run frequently.  A good rule of thumb is to run optimize if
 you have added or modified 100,000 or more records or run more than 20 data
--- a/docs/src/js/interfaces/OptimizeOptions.md
+++ b/docs/src/js/interfaces/OptimizeOptions.md
@@ -37,3 +37,12 @@ tbl.optimize({cleanupOlderThan: new Date()});
 ```ts
 deleteUnverified: boolean;
 ```
+
+Because they may be part of an in-progress transaction, files newer than
+7 days old are not deleted by default. If you are sure that there are no
+in-progress transactions, then you can set this to true to delete all
+files older than `cleanupOlderThan`.
+
+**WARNING**: This should only be set to true if you can guarantee that
+no other process is currently working on this dataset. Otherwise the
+dataset could be put into a corrupted state.
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -1259,6 +1259,98 @@ describe("schema evolution", function () {
    expect(await table.schema()).toEqual(expectedSchema);
  });

+  it("can add columns with schema for explicit data types", async function () {
+    const con = await connect(tmpDir.name);
+    const table = await con.createTable("vectors", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+
+    // Define schema for new columns with explicit data types
+    // Note: All columns must be nullable when using addColumns with Schema
+    // because they are initially populated with null values
+    const newColumnsSchema = new Schema([
+      new Field("price", new Float64(), true),
+      new Field("category", new Utf8(), true),
+      new Field("rating", new Int32(), true),
+    ]);
+
+    const result = await table.addColumns(newColumnsSchema);
+    expect(result).toHaveProperty("version");
+    expect(result.version).toBe(2);
+
+    const expectedSchema = new Schema([
+      new Field("id", new Int64(), true),
+      new Field(
+        "vector",
+        new FixedSizeList(2, new Field("item", new Float32(), true)),
+        true,
+      ),
+      new Field("price", new Float64(), true),
+      new Field("category", new Utf8(), true),
+      new Field("rating", new Int32(), true),
+    ]);
+    expect(await table.schema()).toEqual(expectedSchema);
+
+    // Verify that new columns are populated with null values
+    const results = await table.query().toArray();
+    expect(results).toHaveLength(1);
+    expect(results[0].price).toBeNull();
+    expect(results[0].category).toBeNull();
+    expect(results[0].rating).toBeNull();
+  });
+
+  it("can add a single column using Field", async function () {
+    const con = await connect(tmpDir.name);
+    const table = await con.createTable("vectors", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+
+    // Add a single field
+    const priceField = new Field("price", new Float64(), true);
+    const result = await table.addColumns(priceField);
+    expect(result).toHaveProperty("version");
+    expect(result.version).toBe(2);
+
+    const expectedSchema = new Schema([
+      new Field("id", new Int64(), true),
+      new Field(
+        "vector",
+        new FixedSizeList(2, new Field("item", new Float32(), true)),
+        true,
+      ),
+      new Field("price", new Float64(), true),
+    ]);
+    expect(await table.schema()).toEqual(expectedSchema);
+  });
+
+  it("can add multiple columns using array of Fields", async function () {
+    const con = await connect(tmpDir.name);
+    const table = await con.createTable("vectors", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+
+    // Add multiple fields as array
+    const fields = [
+      new Field("price", new Float64(), true),
+      new Field("category", new Utf8(), true),
+    ];
+    const result = await table.addColumns(fields);
+    expect(result).toHaveProperty("version");
+    expect(result.version).toBe(2);
+
+    const expectedSchema = new Schema([
+      new Field("id", new Int64(), true),
+      new Field(
+        "vector",
+        new FixedSizeList(2, new Field("item", new Float32(), true)),
+        true,
+      ),
+      new Field("price", new Float64(), true),
+      new Field("category", new Utf8(), true),
+    ]);
+    expect(await table.schema()).toEqual(expectedSchema);
+  });
+
  it("can alter the columns in the schema", async function () {
    const con = await connect(tmpDir.name);
    const schema = new Schema([
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -5,12 +5,15 @@ import {
  Table as ArrowTable,
  Data,
  DataType,
+  Field,
  IntoVector,
  MultiVector,
  Schema,
  dataTypeToJson,
  fromDataToBuffer,
+  fromTableToBuffer,
  isMultiVector,
+  makeEmptyTable,
  tableFromIPC,
 } from "./arrow";

@@ -84,6 +87,16 @@ export interface OptimizeOptions {
   * tbl.optimize({cleanupOlderThan: new Date()});
   */
  cleanupOlderThan: Date;
+  /**
+   * Because they may be part of an in-progress transaction, files newer than
+   * 7 days old are not deleted by default. If you are sure that there are no
+   * in-progress transactions, then you can set this to true to delete all
+   * files older than `cleanupOlderThan`.
+   *
+   * **WARNING**: This should only be set to true if you can guarantee that
+   * no other process is currently working on this dataset. Otherwise the
+   * dataset could be put into a corrupted state.
+   */
  deleteUnverified: boolean;
 }

@@ -381,15 +394,16 @@ export abstract class Table {
  abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
  /**
   * Add new columns with defined values.
-   * @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
-   * the SQL expression to use to calculate the value of the new column. These
-   * expressions will be evaluated for each row in the table, and can
-   * reference existing columns in the table.
+   * @param {AddColumnsSql[] | Field | Field[] | Schema} newColumnTransforms Either:
+   *   - An array of objects with column names and SQL expressions to calculate values
+   *   - A single Arrow Field defining one column with its data type (column will be initialized with null values)
+   *   - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
+   *   - An Arrow Schema defining columns with their data types (columns will be initialized with null values)
   * @returns {Promise<AddColumnsResult>} A promise that resolves to an object
   * containing the new version number of the table after adding the columns.
   */
  abstract addColumns(
-    newColumnTransforms: AddColumnsSql[],
+    newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
  ): Promise<AddColumnsResult>;

  /**
@@ -501,19 +515,7 @@ export abstract class Table {
   *  - Index: Optimizes the indices, adding new data to existing indices
   *
   *
-   *  Experimental API
-   *  ----------------
-   *
-   *  The optimization process is undergoing active development and may change.
-   *  Our goal with these changes is to improve the performance of optimization and
-   *  reduce the complexity.
-   *
-   *  That being said, it is essential today to run optimize if you want the best
-   *  performance.  It should be stable and safe to use in production, but it our
-   *  hope that the API may be simplified (or not even need to be called) in the
-   *  future.
-   *
-   *  The frequency an application shoudl call optimize is based on the frequency of
+   *  The frequency an application should call optimize is based on the frequency of
   *  data modifications.  If data is frequently added, deleted, or updated then
   *  optimize should be run frequently.  A good rule of thumb is to run optimize if
   *  you have added or modified 100,000 or more records or run more than 20 data
@@ -806,9 +808,40 @@ export class LocalTable extends Table {
  // TODO: Support BatchUDF

  async addColumns(
-    newColumnTransforms: AddColumnsSql[],
+    newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
  ): Promise<AddColumnsResult> {
-    return await this.inner.addColumns(newColumnTransforms);
+    // Handle single Field -> convert to array of Fields
+    if (newColumnTransforms instanceof Field) {
+      newColumnTransforms = [newColumnTransforms];
+    }
+
+    // Handle array of Fields -> convert to Schema
+    if (
+      Array.isArray(newColumnTransforms) &&
+      newColumnTransforms.length > 0 &&
+      newColumnTransforms[0] instanceof Field
+    ) {
+      const fields = newColumnTransforms as Field[];
+      newColumnTransforms = new Schema(fields);
+    }
+
+    // Handle Schema -> use schema-based approach
+    if (newColumnTransforms instanceof Schema) {
+      const schema = newColumnTransforms;
+      // Convert schema to buffer using Arrow IPC format
+      const emptyTable = makeEmptyTable(schema);
+      const schemaBuf = await fromTableToBuffer(emptyTable);
+      return await this.inner.addColumnsWithSchema(schemaBuf);
+    }
+
+    // Handle SQL expressions (existing functionality)
+    if (Array.isArray(newColumnTransforms)) {
+      return await this.inner.addColumns(
+        newColumnTransforms as AddColumnsSql[],
+      );
+    }
+
+    throw new Error("Invalid input type for addColumns");
  }

  async alterColumns(
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -3,7 +3,7 @@

 use std::collections::HashMap;

-use lancedb::ipc::ipc_file_to_batches;
+use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
 use lancedb::table::{
    AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
    OptimizeAction, OptimizeOptions, Table as LanceDbTable,
@@ -279,6 +279,23 @@ impl Table {
        Ok(res.into())
    }

+    #[napi(catch_unwind)]
+    pub async fn add_columns_with_schema(
+        &self,
+        schema_buf: Buffer,
+    ) -> napi::Result<AddColumnsResult> {
+        let schema = ipc_file_to_schema(schema_buf.to_vec())
+            .map_err(|e| napi::Error::from_reason(format!("Failed to read IPC schema: {}", e)))?;
+
+        let transforms = NewColumnTransform::AllNulls(schema);
+        let res = self
+            .inner_ref()?
+            .add_columns(transforms, None)
+            .await
+            .default_error()?;
+        Ok(res.into())
+    }
+
    #[napi(catch_unwind)]
    pub async fn alter_columns(
        &self,
--- a/python/.bumpversion.toml
+++ b/python/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.30.0-beta.5"
+current_version = "0.30.0"
 parse = """(?x)
    (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.30.0-beta.5"
+version = "0.30.0"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -166,6 +166,8 @@ class Table:
    async def checkout(self, version: Union[int, str]): ...
    async def checkout_latest(self): ...
    async def restore(self, version: Optional[Union[int, str]] = None): ...
+    async def prewarm_index(self, index_name: str) -> None: ...
+    async def prewarm_data(self, columns: Optional[List[str]] = None) -> None: ...
    async def list_indices(self) -> list[IndexConfig]: ...
    async def delete(self, filter: str) -> DeleteResult: ...
    async def add_columns(self, columns: list[tuple[str, str]]) -> AddColumnsResult: ...
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -640,6 +640,45 @@ class RemoteTable(Table):
    def drop_index(self, index_name: str):
        return LOOP.run(self._table.drop_index(index_name))

+    def prewarm_index(self, name: str) -> None:
+        """Prewarm an index in the table.
+
+        This is a hint to the database that the index will be accessed in the
+        future and should be loaded into memory if possible.  This can reduce
+        cold-start latency for subsequent queries.
+
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        Parameters
+        ----------
+        name: str
+            The name of the index to prewarm
+        """
+        return LOOP.run(self._table.prewarm_index(name))
+
+    def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
+        """Prewarm data for the table.
+
+        This is a hint to the database that the given columns will be accessed
+        in the future and the database should prefetch the data if possible.
+        Currently only supported on remote tables.
+
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        This operation has a large upfront cost but can speed up future queries
+        that need to fetch the given columns.  Large columns such as embeddings
+        or binary data may not be practical to prewarm.  This feature is intended
+        for workloads that issue many queries against the same columns.
+
+        Parameters
+        ----------
+        columns: list of str, optional
+            The columns to prewarm. If None, all columns are prewarmed.
+        """
+        return LOOP.run(self._table.prewarm_data(columns))
+
    def wait_for_index(
        self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
    ):
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -1506,22 +1506,17 @@ class Table(ABC):
            in-progress operation (e.g. appending new data) and these files will not
            be deleted unless they are at least 7 days old. If delete_unverified is True
            then these files will be deleted regardless of their age.
+
+            .. warning::
+
+                This should only be set to True if you can guarantee that no other
+                process is currently working on this dataset. Otherwise the dataset
+                could be put into a corrupted state.
+
        retrain: bool, default False
            This parameter is no longer used and is deprecated.

-        Experimental API
-        ----------------
-
-        The optimization process is undergoing active development and may change.
-        Our goal with these changes is to improve the performance of optimization and
-        reduce the complexity.
-
-        That being said, it is essential today to run optimize if you want the best
-        performance.  It should be stable and safe to use in production, but it our
-        hope that the API may be simplified (or not even need to be called) in the
-        future.
-
-        The frequency an application shoudl call optimize is based on the frequency of
+        The frequency an application should call optimize is based on the frequency of
        data modifications.  If data is frequently added, deleted, or updated then
        optimize should be run frequently.  A good rule of thumb is to run optimize if
        you have added or modified 100,000 or more records or run more than 20 data
@@ -2219,12 +2214,18 @@ class LanceTable(Table):

    def prewarm_index(self, name: str) -> None:
        """
-        Prewarms an index in the table
+        Prewarm an index in the table.

-        This loads the entire index into memory
+        This is a hint to the database that the index will be accessed in the
+        future and should be loaded into memory if possible.  This can reduce
+        cold-start latency for subsequent queries.

-        If the index does not fit into the available cache this call
-        may be wasteful
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        It is generally wasteful to call this if the index does not fit into the
+        available cache.  Not all index types support prewarming; unsupported
+        indices will silently ignore the request.

        Parameters
        ----------
@@ -2233,6 +2234,29 @@ class LanceTable(Table):
        """
        return LOOP.run(self._table.prewarm_index(name))

+    def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
+        """
+        Prewarm data for the table.
+
+        This is a hint to the database that the given columns will be accessed
+        in the future and the database should prefetch the data if possible.
+        Currently only supported on remote tables.
+
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        This operation has a large upfront cost but can speed up future queries
+        that need to fetch the given columns.  Large columns such as embeddings
+        or binary data may not be practical to prewarm.  This feature is intended
+        for workloads that issue many queries against the same columns.
+
+        Parameters
+        ----------
+        columns: list of str, optional
+            The columns to prewarm. If None, all columns are prewarmed.
+        """
+        return LOOP.run(self._table.prewarm_data(columns))
+
    def wait_for_index(
        self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
    ) -> None:
@@ -3018,22 +3042,17 @@ class LanceTable(Table):
            in-progress operation (e.g. appending new data) and these files will not
            be deleted unless they are at least 7 days old. If delete_unverified is True
            then these files will be deleted regardless of their age.
+
+            .. warning::
+
+                This should only be set to True if you can guarantee that no other
+                process is currently working on this dataset. Otherwise the dataset
+                could be put into a corrupted state.
+
        retrain: bool, default False
            This parameter is no longer used and is deprecated.

-        Experimental API
-        ----------------
-
-        The optimization process is undergoing active development and may change.
-        Our goal with these changes is to improve the performance of optimization and
-        reduce the complexity.
-
-        That being said, it is essential today to run optimize if you want the best
-        performance.  It should be stable and safe to use in production, but it our
-        hope that the API may be simplified (or not even need to be called) in the
-        future.
-
-        The frequency an application shoudl call optimize is based on the frequency of
+        The frequency an application should call optimize is based on the frequency of
        data modifications.  If data is frequently added, deleted, or updated then
        optimize should be run frequently.  A good rule of thumb is to run optimize if
        you have added or modified 100,000 or more records or run more than 20 data
@@ -3634,19 +3653,47 @@ class AsyncTable:
        """
        Prewarm an index in the table.

+        This is a hint to the database that the index will be accessed in the
+        future and should be loaded into memory if possible.  This can reduce
+        cold-start latency for subsequent queries.
+
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        It is generally wasteful to call this if the index does not fit into the
+        available cache.  Not all index types support prewarming; unsupported
+        indices will silently ignore the request.
+
        Parameters
        ----------
        name: str
            The name of the index to prewarm
-
-        Notes
-        -----
-        This will load the index into memory.  This may reduce the cold-start time for
-        future queries.  If the index does not fit in the cache then this call may be
-        wasteful.
        """
        await self._inner.prewarm_index(name)

+    async def prewarm_data(self, columns: Optional[List[str]] = None) -> None:
+        """
+        Prewarm data for the table.
+
+        This is a hint to the database that the given columns will be accessed
+        in the future and the database should prefetch the data if possible.
+        Currently only supported on remote tables.
+
+        This call initiates prewarming and returns once the request is accepted.
+        It is idempotent and safe to call from multiple clients concurrently.
+
+        This operation has a large upfront cost but can speed up future queries
+        that need to fetch the given columns.  Large columns such as embeddings
+        or binary data may not be practical to prewarm.  This feature is intended
+        for workloads that issue many queries against the same columns.
+
+        Parameters
+        ----------
+        columns: list of str, optional
+            The columns to prewarm. If None, all columns are prewarmed.
+        """
+        await self._inner.prewarm_data(columns)
+
    async def wait_for_index(
        self, index_names: Iterable[str], timeout: timedelta = timedelta(seconds=300)
    ) -> None:
@@ -4573,22 +4620,17 @@ class AsyncTable:
            in-progress operation (e.g. appending new data) and these files will not
            be deleted unless they are at least 7 days old. If delete_unverified is True
            then these files will be deleted regardless of their age.
+
+            .. warning::
+
+                This should only be set to True if you can guarantee that no other
+                process is currently working on this dataset. Otherwise the dataset
+                could be put into a corrupted state.
+
        retrain: bool, default False
            This parameter is no longer used and is deprecated.

-        Experimental API
-        ----------------
-
-        The optimization process is undergoing active development and may change.
-        Our goal with these changes is to improve the performance of optimization and
-        reduce the complexity.
-
-        That being said, it is essential today to run optimize if you want the best
-        performance.  It should be stable and safe to use in production, but it our
-        hope that the API may be simplified (or not even need to be called) in the
-        future.
-
-        The frequency an application shoudl call optimize is based on the frequency of
+        The frequency an application should call optimize is based on the frequency of
        data modifications.  If data is frequently added, deleted, or updated then
        optimize should be run frequently.  A good rule of thumb is to run optimize if
        you have added or modified 100,000 or more records or run more than 20 data
--- a/python/python/tests/test_namespace_integration.py
+++ b/python/python/tests/test_namespace_integration.py
@@ -147,7 +147,12 @@ class TrackingNamespace(LanceNamespace):
        This simulates a credential rotation system where each call returns
        new credentials that expire after credential_expires_in_seconds.
        """
-        modified = copy.deepcopy(storage_options) if storage_options else {}
+        # Start from base storage options (endpoint, region, allow_http, etc.)
+        # because DirectoryNamespace returns None for storage_options from
+        # describe_table/declare_table when no credential vendor is configured.
+        modified = copy.deepcopy(self.base_storage_options)
+        if storage_options:
+            modified.update(storage_options)

        # Increment credentials to simulate rotation
        modified["aws_access_key_id"] = f"AKID_{count}"
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -316,6 +316,19 @@ impl<'py> IntoPyObject<'py> for PySelect {
            Select::All => Ok(py.None().into_bound(py).into_any()),
            Select::Columns(columns) => Ok(columns.into_pyobject(py)?.into_any()),
            Select::Dynamic(columns) => Ok(columns.into_pyobject(py)?.into_any()),
+            Select::Expr(pairs) => {
+                // Serialize DataFusion Expr -> SQL string so Python sees the same
+                // format as Select::Dynamic: a list of (name, sql_string) tuples.
+                let sql_pairs: PyResult<Vec<(String, String)>> = pairs
+                    .into_iter()
+                    .map(|(name, expr)| {
+                        lancedb::expr::expr_to_sql_string(&expr)
+                            .map(|sql| (name, sql))
+                            .map_err(|e| PyRuntimeError::new_err(e.to_string()))
+                    })
+                    .collect();
+                Ok(sql_pairs?.into_pyobject(py)?.into_any())
+            }
        }
    }
 }
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -426,6 +426,17 @@ impl Table {
        })
    }

+    pub fn prewarm_data(
+        self_: PyRef<'_, Self>,
+        columns: Option<Vec<String>>,
+    ) -> PyResult<Bound<'_, PyAny>> {
+        let inner = self_.inner_ref()?.clone();
+        future_into_py(self_.py(), async move {
+            inner.prewarm_data(columns).await.infer_error()?;
+            Ok(())
+        })
+    }
+
    pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
        let inner = self_.inner_ref()?.clone();
        future_into_py(self_.py(), async move {
--- a/rust/lancedb/src/dataloader/permutation/reader.rs
+++ b/rust/lancedb/src/dataloader/permutation/reader.rs
@@ -339,6 +339,12 @@ impl PermutationReader {
                }
                Ok(false)
            }
+            Select::Expr(columns) => {
+                // For Expr projections, we check if any alias is _rowid.
+                // We can't validate the expression itself (it may differ from _rowid)
+                // but we allow it through; the column will be included.
+                Ok(columns.iter().any(|(alias, _)| alias == ROW_ID))
+            }
        }
    }

--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -47,6 +47,25 @@ pub enum Select {
    ///
    /// See [`Query::select`] for more details and examples
    Dynamic(Vec<(String, String)>),
+    /// Advanced selection using type-safe DataFusion expressions
+    ///
+    /// Similar to [`Select::Dynamic`] but uses [`datafusion_expr::Expr`] instead of
+    /// raw SQL strings. Use [`crate::expr`] helpers to build expressions:
+    ///
+    /// ```
+    /// use lancedb::expr::{col, lit};
+    /// use lancedb::query::Select;
+    ///
+    /// // SELECT id, id * 2 AS id2 FROM ...
+    /// let selection = Select::expr_projection(&[
+    ///     ("id", col("id")),
+    ///     ("id2", col("id") * lit(2)),
+    /// ]);
+    /// ```
+    ///
+    /// Note: For remote/server-side queries the expressions are serialized to SQL strings
+    /// automatically (same as [`Select::Dynamic`]).
+    Expr(Vec<(String, datafusion_expr::Expr)>),
 }

 impl Select {
@@ -69,6 +88,29 @@ impl Select {
                .collect(),
        )
    }
+    /// Create a typed-expression projection.
+    ///
+    /// This is a convenience method for creating a [`Select::Expr`] variant from
+    /// a slice of `(name, expr)` pairs where each `expr` is a [`datafusion_expr::Expr`].
+    ///
+    /// # Example
+    /// ```
+    /// use lancedb::expr::{col, lit};
+    /// use lancedb::query::Select;
+    ///
+    /// let selection = Select::expr_projection(&[
+    ///     ("id", col("id")),
+    ///     ("id2", col("id") * lit(2)),
+    /// ]);
+    /// ```
+    pub fn expr_projection(columns: &[(impl AsRef<str>, datafusion_expr::Expr)]) -> Self {
+        Self::Expr(
+            columns
+                .iter()
+                .map(|(name, expr)| (name.as_ref().to_string(), expr.clone()))
+                .collect(),
+        )
+    }
 }

 /// A trait for converting a type to a query vector
@@ -1591,6 +1633,58 @@ mod tests {
        });
    }

+    #[tokio::test]
+    async fn test_select_with_expr_projection() {
+        // Mirrors test_select_with_transform but uses Select::Expr instead of Select::Dynamic
+        let tmp_dir = tempdir().unwrap();
+        let dataset_path = tmp_dir.path().join("test_expr.lance");
+        let uri = dataset_path.to_str().unwrap();
+
+        let batches = make_non_empty_batches();
+        let conn = connect(uri).execute().await.unwrap();
+        let table = conn
+            .create_table("my_table", batches)
+            .execute()
+            .await
+            .unwrap();
+
+        use crate::expr::{col, lit};
+        let query = table.query().limit(10).select(Select::expr_projection(&[
+            ("id2", col("id") * lit(2i32)),
+            ("id", col("id")),
+        ]));
+
+        let schema = query.output_schema().await.unwrap();
+        assert_eq!(
+            schema,
+            Arc::new(ArrowSchema::new(vec![
+                ArrowField::new("id2", DataType::Int32, true),
+                ArrowField::new("id", DataType::Int32, true),
+            ]))
+        );
+
+        let result = query.execute().await;
+        let mut batches = result
+            .expect("should have result")
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+        assert_eq!(batches.len(), 1);
+        let batch = batches.pop().unwrap();
+
+        // id and id2
+        assert_eq!(batch.num_columns(), 2);
+
+        let id: &Int32Array = batch.column_by_name("id").unwrap().as_primitive();
+        let id2: &Int32Array = batch.column_by_name("id2").unwrap().as_primitive();
+
+        id.iter().zip(id2.iter()).for_each(|(id, id2)| {
+            let id = id.unwrap();
+            let id2 = id2.unwrap();
+            assert_eq!(id * 2, id2);
+        });
+    }
+
    #[tokio::test]
    async fn test_execute_no_vector() {
        // TODO: Switch back to memory://foo after https://github.com/lancedb/lancedb/issues/1051
--- a/rust/lancedb/src/remote/client.rs
+++ b/rust/lancedb/src/remote/client.rs
@@ -426,14 +426,11 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
                })?,
            );
        }
-        if db_prefix.is_some() {
+        if let Some(prefix) = db_prefix {
            headers.insert(
                HeaderName::from_static("x-lancedb-database-prefix"),
-                HeaderValue::from_str(db_prefix.unwrap()).map_err(|_| Error::InvalidInput {
-                    message: format!(
-                        "non-ascii database prefix '{}' provided",
-                        db_prefix.unwrap()
-                    ),
+                HeaderValue::from_str(prefix).map_err(|_| Error::InvalidInput {
+                    message: format!("non-ascii database prefix '{}' provided", prefix),
                })?,
            );
        }
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -477,6 +477,16 @@ impl<S: HttpSend> RemoteTable<S> {
                    }));
                body["columns"] = alias_map.into();
            }
+            Select::Expr(pairs) => {
+                let alias_map: Result<serde_json::Map<String, serde_json::Value>> = pairs
+                    .iter()
+                    .map(|(name, expr)| {
+                        expr_to_sql_string(expr)
+                            .map(|sql| (name.clone(), serde_json::Value::String(sql)))
+                    })
+                    .collect();
+                body["columns"] = alias_map?.into();
+            }
        }

        if params.fast_search {
@@ -1645,10 +1655,33 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
        Ok(())
    }

-    async fn prewarm_index(&self, _index_name: &str) -> Result<()> {
-        Err(Error::NotSupported {
-            message: "prewarm_index is not yet supported on LanceDB cloud.".into(),
-        })
+    async fn prewarm_index(&self, index_name: &str) -> Result<()> {
+        let request = self.client.post(&format!(
+            "/v1/table/{}/index/{}/prewarm/",
+            self.identifier, index_name
+        ));
+        let (request_id, response) = self.send(request, true).await?;
+        if response.status() == StatusCode::NOT_FOUND {
+            return Err(Error::IndexNotFound {
+                name: index_name.to_string(),
+            });
+        }
+        self.check_table_response(&request_id, response).await?;
+        Ok(())
+    }
+
+    async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()> {
+        let mut request = self.client.post(&format!(
+            "/v1/table/{}/page_cache/prewarm/",
+            self.identifier
+        ));
+        let body = serde_json::json!({
+            "columns": columns.unwrap_or_default(),
+        });
+        request = request.json(&body);
+        let (request_id, response) = self.send(request, true).await?;
+        self.check_table_response(&request_id, response).await?;
+        Ok(())
    }

    async fn table_definition(&self) -> Result<TableDefinition> {
@@ -3529,6 +3562,64 @@ mod tests {
        assert_eq!(result.version, if old_server { 0 } else { 43 });
    }

+    #[tokio::test]
+    async fn test_prewarm_index() {
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(request.method(), "POST");
+            assert_eq!(
+                request.url().path(),
+                "/v1/table/my_table/index/my_index/prewarm/"
+            );
+            http::Response::builder().status(200).body("{}").unwrap()
+        });
+        table.prewarm_index("my_index").await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_prewarm_index_not_found() {
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(
+                request.url().path(),
+                "/v1/table/my_table/index/my_index/prewarm/"
+            );
+            http::Response::builder().status(404).body("{}").unwrap()
+        });
+        let e = table.prewarm_index("my_index").await.unwrap_err();
+        assert!(matches!(e, Error::IndexNotFound { .. }));
+    }
+
+    #[tokio::test]
+    async fn test_prewarm_data() {
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(request.method(), "POST");
+            assert_eq!(
+                request.url().path(),
+                "/v1/table/my_table/page_cache/prewarm/"
+            );
+            http::Response::builder().status(200).body("{}").unwrap()
+        });
+        table.prewarm_data(None).await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_prewarm_data_with_columns() {
+        let table = Table::new_with_handler("my_table", |request| {
+            assert_eq!(request.method(), "POST");
+            assert_eq!(
+                request.url().path(),
+                "/v1/table/my_table/page_cache/prewarm/"
+            );
+            let body = request.body().unwrap().as_bytes().unwrap();
+            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
+            assert_eq!(body["columns"], serde_json::json!(["col_a", "col_b"]));
+            http::Response::builder().status(200).body("{}").unwrap()
+        });
+        table
+            .prewarm_data(Some(vec!["col_a".into(), "col_b".into()]))
+            .await
+            .unwrap();
+    }
+
    #[tokio::test]
    async fn test_drop_index() {
        let table = Table::new_with_handler("my_table", |request| {
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -277,8 +277,13 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
    async fn list_indices(&self) -> Result<Vec<IndexConfig>>;
    /// Drop an index from the table.
    async fn drop_index(&self, name: &str) -> Result<()>;
-    /// Prewarm an index in the table
+    /// Prewarm an index in the table.
    async fn prewarm_index(&self, name: &str) -> Result<()>;
+    /// Prewarm data for the table.
+    ///
+    /// Currently only supported on remote tables.
+    /// If `columns` is `None`, all columns are prewarmed.
+    async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()>;
    /// Get statistics about the index.
    async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>>;
    /// Merge insert new records into the table.
@@ -946,17 +951,7 @@ impl Table {
    ///  * Prune: Removes old versions of the dataset
    ///  * Index: Optimizes the indices, adding new data to existing indices
    ///
-    /// <section class="warning">Experimental API</section>
-    ///
-    /// The optimization process is undergoing active development and may change.
-    /// Our goal with these changes is to improve the performance of optimization and
-    /// reduce the complexity.
-    ///
-    /// That being said, it is essential today to run optimize if you want the best
-    /// performance.  It should be stable and safe to use in production, but it our
-    /// hope that the API may be simplified (or not even need to be called) in the future.
-    ///
-    /// The frequency an application shoudl call optimize is based on the frequency of
+    /// The frequency an application should call optimize is based on the frequency of
    /// data modifications.  If data is frequently added, deleted, or updated then
    /// optimize should be run frequently.  A good rule of thumb is to run optimize if
    /// you have added or modified 100,000 or more records or run more than 20 data
@@ -1123,22 +1118,45 @@ impl Table {
        self.inner.drop_index(name).await
    }

-    /// Prewarm an index in the table
+    /// Prewarm an index in the table.
    ///
-    /// This is a hint to fully load the index into memory.  It can be used to
-    /// avoid cold starts
+    /// This is a hint to the database that the index will be accessed in the
+    /// future and should be loaded into memory if possible.  This can reduce
+    /// cold-start latency for subsequent queries.
+    ///
+    /// This call initiates prewarming and returns once the request is accepted.
+    /// It is idempotent and safe to call from multiple clients concurrently.
    ///
    /// It is generally wasteful to call this if the index does not fit into the
-    /// available cache.
-    ///
-    /// Note: This function is not yet supported on all indices, in which case it
-    /// may do nothing.
+    /// available cache.  Not all index types support prewarming; unsupported
+    /// indices will silently ignore the request.
    ///
    /// Use [`Self::list_indices()`] to find the names of the indices.
    pub async fn prewarm_index(&self, name: &str) -> Result<()> {
        self.inner.prewarm_index(name).await
    }

+    /// Prewarm data for the table.
+    ///
+    /// This is a hint to the database that the given columns will be accessed in
+    /// the future and the database should prefetch the data if possible.  This
+    /// can reduce cold-start latency for subsequent queries.  Currently only
+    /// supported on remote tables.
+    ///
+    /// This call initiates prewarming and returns once the request is accepted.
+    /// It is idempotent and safe to call from multiple clients concurrently —
+    /// calling it on already-prewarmed columns is a no-op on the server.
+    ///
+    /// This operation has a large upfront cost but can speed up future queries
+    /// that need to fetch the given columns.  Large columns such as embeddings
+    /// or binary data may not be practical to prewarm.  This feature is intended
+    /// for workloads that issue many queries against the same columns.
+    ///
+    /// If `columns` is `None`, all columns are prewarmed.
+    pub async fn prewarm_data(&self, columns: Option<Vec<String>>) -> Result<()> {
+        self.inner.prewarm_data(columns).await
+    }
+
    /// Poll until the columns are fully indexed. Will return Error::Timeout if the columns
    /// are not fully indexed within the timeout.
    pub async fn wait_for_index(
@@ -2290,6 +2308,12 @@ impl BaseTable for NativeTable {
        Ok(dataset.prewarm_index(index_name).await?)
    }

+    async fn prewarm_data(&self, _columns: Option<Vec<String>>) -> Result<()> {
+        Err(Error::NotSupported {
+            message: "prewarm_data is currently only supported on remote tables.".into(),
+        })
+    }
+
    async fn update(&self, update: UpdateBuilder) -> Result<UpdateResult> {
        // Delegate to the submodule implementation
        update::execute_update(self, update).await
--- a/rust/lancedb/src/table/optimize.rs
+++ b/rust/lancedb/src/table/optimize.rs
@@ -64,6 +64,9 @@ pub enum OptimizeAction {
        older_than: Option<Duration>,
        /// Because they may be part of an in-progress transaction, files newer than 7 days old are not deleted by default.
        /// If you are sure that there are no in-progress transactions, then you can set this to True to delete all files older than `older_than`.
+        ///
+        /// **WARNING**: This should only be set to true if you can guarantee that no other process is
+        /// currently working on this dataset.  Otherwise the dataset could be put into a corrupted state.
        delete_unverified: Option<bool>,
        /// If true, an error will be returned if there are any old versions that are still tagged.
        error_if_tagged_old_versions: Option<bool>,
@@ -117,6 +120,10 @@ pub(crate) async fn optimize_indices(table: &NativeTable, options: &OptimizeOpti
 ///   If you are sure that there are no in-progress transactions, then you
 ///   can set this to True to delete all files older than `older_than`.
 ///
+///   **WARNING**: This should only be set to true if you can guarantee that
+///   no other process is currently working on this dataset.  Otherwise the
+///   dataset could be put into a corrupted state.
+///
 /// This calls into [lance::dataset::Dataset::cleanup_old_versions] and
 /// returns the result.
 pub(crate) async fn cleanup_old_versions(
--- a/rust/lancedb/src/table/query.rs
+++ b/rust/lancedb/src/table/query.rs
@@ -186,6 +186,13 @@ pub async fn create_plan(
        Select::Dynamic(ref select_with_transform) => {
            scanner.project_with_transform(select_with_transform.as_slice())?;
        }
+        Select::Expr(ref expr_pairs) => {
+            let sql_pairs: crate::Result<Vec<(String, String)>> = expr_pairs
+                .iter()
+                .map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
+                .collect();
+            scanner.project_with_transform(sql_pairs?.as_slice())?;
+        }
        Select::All => {}
    }

@@ -340,6 +347,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
                                .to_string(),
                    });
                }
+                Select::Expr(pairs) => {
+                    let sql_pairs: crate::Result<Vec<(String, String)>> = pairs
+                        .iter()
+                        .map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
+                        .collect();
+                    let sql_pairs = sql_pairs?;
+                    Some(Box::new(QueryTableRequestColumns {
+                        column_names: None,
+                        column_aliases: Some(sql_pairs.into_iter().collect()),
+                    }))
+                }
            };

            // Check for unsupported features
@@ -411,6 +429,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
                            .to_string(),
                    });
                }
+                Select::Expr(pairs) => {
+                    let sql_pairs: crate::Result<Vec<(String, String)>> = pairs
+                        .iter()
+                        .map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
+                        .collect();
+                    let sql_pairs = sql_pairs?;
+                    Some(Box::new(QueryTableRequestColumns {
+                        column_names: None,
+                        column_aliases: Some(sql_pairs.into_iter().collect()),
+                    }))
+                }
            };

            // Handle full text search if present
Author	SHA1	Message	Date
Lance Release	c89240b16c	Bump version: 0.30.0-beta.6 → 0.30.0	2026-03-16 22:46:19 +00:00
Lance Release	099ff355a4	Bump version: 0.30.0-beta.5 → 0.30.0-beta.6	2026-03-16 22:46:17 +00:00
Weston Pace	c5995fda67	feat: update lance dependency to 3.0.0 release (#3137 ) ## Summary - Update all 14 lance crates from `3.0.0-rc.3` (git source) to `3.0.0` (crates.io release) - Remove git/tag source references since 3.0.0 is published on crates.io ## Test plan - [x] `cargo check --features remote --tests --examples` passes - [x] `cargo clippy --features remote --tests --examples` passes - [ ] CI passes 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-16 15:29:18 -07:00
Weston Pace	25eb1fbfa4	fix: restore storage options on copy in localstack tests (#3148 )	2026-03-16 14:02:19 -07:00
Weston Pace	4ac41c5c3f	fix(ci): upgrade LocalStack to 4.0 for S3 integration tests (#3147 ) ## Summary - Upgrade LocalStack from 3.3 to 4.0 in `docker-compose.yml` to fix S3 integration test failures in CI - Version 3.3 has compatibility issues with newer Python 3.13 and updated boto3 dependencies - Matches the LocalStack version used successfully in the lance repository ## Test plan - [ ] Verify `docker compose up --detach --wait` completes successfully in CI - [ ] All tests in `test_s3.py` pass (5 tests) - [ ] All `@pytest.mark.s3_test` tests in `test_namespace_integration.py` pass (7 tests) - [ ] No regressions in non-integration test jobs (Mac, Windows) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-16 09:02:11 -07:00
Will Jones	9a5b0398ec	chore: fix ci (#3139 ) * Move away from buildjet, which is shutting down runners for GHA [^1] * Add `Cargo.lock` to build jobs, so when we upgrade locked dependencies we check the builds actually pass. CI started failing because dependencies were changed in #3116 without running all build jobs. * Add fixes for aws-lc-rs build in NodeJS. [^1]: https://buildjet.com/for-github-actions/blog/we-are-shutting-down --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>	2026-03-16 06:25:40 -07:00
Pratik Dey	d1d720d08a	feat(nodejs): support field/data type input in add_columns() method (#3114 ) Add support for passing field/data type information into add_columns() method, bringing parity with Python bindings. The method now accepts: - AddColumnsSql[] - SQL expressions (existing functionality) - Field - single Arrow field with explicit data type - Field[] - array of Arrow fields with explicit data types - Schema - Arrow schema with explicit data types New columns added via Field/Schema are initialized with null values. All field-based columns must be nullable due to null initialization. Resolves #3107 --------- Signed-off-by: Pratik <pratikrocks.dey11@gmail.com> Co-authored-by: Claude <noreply@anthropic.com>	2026-03-13 12:57:14 -07:00
Mesut-Doner	c2e543f1b7	feat(rust): support Expr in projection query (#3069 ) Referred and followed [`Select::Dynamic`] implementation. Closes #3039	2026-03-13 12:54:26 -07:00
Weston Pace	216c1b5f77	docs: remove experimental label from optimize and warn about delete_unverified (#3128 ) ## Summary - Removes the "Experimental API" section from `optimize` method documentation across Rust, Python, and TypeScript - Adds a warning to `delete_unverified` documentation in all bindings: this should only be set to true if you can guarantee no other process is working on the dataset, otherwise it could be corrupted - Fixes a typo ("shoudl" → "should") Closes #3125 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-12 14:37:42 +08:00
Xin Sun	fc1867da83	chore: remove the duplicate snafu-derive dependency in the lockfile (#3124 )	2026-03-10 21:43:51 -07:00
Esteban Gutierrez	f951da2b00	feat: support prewarm_index and prewarm_data on remote tables (#3110 ) ## Summary - Implement `RemoteTable.prewarm_data(columns)` calling `POST /v1/table/{id}/page_cache/prewarm/` - Implement `RemoteTable.prewarm_index(name)` calling `POST /v1/table/{id}/index/{name}/prewarm/` (previously returned `NotSupported`) - Add `BaseTable::prewarm_data(columns)` trait method and `Table` public API in Rust core - Add PyO3 bindings and Python API (`AsyncTable`, `LanceTable`, `RemoteTable`) for `prewarm_data` - Add type stubs for `prewarm_index` and `prewarm_data` in `_lancedb.pyi` - Upgrade Lance to 3.0.0-rc.3 with breaking change fixes Co-authored-by: Will Jones <willjones127@gmail.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>	2026-03-10 15:39:39 -05:00