Compare commits

..

2 Commits

Author SHA1 Message Date
Jack Ye
78c2cb3a9d fix: also update Java lance-core version in codex bump workflow
Add step to update the <lance-core.version> property in java/pom.xml
when bumping Lance dependencies.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 13:23:18 -08:00
Jack Ye
f8bf9a63a1 fix: avoid force push in codex workflows to work with v0.95.0 git safety
Codex CLI v0.95.0 hardened git command safety so force push (`git push -f`)
now requires approval, which blocks it in non-interactive exec mode. Replace
force push with `gh api` branch deletion followed by regular `git push`.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-05 13:10:09 -08:00
24 changed files with 130 additions and 762 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.26.1"
current_version = "0.25.0-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -318,7 +318,7 @@ jobs:
- name: Setup node
uses: actions/setup-node@v4
with:
node-version: 24
node-version: 20
cache: npm
cache-dependency-path: nodejs/package-lock.json
registry-url: "https://registry.npmjs.org"
@@ -350,7 +350,6 @@ jobs:
env:
DRY_RUN: ${{ !startsWith(github.ref, 'refs/tags/v') }}
run: |
npm config set provenance true
ARGS="--access public"
if [[ $DRY_RUN == "true" ]]; then
ARGS="$ARGS --dry-run"

91
Cargo.lock generated
View File

@@ -3072,9 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f03a771ab914e207dd26bd2f12666839555ec8ecc7e1770e1ed6f9900d899a4"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4405,9 +4404,8 @@ dependencies = [
[[package]]
name = "lance"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47b685aca3f97ee02997c83ded16f59c747ccb69e74c8abbbae4aa3d22cf1301"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow",
"arrow-arith",
@@ -4472,9 +4470,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf00c7537df524cc518a089f0d156a036d95ca3f5bc2bc1f0a9f9293e9b62ef"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4493,9 +4490,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46752e4ac8fc5590a445e780b63a8800adc7a770bd74770a8dc66963778e4e77"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrayref",
"paste",
@@ -4504,9 +4500,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d13d87d07305c6d4b4dc7780fb1107babf782a0e5b1dc7872e17ae1f8fd11ca"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4543,9 +4538,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6451b5af876eaef8bec4b38a39dadac9d44621e1ecf85d0cdf6097a5d0aa8721"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow",
"arrow-array",
@@ -4575,9 +4569,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1736708dd7867dfbab8fcc930b21c96717c6c00be73b7d9a240336a4ed80375"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow",
"arrow-array",
@@ -4595,9 +4588,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6b6ca4ff94833240d5ba4a94a742cba786d1949b3c3fa7e11d6f0050443432a"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4634,9 +4626,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55fbe959bffe185543aed3cbeb14484f1aa2e55886034fdb1ea3d8cc9b70aad8"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4668,9 +4659,8 @@ dependencies = [
[[package]]
name = "lance-geo"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a52b0adabc953d457f336a784a3b37353a180e6a79905f544949746e0d4c6483"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"datafusion",
"geo-traits",
@@ -4684,9 +4674,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b67654bf86fd942dd2cf08294ee7e91053427cd148225f49c9ff398ff9a40fd"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow",
"arrow-arith",
@@ -4753,9 +4742,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eb0ccc1c414e31687d83992d546af0a0237c8d2f4bf2ae3d347d539fd0fc141"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow",
"arrow-arith",
@@ -4795,9 +4783,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "083404cf12dcdb1a7df98fb58f9daf626b6e43a2f794b37b6b89b4012a0e1f78"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4813,9 +4800,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c12778d2aabf9c2bfd16e2509ebe120e562a288d8ae630ec6b6b204868df41b2"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow",
"async-trait",
@@ -4827,9 +4813,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8863aababdd13a6d2c8d6179dc6981f4f8f49d8b66a00c5dd75115aec4cadc99"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4872,9 +4857,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0fcc83f197ce2000c4abe4f5e0873490ab1f41788fa76571c4209b87d4daf50"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow",
"arrow-array",
@@ -4913,9 +4897,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fb1f7c7e06f91360e141ecee1cf2110f858c231705f69f2cd2fda9e30c1e9f4"
version = "2.0.0-rc.4"
source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-rc.4#584c470f69334600cae384f1ac30bf13f8a6959a"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4926,7 +4909,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.26.0"
version = "0.25.0-beta.0"
dependencies = [
"ahash",
"anyhow",
@@ -5006,7 +4989,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.26.0"
version = "0.25.0-beta.0"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -5026,7 +5009,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.29.0"
version = "0.27.0"
dependencies = [
"arrow",
"async-trait",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.88.0"
[workspace.dependencies]
lance = { "version" = "=2.0.0", default-features = false }
lance-core = "=2.0.0"
lance-datagen = "=2.0.0"
lance-file = "=2.0.0"
lance-io = { "version" = "=2.0.0", default-features = false }
lance-index = "=2.0.0"
lance-linalg = "=2.0.0"
lance-namespace = "=2.0.0"
lance-namespace-impls = { "version" = "=2.0.0", default-features = false }
lance-table = "=2.0.0"
lance-testing = "=2.0.0"
lance-datafusion = "=2.0.0"
lance-encoding = "=2.0.0"
lance-arrow = "=2.0.0"
lance = { "version" = "=2.0.0-rc.4", default-features = false, "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-rc.4", default-features = false, "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-rc.4", default-features = false, "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.26.1</version>
<version>0.25.0-beta.0</version>
</dependency>
```

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.26.1-final.0</version>
<version>0.25.0-beta.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.26.1-final.0</version>
<version>0.25.0-beta.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>2.0.0</lance-core.version>
<lance-core.version>1.0.4</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
@@ -292,12 +292,11 @@
<plugin>
<groupId>org.sonatype.central</groupId>
<artifactId>central-publishing-maven-plugin</artifactId>
<version>0.8.0</version>
<version>0.4.0</version>
<extensions>true</extensions>
<configuration>
<publishingServerId>ossrh</publishingServerId>
<tokenAuth>true</tokenAuth>
<autoPublish>true</autoPublish>
</configuration>
</plugin>
<plugin>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.26.1"
version = "0.25.0-beta.0"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.26.1",
"version": "0.25.0-beta.0",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -0,0 +1,3 @@
# `@lancedb/lancedb-darwin-x64`
This is the **x86_64-apple-darwin** binary for `@lancedb/lancedb`

View File

@@ -0,0 +1,12 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.25.0-beta.0",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",
"files": ["lancedb.darwin-x64.node"],
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
}
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.26.1",
"version": "0.25.0-beta.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.26.1",
"version": "0.25.0-beta.0",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.26.1",
"version": "0.25.0-beta.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.26.1",
"version": "0.25.0-beta.0",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.26.1",
"version": "0.25.0-beta.0",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.26.1",
"version": "0.25.0-beta.0",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.26.0",
"version": "0.25.0-beta.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.26.0",
"version": "0.25.0-beta.0",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.26.1",
"version": "0.25.0-beta.0",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",
@@ -25,6 +25,7 @@
"triples": {
"defaults": false,
"additional": [
"x86_64-apple-darwin",
"aarch64-apple-darwin",
"x86_64-unknown-linux-gnu",
"aarch64-unknown-linux-gnu",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.29.1"
current_version = "0.28.0-beta.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.29.1"
version = "0.27.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.26.1"
version = "0.25.0-beta.0"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -79,7 +79,6 @@ pub mod datafusion;
pub(crate) mod dataset;
pub mod delete;
pub mod merge;
pub mod schema_evolution;
pub mod update;
use crate::index::waiter::wait_for_index;
@@ -92,7 +91,6 @@ pub use lance::dataset::scanner::DatasetRecordBatchStream;
use lance::dataset::statistics::DatasetStatisticsExt;
use lance_index::frag_reuse::FRAG_REUSE_INDEX_NAME;
pub use lance_index::optimize::OptimizeOptions;
pub use schema_evolution::{AddColumnsResult, AlterColumnsResult, DropColumnsResult};
use serde_with::skip_serializing_none;
pub use update::{UpdateBuilder, UpdateResult};
@@ -398,6 +396,33 @@ pub struct MergeResult {
pub num_attempts: u32,
}
/// The result of an add columns operation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct AddColumnsResult {
/// The commit version associated with the operation.
///
/// A version of `0` indicates compatibility with legacy servers that do not return
/// a commit version.
#[serde(default)]
pub version: u64,
}
/// The result of an alter columns operation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct AlterColumnsResult {
/// The commit version associated with the operation.
///
/// A version of `0` indicates compatibility with legacy servers that do not return
/// a commit version.
#[serde(default)]
pub version: u64,
}
/// The result of a drop columns operation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct DropColumnsResult {
/// The commit version associated with the operation.
///
/// A version of `0` indicates compatibility with legacy servers that do not return
/// a commit version.
#[serde(default)]
pub version: u64,
}
/// A trait for anything "table-like". This is used for both native tables (which target
/// Lance datasets) and remote tables (which target LanceDB cloud)
///
@@ -3066,15 +3091,27 @@ impl BaseTable for NativeTable {
transforms: NewColumnTransform,
read_columns: Option<Vec<String>>,
) -> Result<AddColumnsResult> {
schema_evolution::execute_add_columns(self, transforms, read_columns).await
let mut dataset = self.dataset.get_mut().await?;
dataset.add_columns(transforms, read_columns, None).await?;
Ok(AddColumnsResult {
version: dataset.version().version,
})
}
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<AlterColumnsResult> {
schema_evolution::execute_alter_columns(self, alterations).await
let mut dataset = self.dataset.get_mut().await?;
dataset.alter_columns(alterations).await?;
Ok(AlterColumnsResult {
version: dataset.version().version,
})
}
async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
schema_evolution::execute_drop_columns(self, columns).await
let mut dataset = self.dataset.get_mut().await?;
dataset.drop_columns(columns).await?;
Ok(DropColumnsResult {
version: dataset.version().version,
})
}
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {

View File

@@ -1,666 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
//! Schema evolution operations for LanceDB tables.
//!
//! This module provides functionality to modify the schema of existing tables:
//! - [`add_columns`](execute_add_columns): Add new columns using SQL expressions
//! - [`alter_columns`](execute_alter_columns): Rename columns, change types, or modify nullability
//! - [`drop_columns`](execute_drop_columns): Remove columns from the table
use lance::dataset::{ColumnAlteration, NewColumnTransform};
use serde::{Deserialize, Serialize};
use super::NativeTable;
use crate::Result;
/// The result of an add columns operation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct AddColumnsResult {
/// The commit version associated with the operation.
///
/// A version of `0` indicates compatibility with legacy servers that do not return
/// a commit version.
#[serde(default)]
pub version: u64,
}
/// The result of an alter columns operation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct AlterColumnsResult {
/// The commit version associated with the operation.
///
/// A version of `0` indicates compatibility with legacy servers that do not return
/// a commit version.
#[serde(default)]
pub version: u64,
}
/// The result of a drop columns operation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct DropColumnsResult {
/// The commit version associated with the operation.
///
/// A version of `0` indicates compatibility with legacy servers that do not return
/// a commit version.
#[serde(default)]
pub version: u64,
}
/// Crate-internal backend for adding columns to a native table.
///
/// Obtains mutable access to the table's dataset, applies `transforms`
/// (forwarding `read_columns` unchanged), and reports the dataset version
/// read back after the operation completes.
///
/// # Errors
///
/// Propagates any error from acquiring the dataset or from the add-columns call.
pub(crate) async fn execute_add_columns(
    table: &NativeTable,
    transforms: NewColumnTransform,
    read_columns: Option<Vec<String>>,
) -> Result<AddColumnsResult> {
    let mut ds = table.dataset.get_mut().await?;
    ds.add_columns(transforms, read_columns, None).await?;

    let version = ds.version().version;
    Ok(AddColumnsResult { version })
}
/// Crate-internal backend for altering existing columns of a native table.
///
/// Obtains mutable access to the table's dataset, applies every entry in
/// `alterations`, and reports the dataset version read back afterwards.
///
/// # Errors
///
/// Propagates any error from acquiring the dataset or from the alter-columns call.
pub(crate) async fn execute_alter_columns(
    table: &NativeTable,
    alterations: &[ColumnAlteration],
) -> Result<AlterColumnsResult> {
    let mut ds = table.dataset.get_mut().await?;
    ds.alter_columns(alterations).await?;

    let version = ds.version().version;
    Ok(AlterColumnsResult { version })
}
/// Crate-internal backend for dropping columns from a native table.
///
/// Obtains mutable access to the table's dataset, removes the named
/// `columns`, and reports the dataset version read back afterwards.
///
/// # Errors
///
/// Propagates any error from acquiring the dataset or from the drop-columns call.
pub(crate) async fn execute_drop_columns(
    table: &NativeTable,
    columns: &[&str],
) -> Result<DropColumnsResult> {
    let mut ds = table.dataset.get_mut().await?;
    ds.drop_columns(columns).await?;

    let version = ds.version().version;
    Ok(DropColumnsResult { version })
}
#[cfg(test)]
mod tests {
use arrow_array::{record_batch, Int32Array, RecordBatchIterator, StringArray};
use arrow_schema::DataType;
use futures::TryStreamExt;
use lance::dataset::ColumnAlteration;
use crate::connect;
use crate::query::{ExecutableQuery, QueryBase, Select};
use crate::table::NewColumnTransform;
// Add Columns Tests
/// A column added through a SQL expression bumps the table version and holds
/// the computed value for every row.
#[tokio::test]
async fn test_add_columns_with_sql_expression() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(("id", Int32, [1, 2, 3, 4, 5])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_add_columns", reader)
        .execute()
        .await
        .unwrap();

    let version_before = table.version().await.unwrap();

    // Derive `doubled` from `id`.
    let transform = NewColumnTransform::SqlExpressions(vec![("doubled".into(), "id * 2".into())]);
    let outcome = table.add_columns(transform, None).await.unwrap();

    // The reported version must be newer than the one seen before the call.
    assert!(outcome.version > version_before);

    // Read both columns back.
    let stream = table
        .query()
        .select(Select::columns(&["id", "doubled"]))
        .execute()
        .await
        .unwrap();
    let results = stream.try_collect::<Vec<_>>().await.unwrap();
    let first = &results[0];

    // Extracts the Int32 column at `idx` from the first batch; panics on a null.
    let int32_values = |idx: usize| -> Vec<i32> {
        first
            .column(idx)
            .as_any()
            .downcast_ref::<Int32Array>()
            .unwrap()
            .iter()
            .map(|value| value.unwrap())
            .collect()
    };
    let ids = int32_values(0);
    let doubled = int32_values(1);

    for (id, twice) in ids.iter().zip(doubled.iter()) {
        assert_eq!(*twice, id * 2);
    }
}
/// Several SQL-expression columns can be added in one call.
#[tokio::test]
async fn test_add_multiple_columns() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(("x", Int32, [10, 20, 30])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_add_multi_columns", reader)
        .execute()
        .await
        .unwrap();

    // Both derived columns are submitted through a single transform.
    let transform = NewColumnTransform::SqlExpressions(vec![
        ("y".into(), "x + 1".into()),
        ("z".into(), "x * x".into()),
    ]);
    table.add_columns(transform, None).await.unwrap();

    // The original column plus the two new ones must all be present.
    let schema = table.schema().await.unwrap();
    assert_eq!(schema.fields().len(), 3);
    for name in ["x", "y", "z"] {
        assert!(schema.field_with_name(name).is_ok());
    }
}
/// A column defined by a literal expression holds that literal in every row.
#[tokio::test]
async fn test_add_column_with_constant_expression() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(("id", Int32, [1, 2, 3])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_add_const_column", reader)
        .execute()
        .await
        .unwrap();

    // The new column's expression is just the literal 42.
    let transform = NewColumnTransform::SqlExpressions(vec![("constant".into(), "42".into())]);
    table.add_columns(transform, None).await.unwrap();

    let schema = table.schema().await.unwrap();
    assert!(schema.field_with_name("constant").is_ok());

    // Every stored value must equal 42.
    let stream = table
        .query()
        .select(Select::columns(&["constant"]))
        .execute()
        .await
        .unwrap();
    let results = stream.try_collect::<Vec<_>>().await.unwrap();
    let first = &results[0];
    let constants = first["constant"]
        .as_any()
        .downcast_ref::<arrow_array::Int64Array>()
        .unwrap();
    assert!(constants.values().iter().all(|&v| v == 42));
}
// Alter Columns Tests
/// Renaming a column bumps the version and replaces the old name in the schema.
#[tokio::test]
async fn test_alter_column_rename() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(("old_name", Int32, [1, 2, 3])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_alter_rename", reader)
        .execute()
        .await
        .unwrap();

    let version_before = table.version().await.unwrap();

    // Perform the rename.
    let rename = ColumnAlteration::new("old_name".into()).rename("new_name".into());
    let outcome = table.alter_columns(&[rename]).await.unwrap();

    // A newer version must be reported.
    assert!(outcome.version > version_before);

    // Only the new name may resolve afterwards.
    let schema = table.schema().await.unwrap();
    assert!(schema.field_with_name("old_name").is_err());
    assert!(schema.field_with_name("new_name").is_ok());
}
/// A non-nullable column can be switched to nullable via `alter_columns`.
#[tokio::test]
async fn test_alter_column_set_nullable() {
    use arrow_array::RecordBatch;
    use arrow_schema::{Field, Schema};
    use std::sync::Arc;

    let db = connect("memory://").execute().await.unwrap();

    // Build a schema whose single field is declared non-nullable.
    let value_field = Field::new("value", DataType::Int32, false);
    let strict_schema = Arc::new(Schema::new(vec![value_field]));
    let source = RecordBatch::try_new(
        strict_schema.clone(),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )
    .unwrap();

    let reader = RecordBatchIterator::new(vec![Ok(source)], strict_schema);
    let table = db
        .create_table("test_alter_nullable", reader)
        .execute()
        .await
        .unwrap();

    // Precondition: the field starts out non-nullable.
    let before = table.schema().await.unwrap();
    assert!(!before.field_with_name("value").unwrap().is_nullable());

    // Relax the nullability constraint.
    let relax = ColumnAlteration::new("value".into()).set_nullable(true);
    table.alter_columns(&[relax]).await.unwrap();

    // Postcondition: the field is now nullable.
    let after = table.schema().await.unwrap();
    assert!(after.field_with_name("value").unwrap().is_nullable());
}
/// Casting an Int32 column to Int64 updates the schema and the data read back.
#[tokio::test]
async fn test_alter_column_cast_type() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(("num", Int32, [1, 2, 3])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_cast_type", reader)
        .execute()
        .await
        .unwrap();

    // Int32 -> Int64 is a supported cast.
    let widen = ColumnAlteration::new("num".into()).cast_to(DataType::Int64);
    table.alter_columns(&[widen]).await.unwrap();

    // The schema must now report the wider type.
    let schema = table.schema().await.unwrap();
    assert_eq!(
        schema.field_with_name("num").unwrap().data_type(),
        &DataType::Int64
    );

    // The queried data must come back as Int64 with unchanged values.
    let stream = table.query().execute().await.unwrap();
    let results = stream.try_collect::<Vec<_>>().await.unwrap();
    let first = &results[0];
    let values = first["num"]
        .as_any()
        .downcast_ref::<arrow_array::Int64Array>()
        .unwrap()
        .values();
    assert_eq!(values.as_ref(), &[1i64, 2, 3]);
}
/// An unsupported cast is rejected with an error that mentions "cast".
#[tokio::test]
async fn test_alter_column_invalid_cast_fails() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(("num", Int32, [1, 2, 3])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_invalid_cast", reader)
        .execute()
        .await
        .unwrap();

    // Int32 -> Float64 is expected to be refused.
    let bad_cast = ColumnAlteration::new("num".into()).cast_to(DataType::Float64);
    let failure = table.alter_columns(&[bad_cast]).await.unwrap_err();

    let message = failure.to_string();
    assert!(
        message.contains("cast"),
        "Expected error message to contain 'cast', got: {}",
        failure
    );
}
/// Multiple alterations (a rename and a nullability change) apply in one call.
#[tokio::test]
async fn test_alter_multiple_columns() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(("a", Int32, [1, 2, 3]), ("b", Int32, [4, 5, 6])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_alter_multi", reader)
        .execute()
        .await
        .unwrap();

    // Submit both alterations together.
    let rename_a = ColumnAlteration::new("a".into()).rename("alpha".into());
    let relax_b = ColumnAlteration::new("b".into()).set_nullable(true);
    table.alter_columns(&[rename_a, relax_b]).await.unwrap();

    let schema = table.schema().await.unwrap();
    assert!(schema.field_with_name("alpha").is_ok());
    assert!(schema.field_with_name("a").is_err());
    assert!(schema.field_with_name("b").unwrap().is_nullable());
}
// Drop Columns Tests
/// Dropping one column bumps the version and leaves only the other column.
#[tokio::test]
async fn test_drop_single_column() {
    let db = connect("memory://").execute().await.unwrap();

    let source =
        record_batch!(("keep", Int32, [1, 2, 3]), ("remove", Int32, [4, 5, 6])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_drop_single", reader)
        .execute()
        .await
        .unwrap();

    let version_before = table.version().await.unwrap();

    // Remove the unwanted column.
    let outcome = table.drop_columns(&["remove"]).await.unwrap();

    // A newer version must be reported.
    assert!(outcome.version > version_before);

    // Exactly one field, the kept one, must remain.
    let schema = table.schema().await.unwrap();
    assert_eq!(schema.fields().len(), 1);
    assert!(schema.field_with_name("keep").is_ok());
    assert!(schema.field_with_name("remove").is_err());
}
/// Dropping several columns at once removes exactly those columns.
#[tokio::test]
async fn test_drop_multiple_columns() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(
        ("a", Int32, [1, 2]),
        ("b", Int32, [3, 4]),
        ("c", Int32, [5, 6]),
        ("d", Int32, [7, 8])
    )
    .unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_drop_multi", reader)
        .execute()
        .await
        .unwrap();

    // Remove two of the four columns in one call.
    table.drop_columns(&["b", "d"]).await.unwrap();

    // `a` and `c` survive; `b` and `d` are gone.
    let schema = table.schema().await.unwrap();
    assert_eq!(schema.fields().len(), 2);
    for kept in ["a", "c"] {
        assert!(schema.field_with_name(kept).is_ok());
    }
    for dropped in ["b", "d"] {
        assert!(schema.field_with_name(dropped).is_err());
    }
}
/// Dropping a column leaves the values of the remaining columns untouched.
#[tokio::test]
async fn test_drop_column_preserves_data() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(
        ("id", Int32, [1, 2, 3]),
        ("name", Utf8, ["a", "b", "c"]),
        ("extra", Int32, [10, 20, 30])
    )
    .unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_drop_preserves", reader)
        .execute()
        .await
        .unwrap();

    // Remove only the `extra` column.
    table.drop_columns(&["extra"]).await.unwrap();

    // Read everything back and inspect the first batch.
    let stream = table.query().execute().await.unwrap();
    let results = stream.try_collect::<Vec<_>>().await.unwrap();
    let first = &results[0];

    assert_eq!(first.num_columns(), 2);
    assert_eq!(first.num_rows(), 3);

    let id_array = first
        .column(0)
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();
    let ids: Vec<i32> = id_array.iter().map(|value| value.unwrap()).collect();
    assert_eq!(ids, vec![1, 2, 3]);

    let name_array = first
        .column(1)
        .as_any()
        .downcast_ref::<StringArray>()
        .unwrap();
    let names: Vec<&str> = name_array.iter().map(|value| value.unwrap()).collect();
    assert_eq!(names, vec!["a", "b", "c"]);
}
// Error Case Tests
/// Dropping an unknown column fails with an error naming that column.
#[tokio::test]
async fn test_drop_nonexistent_column_fails() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(("existing", Int32, [1, 2, 3])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_drop_nonexistent", reader)
        .execute()
        .await
        .unwrap();

    // The table has no column with this name.
    let failure = table.drop_columns(&["nonexistent"]).await.unwrap_err();

    let message = failure.to_string();
    assert!(
        message.contains("nonexistent"),
        "Expected error message to contain column name 'nonexistent', got: {}",
        failure
    );
}
/// Altering an unknown column fails with an error naming that column.
#[tokio::test]
async fn test_alter_nonexistent_column_fails() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(("existing", Int32, [1, 2, 3])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_alter_nonexistent", reader)
        .execute()
        .await
        .unwrap();

    // The rename targets a column the table does not have.
    let rename = ColumnAlteration::new("nonexistent".into()).rename("new".into());
    let failure = table.alter_columns(&[rename]).await.unwrap_err();

    let message = failure.to_string();
    assert!(
        message.contains("nonexistent"),
        "Expected error message to contain column name 'nonexistent', got: {}",
        failure
    );
}
// Version Tracking Tests
/// Each schema operation reports a version that is newer than the previous one
/// and equal to what the table itself reports afterwards.
#[tokio::test]
async fn test_schema_operations_increment_version() {
    let db = connect("memory://").execute().await.unwrap();

    let source = record_batch!(("a", Int32, [1, 2, 3]), ("b", Int32, [4, 5, 6])).unwrap();
    let source_schema = source.schema();
    let reader = RecordBatchIterator::new(vec![Ok(source)], source_schema);
    let table = db
        .create_table("test_version_increment", reader)
        .execute()
        .await
        .unwrap();

    let at_creation = table.version().await.unwrap();

    // Step 1: add a column.
    let transform = NewColumnTransform::SqlExpressions(vec![("c".into(), "a + b".into())]);
    let added = table.add_columns(transform, None).await.unwrap();
    assert!(added.version > at_creation);
    let after_add = table.version().await.unwrap();
    assert_eq!(added.version, after_add);

    // Step 2: rename the column that was just added.
    let rename = ColumnAlteration::new("c".into()).rename("sum".into());
    let altered = table.alter_columns(&[rename]).await.unwrap();
    assert!(altered.version > after_add);
    let after_alter = table.version().await.unwrap();
    assert_eq!(altered.version, after_alter);

    // Step 3: drop one of the original columns.
    let dropped = table.drop_columns(&["b"]).await.unwrap();
    assert!(dropped.version > after_alter);
    let after_drop = table.version().await.unwrap();
    assert_eq!(dropped.version, after_drop);
}
}