mirror of
https://github.com/lancedb/lancedb.git
synced 2026-03-28 19:40:39 +00:00
Compare commits
6 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5cdb15feef | ||
|
|
7a3eea927f | ||
|
|
5dd9b072d8 | ||
|
|
6dde379d44 | ||
|
|
55f09ef1cd | ||
|
|
e9d8651d18 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.25.0-beta.0"
|
current_version = "0.26.0"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
3
.github/workflows/npm-publish.yml
vendored
3
.github/workflows/npm-publish.yml
vendored
@@ -318,7 +318,7 @@ jobs:
|
|||||||
- name: Setup node
|
- name: Setup node
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 24
|
||||||
cache: npm
|
cache: npm
|
||||||
cache-dependency-path: nodejs/package-lock.json
|
cache-dependency-path: nodejs/package-lock.json
|
||||||
registry-url: "https://registry.npmjs.org"
|
registry-url: "https://registry.npmjs.org"
|
||||||
@@ -350,6 +350,7 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
DRY_RUN: ${{ !startsWith(github.ref, 'refs/tags/v') }}
|
DRY_RUN: ${{ !startsWith(github.ref, 'refs/tags/v') }}
|
||||||
run: |
|
run: |
|
||||||
|
npm config set provenance true
|
||||||
ARGS="--access public"
|
ARGS="--access public"
|
||||||
if [[ $DRY_RUN == "true" ]]; then
|
if [[ $DRY_RUN == "true" ]]; then
|
||||||
ARGS="$ARGS --dry-run"
|
ARGS="$ARGS --dry-run"
|
||||||
|
|||||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -4926,7 +4926,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.25.0-beta.0"
|
version = "0.26.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash",
|
"ahash",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
@@ -5006,7 +5006,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
version = "0.25.0-beta.0"
|
version = "0.26.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow-array",
|
"arrow-array",
|
||||||
"arrow-ipc",
|
"arrow-ipc",
|
||||||
@@ -5026,7 +5026,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.28.0-beta.0"
|
version = "0.29.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-core</artifactId>
|
<artifactId>lancedb-core</artifactId>
|
||||||
<version>0.25.0-beta.0</version>
|
<version>0.26.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.25.0-beta.0</version>
|
<version>0.26.0-final.0</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.25.0-beta.0</version>
|
<version>0.26.0-final.0</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>${project.artifactId}</name>
|
<name>${project.artifactId}</name>
|
||||||
<description>LanceDB Java SDK Parent POM</description>
|
<description>LanceDB Java SDK Parent POM</description>
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.25.0-beta.0"
|
version = "0.26.0"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.25.0-beta.0",
|
"version": "0.26.0",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.25.0-beta.0",
|
"version": "0.26.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.25.0-beta.0",
|
"version": "0.26.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.25.0-beta.0",
|
"version": "0.26.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.25.0-beta.0",
|
"version": "0.26.0",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.25.0-beta.0",
|
"version": "0.26.0",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.25.0-beta.0",
|
"version": "0.26.0",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.25.0-beta.0",
|
"version": "0.26.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.25.0-beta.0",
|
"version": "0.26.0",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"private": false,
|
"private": false,
|
||||||
"version": "0.25.0-beta.0",
|
"version": "0.26.0",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.29.0"
|
current_version = "0.29.1"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.29.0"
|
version = "0.29.1"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.25.0-beta.0"
|
version = "0.26.0"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -79,6 +79,7 @@ pub mod datafusion;
|
|||||||
pub(crate) mod dataset;
|
pub(crate) mod dataset;
|
||||||
pub mod delete;
|
pub mod delete;
|
||||||
pub mod merge;
|
pub mod merge;
|
||||||
|
pub mod schema_evolution;
|
||||||
pub mod update;
|
pub mod update;
|
||||||
|
|
||||||
use crate::index::waiter::wait_for_index;
|
use crate::index::waiter::wait_for_index;
|
||||||
@@ -91,6 +92,7 @@ pub use lance::dataset::scanner::DatasetRecordBatchStream;
|
|||||||
use lance::dataset::statistics::DatasetStatisticsExt;
|
use lance::dataset::statistics::DatasetStatisticsExt;
|
||||||
use lance_index::frag_reuse::FRAG_REUSE_INDEX_NAME;
|
use lance_index::frag_reuse::FRAG_REUSE_INDEX_NAME;
|
||||||
pub use lance_index::optimize::OptimizeOptions;
|
pub use lance_index::optimize::OptimizeOptions;
|
||||||
|
pub use schema_evolution::{AddColumnsResult, AlterColumnsResult, DropColumnsResult};
|
||||||
use serde_with::skip_serializing_none;
|
use serde_with::skip_serializing_none;
|
||||||
pub use update::{UpdateBuilder, UpdateResult};
|
pub use update::{UpdateBuilder, UpdateResult};
|
||||||
|
|
||||||
@@ -396,33 +398,6 @@ pub struct MergeResult {
|
|||||||
pub num_attempts: u32,
|
pub num_attempts: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
|
||||||
pub struct AddColumnsResult {
|
|
||||||
// The commit version associated with the operation.
|
|
||||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
|
||||||
/// a commit version.
|
|
||||||
#[serde(default)]
|
|
||||||
pub version: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
|
||||||
pub struct AlterColumnsResult {
|
|
||||||
// The commit version associated with the operation.
|
|
||||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
|
||||||
/// a commit version.
|
|
||||||
#[serde(default)]
|
|
||||||
pub version: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
|
||||||
pub struct DropColumnsResult {
|
|
||||||
// The commit version associated with the operation.
|
|
||||||
// A version of `0` indicates compatibility with legacy servers that do not return
|
|
||||||
/// a commit version.
|
|
||||||
#[serde(default)]
|
|
||||||
pub version: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A trait for anything "table-like". This is used for both native tables (which target
|
/// A trait for anything "table-like". This is used for both native tables (which target
|
||||||
/// Lance datasets) and remote tables (which target LanceDB cloud)
|
/// Lance datasets) and remote tables (which target LanceDB cloud)
|
||||||
///
|
///
|
||||||
@@ -3091,27 +3066,15 @@ impl BaseTable for NativeTable {
|
|||||||
transforms: NewColumnTransform,
|
transforms: NewColumnTransform,
|
||||||
read_columns: Option<Vec<String>>,
|
read_columns: Option<Vec<String>>,
|
||||||
) -> Result<AddColumnsResult> {
|
) -> Result<AddColumnsResult> {
|
||||||
let mut dataset = self.dataset.get_mut().await?;
|
schema_evolution::execute_add_columns(self, transforms, read_columns).await
|
||||||
dataset.add_columns(transforms, read_columns, None).await?;
|
|
||||||
Ok(AddColumnsResult {
|
|
||||||
version: dataset.version().version,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<AlterColumnsResult> {
|
async fn alter_columns(&self, alterations: &[ColumnAlteration]) -> Result<AlterColumnsResult> {
|
||||||
let mut dataset = self.dataset.get_mut().await?;
|
schema_evolution::execute_alter_columns(self, alterations).await
|
||||||
dataset.alter_columns(alterations).await?;
|
|
||||||
Ok(AlterColumnsResult {
|
|
||||||
version: dataset.version().version,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
|
async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
|
||||||
let mut dataset = self.dataset.get_mut().await?;
|
schema_evolution::execute_drop_columns(self, columns).await
|
||||||
dataset.drop_columns(columns).await?;
|
|
||||||
Ok(DropColumnsResult {
|
|
||||||
version: dataset.version().version,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
|
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
|
||||||
|
|||||||
666
rust/lancedb/src/table/schema_evolution.rs
Normal file
666
rust/lancedb/src/table/schema_evolution.rs
Normal file
@@ -0,0 +1,666 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
//! Schema evolution operations for LanceDB tables.
|
||||||
|
//!
|
||||||
|
//! This module provides functionality to modify the schema of existing tables:
|
||||||
|
//! - [`add_columns`](execute_add_columns): Add new columns using SQL expressions
|
||||||
|
//! - [`alter_columns`](execute_alter_columns): Rename columns, change types, or modify nullability
|
||||||
|
//! - [`drop_columns`](execute_drop_columns): Remove columns from the table
|
||||||
|
|
||||||
|
use lance::dataset::{ColumnAlteration, NewColumnTransform};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use super::NativeTable;
|
||||||
|
use crate::Result;
|
||||||
|
|
||||||
|
/// The result of an add columns operation.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||||
|
pub struct AddColumnsResult {
|
||||||
|
// The commit version associated with the operation.
|
||||||
|
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||||
|
/// a commit version.
|
||||||
|
#[serde(default)]
|
||||||
|
pub version: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The result of an alter columns operation.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||||
|
pub struct AlterColumnsResult {
|
||||||
|
// The commit version associated with the operation.
|
||||||
|
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||||
|
/// a commit version.
|
||||||
|
#[serde(default)]
|
||||||
|
pub version: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The result of a drop columns operation.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
|
||||||
|
pub struct DropColumnsResult {
|
||||||
|
// The commit version associated with the operation.
|
||||||
|
// A version of `0` indicates compatibility with legacy servers that do not return
|
||||||
|
/// a commit version.
|
||||||
|
#[serde(default)]
|
||||||
|
pub version: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Internal implementation of the add columns logic.
|
||||||
|
///
|
||||||
|
/// Adds new columns to the table using the provided transforms.
|
||||||
|
pub(crate) async fn execute_add_columns(
|
||||||
|
table: &NativeTable,
|
||||||
|
transforms: NewColumnTransform,
|
||||||
|
read_columns: Option<Vec<String>>,
|
||||||
|
) -> Result<AddColumnsResult> {
|
||||||
|
let mut dataset = table.dataset.get_mut().await?;
|
||||||
|
dataset.add_columns(transforms, read_columns, None).await?;
|
||||||
|
Ok(AddColumnsResult {
|
||||||
|
version: dataset.version().version,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Internal implementation of the alter columns logic.
|
||||||
|
///
|
||||||
|
/// Alters existing columns in the table (rename, change type, or modify nullability).
|
||||||
|
pub(crate) async fn execute_alter_columns(
|
||||||
|
table: &NativeTable,
|
||||||
|
alterations: &[ColumnAlteration],
|
||||||
|
) -> Result<AlterColumnsResult> {
|
||||||
|
let mut dataset = table.dataset.get_mut().await?;
|
||||||
|
dataset.alter_columns(alterations).await?;
|
||||||
|
Ok(AlterColumnsResult {
|
||||||
|
version: dataset.version().version,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Internal implementation of the drop columns logic.
|
||||||
|
///
|
||||||
|
/// Removes columns from the table.
|
||||||
|
pub(crate) async fn execute_drop_columns(
|
||||||
|
table: &NativeTable,
|
||||||
|
columns: &[&str],
|
||||||
|
) -> Result<DropColumnsResult> {
|
||||||
|
let mut dataset = table.dataset.get_mut().await?;
|
||||||
|
dataset.drop_columns(columns).await?;
|
||||||
|
Ok(DropColumnsResult {
|
||||||
|
version: dataset.version().version,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use arrow_array::{record_batch, Int32Array, RecordBatchIterator, StringArray};
|
||||||
|
use arrow_schema::DataType;
|
||||||
|
use futures::TryStreamExt;
|
||||||
|
use lance::dataset::ColumnAlteration;
|
||||||
|
|
||||||
|
use crate::connect;
|
||||||
|
use crate::query::{ExecutableQuery, QueryBase, Select};
|
||||||
|
use crate::table::NewColumnTransform;
|
||||||
|
|
||||||
|
// Add Columns Tests
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_add_columns_with_sql_expression() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(("id", Int32, [1, 2, 3, 4, 5])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_add_columns",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let initial_version = table.version().await.unwrap();
|
||||||
|
|
||||||
|
// Add a computed column
|
||||||
|
let result = table
|
||||||
|
.add_columns(
|
||||||
|
NewColumnTransform::SqlExpressions(vec![("doubled".into(), "id * 2".into())]),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Version should increment
|
||||||
|
assert!(result.version > initial_version);
|
||||||
|
|
||||||
|
// Verify the new column exists with correct values
|
||||||
|
let batches = table
|
||||||
|
.query()
|
||||||
|
.select(Select::columns(&["id", "doubled"]))
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.try_collect::<Vec<_>>()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let batch = &batches[0];
|
||||||
|
let ids: Vec<i32> = batch
|
||||||
|
.column(0)
|
||||||
|
.as_any()
|
||||||
|
.downcast_ref::<Int32Array>()
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.map(|v| v.unwrap())
|
||||||
|
.collect();
|
||||||
|
let doubled: Vec<i32> = batch
|
||||||
|
.column(1)
|
||||||
|
.as_any()
|
||||||
|
.downcast_ref::<Int32Array>()
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.map(|v| v.unwrap())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
for (id, d) in ids.iter().zip(doubled.iter()) {
|
||||||
|
assert_eq!(*d, id * 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_add_multiple_columns() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(("x", Int32, [10, 20, 30])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_add_multi_columns",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Add multiple columns at once
|
||||||
|
table
|
||||||
|
.add_columns(
|
||||||
|
NewColumnTransform::SqlExpressions(vec![
|
||||||
|
("y".into(), "x + 1".into()),
|
||||||
|
("z".into(), "x * x".into()),
|
||||||
|
]),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Verify schema has all columns
|
||||||
|
let schema = table.schema().await.unwrap();
|
||||||
|
assert_eq!(schema.fields().len(), 3);
|
||||||
|
assert!(schema.field_with_name("x").is_ok());
|
||||||
|
assert!(schema.field_with_name("y").is_ok());
|
||||||
|
assert!(schema.field_with_name("z").is_ok());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_add_column_with_constant_expression() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(("id", Int32, [1, 2, 3])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_add_const_column",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Add a column with a constant value
|
||||||
|
table
|
||||||
|
.add_columns(
|
||||||
|
NewColumnTransform::SqlExpressions(vec![("constant".into(), "42".into())]),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let schema = table.schema().await.unwrap();
|
||||||
|
assert!(schema.field_with_name("constant").is_ok());
|
||||||
|
|
||||||
|
// Verify all values are 42
|
||||||
|
let batches = table
|
||||||
|
.query()
|
||||||
|
.select(Select::columns(&["constant"]))
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.try_collect::<Vec<_>>()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let batch = &batches[0];
|
||||||
|
let values = batch["constant"]
|
||||||
|
.as_any()
|
||||||
|
.downcast_ref::<arrow_array::Int64Array>()
|
||||||
|
.unwrap()
|
||||||
|
.values();
|
||||||
|
assert!(values.iter().all(|&v| v == 42));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Alter Columns Tests
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_alter_column_rename() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(("old_name", Int32, [1, 2, 3])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_alter_rename",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let initial_version = table.version().await.unwrap();
|
||||||
|
|
||||||
|
// Rename the column
|
||||||
|
let result = table
|
||||||
|
.alter_columns(&[ColumnAlteration::new("old_name".into()).rename("new_name".into())])
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Version should increment
|
||||||
|
assert!(result.version > initial_version);
|
||||||
|
|
||||||
|
// Verify rename
|
||||||
|
let schema = table.schema().await.unwrap();
|
||||||
|
assert!(schema.field_with_name("old_name").is_err());
|
||||||
|
assert!(schema.field_with_name("new_name").is_ok());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_alter_column_set_nullable() {
|
||||||
|
use arrow_array::RecordBatch;
|
||||||
|
use arrow_schema::{Field, Schema};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
// Create a schema with a non-nullable field
|
||||||
|
let schema = Arc::new(Schema::new(vec![Field::new(
|
||||||
|
"value",
|
||||||
|
DataType::Int32,
|
||||||
|
false,
|
||||||
|
)]));
|
||||||
|
let batch = RecordBatch::try_new(
|
||||||
|
schema.clone(),
|
||||||
|
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_alter_nullable",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Initially non-nullable
|
||||||
|
let schema = table.schema().await.unwrap();
|
||||||
|
assert!(!schema.field_with_name("value").unwrap().is_nullable());
|
||||||
|
|
||||||
|
// Make it nullable
|
||||||
|
table
|
||||||
|
.alter_columns(&[ColumnAlteration::new("value".into()).set_nullable(true)])
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Verify it's now nullable
|
||||||
|
let schema = table.schema().await.unwrap();
|
||||||
|
assert!(schema.field_with_name("value").unwrap().is_nullable());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_alter_column_cast_type() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(("num", Int32, [1, 2, 3])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_cast_type",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Cast Int32 to Int64 (a supported cast)
|
||||||
|
table
|
||||||
|
.alter_columns(&[ColumnAlteration::new("num".into()).cast_to(DataType::Int64)])
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Verify type changed
|
||||||
|
let schema = table.schema().await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
schema.field_with_name("num").unwrap().data_type(),
|
||||||
|
&DataType::Int64
|
||||||
|
);
|
||||||
|
|
||||||
|
// Query the data and verify the returned type is correct
|
||||||
|
let batches = table
|
||||||
|
.query()
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.try_collect::<Vec<_>>()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let batch = &batches[0];
|
||||||
|
let values = batch["num"]
|
||||||
|
.as_any()
|
||||||
|
.downcast_ref::<arrow_array::Int64Array>()
|
||||||
|
.unwrap()
|
||||||
|
.values();
|
||||||
|
assert_eq!(values.as_ref(), &[1i64, 2, 3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_alter_column_invalid_cast_fails() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(("num", Int32, [1, 2, 3])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_invalid_cast",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Casting Int32 to Float64 is not supported
|
||||||
|
let result = table
|
||||||
|
.alter_columns(&[ColumnAlteration::new("num".into()).cast_to(DataType::Float64)])
|
||||||
|
.await;
|
||||||
|
let err = result.unwrap_err();
|
||||||
|
assert!(
|
||||||
|
err.to_string().contains("cast"),
|
||||||
|
"Expected error message to contain 'cast', got: {}",
|
||||||
|
err
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_alter_multiple_columns() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(("a", Int32, [1, 2, 3]), ("b", Int32, [4, 5, 6])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_alter_multi",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Alter multiple columns at once
|
||||||
|
table
|
||||||
|
.alter_columns(&[
|
||||||
|
ColumnAlteration::new("a".into()).rename("alpha".into()),
|
||||||
|
ColumnAlteration::new("b".into()).set_nullable(true),
|
||||||
|
])
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let schema = table.schema().await.unwrap();
|
||||||
|
assert!(schema.field_with_name("alpha").is_ok());
|
||||||
|
assert!(schema.field_with_name("a").is_err());
|
||||||
|
assert!(schema.field_with_name("b").unwrap().is_nullable());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drop Columns Tests
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_drop_single_column() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch =
|
||||||
|
record_batch!(("keep", Int32, [1, 2, 3]), ("remove", Int32, [4, 5, 6])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_drop_single",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let initial_version = table.version().await.unwrap();
|
||||||
|
|
||||||
|
// Drop a column
|
||||||
|
let result = table.drop_columns(&["remove"]).await.unwrap();
|
||||||
|
|
||||||
|
// Version should increment
|
||||||
|
assert!(result.version > initial_version);
|
||||||
|
|
||||||
|
// Verify column was dropped
|
||||||
|
let schema = table.schema().await.unwrap();
|
||||||
|
assert_eq!(schema.fields().len(), 1);
|
||||||
|
assert!(schema.field_with_name("keep").is_ok());
|
||||||
|
assert!(schema.field_with_name("remove").is_err());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_drop_multiple_columns() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(
|
||||||
|
("a", Int32, [1, 2]),
|
||||||
|
("b", Int32, [3, 4]),
|
||||||
|
("c", Int32, [5, 6]),
|
||||||
|
("d", Int32, [7, 8])
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_drop_multi",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Drop multiple columns
|
||||||
|
table.drop_columns(&["b", "d"]).await.unwrap();
|
||||||
|
|
||||||
|
// Verify only a and c remain
|
||||||
|
let schema = table.schema().await.unwrap();
|
||||||
|
assert_eq!(schema.fields().len(), 2);
|
||||||
|
assert!(schema.field_with_name("a").is_ok());
|
||||||
|
assert!(schema.field_with_name("c").is_ok());
|
||||||
|
assert!(schema.field_with_name("b").is_err());
|
||||||
|
assert!(schema.field_with_name("d").is_err());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_drop_column_preserves_data() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(
|
||||||
|
("id", Int32, [1, 2, 3]),
|
||||||
|
("name", Utf8, ["a", "b", "c"]),
|
||||||
|
("extra", Int32, [10, 20, 30])
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_drop_preserves",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Drop the extra column
|
||||||
|
table.drop_columns(&["extra"]).await.unwrap();
|
||||||
|
|
||||||
|
// Verify remaining data is intact
|
||||||
|
let batches = table
|
||||||
|
.query()
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.try_collect::<Vec<_>>()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let batch = &batches[0];
|
||||||
|
assert_eq!(batch.num_columns(), 2);
|
||||||
|
assert_eq!(batch.num_rows(), 3);
|
||||||
|
|
||||||
|
let ids: Vec<i32> = batch
|
||||||
|
.column(0)
|
||||||
|
.as_any()
|
||||||
|
.downcast_ref::<Int32Array>()
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.map(|v| v.unwrap())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(ids, vec![1, 2, 3]);
|
||||||
|
|
||||||
|
let names: Vec<&str> = batch
|
||||||
|
.column(1)
|
||||||
|
.as_any()
|
||||||
|
.downcast_ref::<StringArray>()
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.map(|v| v.unwrap())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(names, vec!["a", "b", "c"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error Case Tests
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_drop_nonexistent_column_fails() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(("existing", Int32, [1, 2, 3])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_drop_nonexistent",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Try to drop a column that doesn't exist
|
||||||
|
let result = table.drop_columns(&["nonexistent"]).await;
|
||||||
|
let err = result.unwrap_err();
|
||||||
|
assert!(
|
||||||
|
err.to_string().contains("nonexistent"),
|
||||||
|
"Expected error message to contain column name 'nonexistent', got: {}",
|
||||||
|
err
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_alter_nonexistent_column_fails() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(("existing", Int32, [1, 2, 3])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_alter_nonexistent",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Try to alter a column that doesn't exist
|
||||||
|
let result = table
|
||||||
|
.alter_columns(&[ColumnAlteration::new("nonexistent".into()).rename("new".into())])
|
||||||
|
.await;
|
||||||
|
let err = result.unwrap_err();
|
||||||
|
assert!(
|
||||||
|
err.to_string().contains("nonexistent"),
|
||||||
|
"Expected error message to contain column name 'nonexistent', got: {}",
|
||||||
|
err
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Version Tracking Tests
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_schema_operations_increment_version() {
|
||||||
|
let conn = connect("memory://").execute().await.unwrap();
|
||||||
|
|
||||||
|
let batch = record_batch!(("a", Int32, [1, 2, 3]), ("b", Int32, [4, 5, 6])).unwrap();
|
||||||
|
let schema = batch.schema();
|
||||||
|
|
||||||
|
let table = conn
|
||||||
|
.create_table(
|
||||||
|
"test_version_increment",
|
||||||
|
RecordBatchIterator::new(vec![Ok(batch)], schema),
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let v1 = table.version().await.unwrap();
|
||||||
|
|
||||||
|
// Add column increments version
|
||||||
|
let add_result = table
|
||||||
|
.add_columns(
|
||||||
|
NewColumnTransform::SqlExpressions(vec![("c".into(), "a + b".into())]),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert!(add_result.version > v1);
|
||||||
|
let v2 = table.version().await.unwrap();
|
||||||
|
assert_eq!(add_result.version, v2);
|
||||||
|
|
||||||
|
// Alter column increments version
|
||||||
|
let alter_result = table
|
||||||
|
.alter_columns(&[ColumnAlteration::new("c".into()).rename("sum".into())])
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert!(alter_result.version > v2);
|
||||||
|
let v3 = table.version().await.unwrap();
|
||||||
|
assert_eq!(alter_result.version, v3);
|
||||||
|
|
||||||
|
// Drop column increments version
|
||||||
|
let drop_result = table.drop_columns(&["b"]).await.unwrap();
|
||||||
|
assert!(drop_result.version > v3);
|
||||||
|
let v4 = table.version().await.unwrap();
|
||||||
|
assert_eq!(drop_result.version, v4);
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user