Compare commits

...

8 Commits

Author SHA1 Message Date
Lance Release
c89240b16c Bump version: 0.30.0-beta.6 → 0.30.0 2026-03-16 22:46:19 +00:00
Lance Release
099ff355a4 Bump version: 0.30.0-beta.5 → 0.30.0-beta.6 2026-03-16 22:46:17 +00:00
Weston Pace
c5995fda67 feat: update lance dependency to 3.0.0 release (#3137)
## Summary
- Update all 14 lance crates from `3.0.0-rc.3` (git source) to `3.0.0`
(crates.io release)
- Remove git/tag source references since 3.0.0 is published on crates.io

## Test plan
- [x] `cargo check --features remote --tests --examples` passes
- [x] `cargo clippy --features remote --tests --examples` passes
- [ ] CI passes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 15:29:18 -07:00
Weston Pace
25eb1fbfa4 fix: restore storage options on copy in localstack tests (#3148) 2026-03-16 14:02:19 -07:00
Weston Pace
4ac41c5c3f fix(ci): upgrade LocalStack to 4.0 for S3 integration tests (#3147)
## Summary
- Upgrade LocalStack from 3.3 to 4.0 in `docker-compose.yml` to fix S3
integration test failures in CI
- Version 3.3 has compatibility issues with newer Python 3.13 and
updated boto3 dependencies
- Matches the LocalStack version used successfully in the lance
repository

## Test plan
- [ ] Verify `docker compose up --detach --wait` completes successfully
in CI
- [ ] All tests in `test_s3.py` pass (5 tests)
- [ ] All `@pytest.mark.s3_test` tests in
`test_namespace_integration.py` pass (7 tests)
- [ ] No regressions in non-integration test jobs (Mac, Windows)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 09:02:11 -07:00
Will Jones
9a5b0398ec chore: fix ci (#3139)
* Move away from buildjet, which is shutting down runners for GHA [^1]
* Add `Cargo.lock` to build jobs, so when we upgrade locked dependencies
we check the builds actually pass. CI started failing because
dependencies were changed in #3116 without running all build jobs.
* Add fixes for aws-lc-rs build in NodeJS.

[^1]: https://buildjet.com/for-github-actions/blog/we-are-shutting-down

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 06:25:40 -07:00
Pratik Dey
d1d720d08a feat(nodejs): support field/data type input in add_columns() method (#3114)
Add support for passing field/data type information into add_columns()
method, bringing parity with Python bindings. The method now accepts:

- AddColumnsSql[] - SQL expressions (existing functionality)
- Field - single Arrow field with explicit data type
- Field[] - array of Arrow fields with explicit data types
- Schema - Arrow schema with explicit data types

New columns added via Field/Schema are initialized with null values. All
field-based columns must be nullable due to null initialization.

Resolves #3107

---------

Signed-off-by: Pratik <pratikrocks.dey11@gmail.com>
Co-authored-by: Claude <noreply@anthropic.com>
2026-03-13 12:57:14 -07:00
Mesut-Doner
c2e543f1b7 feat(rust): support Expr in projection query (#3069)
Referred and followed [`Select::Dynamic`] implementation. 

Closes #3039
2026-03-13 12:54:26 -07:00
21 changed files with 413 additions and 87 deletions

View File

@@ -7,6 +7,7 @@ on:
pull_request:
paths:
- Cargo.toml
- Cargo.lock
- nodejs/**
- rust/**
- docs/src/js/**

View File

@@ -19,6 +19,7 @@ on:
paths:
- .github/workflows/npm-publish.yml
- Cargo.toml # Change in dependency frequently breaks builds
- Cargo.lock
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -124,7 +125,12 @@ jobs:
pre_build: |-
set -e &&
apt-get update &&
apt-get install -y protobuf-compiler pkg-config
apt-get install -y protobuf-compiler pkg-config &&
# The base image (manylinux2014-cross) sets TARGET_CC to the old
# GCC 4.8 cross-compiler. aws-lc-sys checks TARGET_CC before CC,
# so it picks up GCC even though the napi-rs image sets CC=clang.
# Override to use the image's clang-18 which supports -fuse-ld=lld.
export TARGET_CC=clang TARGET_CXX=clang++
- target: x86_64-unknown-linux-musl
# This one seems to need some extra memory
host: ubuntu-2404-8x-x64
@@ -144,9 +150,10 @@ jobs:
set -e &&
apt-get update &&
apt-get install -y protobuf-compiler pkg-config &&
# https://github.com/aws/aws-lc-rs/issues/737#issuecomment-2725918627
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc/aarch64-unknown-linux-gnu/4.8.5/crtbeginS.o /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/crtbeginS.o &&
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/gcc &&
export TARGET_CC=clang TARGET_CXX=clang++ &&
# The manylinux2014 sysroot has glibc 2.17 headers which lack
# AT_HWCAP2 (added in Linux 3.17). Define it for aws-lc-sys.
export CFLAGS="$CFLAGS -DAT_HWCAP2=26" &&
rustup target add aarch64-unknown-linux-gnu
- target: aarch64-unknown-linux-musl
host: ubuntu-2404-8x-x64
@@ -266,7 +273,7 @@ jobs:
- target: x86_64-unknown-linux-gnu
host: ubuntu-latest
- target: aarch64-unknown-linux-gnu
host: buildjet-16vcpu-ubuntu-2204-arm
host: ubuntu-2404-8x-arm64
node:
- '20'
runs-on: ${{ matrix.settings.host }}

View File

@@ -9,6 +9,7 @@ on:
paths:
- .github/workflows/pypi-publish.yml
- Cargo.toml # Change in dependency frequently breaks builds
- Cargo.lock
env:
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"

View File

@@ -7,6 +7,7 @@ on:
pull_request:
paths:
- Cargo.toml
- Cargo.lock
- python/**
- rust/**
- .github/workflows/python.yml

View File

@@ -7,6 +7,7 @@ on:
pull_request:
paths:
- Cargo.toml
- Cargo.lock
- rust/**
- .github/workflows/rust.yml

92
Cargo.lock generated
View File

@@ -3070,8 +3070,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ae4126c38f86d37d5479295c135a1b81688b6c799d6c39d44b1855f9a0e712c"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -3852,7 +3853,7 @@ dependencies = [
"libc",
"percent-encoding",
"pin-project-lite",
"socket2 0.5.10",
"socket2 0.6.0",
"system-configuration",
"tokio",
"tower-service",
@@ -4241,8 +4242,9 @@ dependencies = [
[[package]]
name = "lance"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45c38e7c7c448a77203b50c05f5bf308cadb3fe074e7dde22e4302206ea44d7a"
dependencies = [
"arrow",
"arrow-arith",
@@ -4308,8 +4310,9 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "174bae71821e5535a594f9ecd64b07e6ffe729498a5d27a0ca926b4ff2714664"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4329,8 +4332,9 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4494187b4244fa56c8cf911d7358e5322fa1cf7d8f6a213b3155a4139eb556b1"
dependencies = [
"arrayref",
"paste",
@@ -4339,8 +4343,9 @@ dependencies = [
[[package]]
name = "lance-core"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce0a5d4427c42f7d9302771bb2aa474b09316a8919633abad0030c4d58013e89"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4377,8 +4382,9 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61c6151ee46a35886ac6c804f870de2992c36de4198da1c64d806529f246d2d7"
dependencies = [
"arrow",
"arrow-array",
@@ -4408,8 +4414,9 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d130ec0426173daeb14a6032d18a1eb8c1f8858dca3f42735f0d2c7dde3c47f"
dependencies = [
"arrow",
"arrow-array",
@@ -4427,8 +4434,9 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "965281449dc6b47d4669e11572f7b2a5e0e6b206dc1f21bce0f3b5f7dc533ece"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4465,8 +4473,9 @@ dependencies = [
[[package]]
name = "lance-file"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c8230f9a2e63170eef3d4f8f7576593a73ff8a2f8d5a75400e511e74d4d308f"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4498,8 +4507,9 @@ dependencies = [
[[package]]
name = "lance-index"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "950a0bc24e01044fc86260c54e810f2d051660d89caa727234f09b252446c425"
dependencies = [
"arrow",
"arrow-arith",
@@ -4562,8 +4572,9 @@ dependencies = [
[[package]]
name = "lance-io"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "023188a98822bc87c87726893fbd6c1deea2dccf5c4214051b789b6047bdc2a4"
dependencies = [
"arrow",
"arrow-arith",
@@ -4604,8 +4615,9 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bc4b0c9892f985dac4d27c058d21b6adfda5c73a7aaf697a92f300f36995ded"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4621,8 +4633,9 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8aab88d6c91b045ac3d3967c73b547d9fed5559a91a907f3f4586d0f2d6cc6"
dependencies = [
"arrow",
"async-trait",
@@ -4634,8 +4647,9 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0beb555ea8ac3bfc220519585ffaea34caa61cc24edce533b98f48474042e3f0"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4679,8 +4693,9 @@ dependencies = [
[[package]]
name = "lance-table"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7511e4e951d3938316b16f1a64360cfae0b44d22d9c5aca971ed7b5bc83caea7"
dependencies = [
"arrow",
"arrow-array",
@@ -4719,8 +4734,9 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3de153d46ba1c1fc6dbd4c93ed8b0b99b9001902d5df1159d855ef9fd2613591"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -6443,7 +6459,7 @@ version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
dependencies = [
"heck 0.4.1",
"heck 0.5.0",
"itertools 0.14.0",
"log",
"multimap",
@@ -6632,7 +6648,7 @@ dependencies = [
"quinn-udp",
"rustc-hash",
"rustls 0.23.31",
"socket2 0.5.10",
"socket2 0.6.0",
"thiserror 2.0.17",
"tokio",
"tracing",
@@ -6669,7 +6685,7 @@ dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2 0.5.10",
"socket2 0.6.0",
"tracing",
"windows-sys 0.60.2",
]
@@ -7735,7 +7751,7 @@ version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451"
dependencies = [
"heck 0.4.1",
"heck 0.5.0",
"proc-macro2",
"quote",
"syn 2.0.114",
@@ -7747,7 +7763,7 @@ version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54254b8531cafa275c5e096f62d48c81435d1015405a91198ddb11e967301d40"
dependencies = [
"heck 0.4.1",
"heck 0.5.0",
"proc-macro2",
"quote",
"syn 2.0.114",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance = { version = "=3.0.0", default-features = false }
lance-core = { version = "=3.0.0" }
lance-datagen = { version = "=3.0.0" }
lance-file = { version = "=3.0.0" }
lance-io = { version = "=3.0.0", default-features = false }
lance-index = { version = "=3.0.0" }
lance-linalg = { version = "=3.0.0" }
lance-namespace = { version = "=3.0.0" }
lance-namespace-impls = { version = "=3.0.0", default-features = false }
lance-table = { version = "=3.0.0" }
lance-testing = { version = "=3.0.0" }
lance-datafusion = { version = "=3.0.0" }
lance-encoding = { version = "=3.0.0" }
lance-arrow = { version = "=3.0.0" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }

View File

@@ -1,7 +1,7 @@
version: "3.9"
services:
localstack:
image: localstack/localstack:3.3
image: localstack/localstack:4.0
ports:
- 4566:4566
environment:

View File

@@ -71,11 +71,12 @@ Add new columns with defined values.
#### Parameters
* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
pairs of column names and
the SQL expression to use to calculate the value of the new column. These
expressions will be evaluated for each row in the table, and can
reference existing columns in the table.
* **newColumnTransforms**: `Field`&lt;`any`&gt; \| `Field`&lt;`any`&gt;[] \| `Schema`&lt;`any`&gt; \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
Either:
- An array of objects with column names and SQL expressions to calculate values
- A single Arrow Field defining one column with its data type (column will be initialized with null values)
- An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
- An Arrow Schema defining columns with their data types (columns will be initialized with null values)
#### Returns
@@ -484,19 +485,7 @@ Modeled after ``VACUUM`` in PostgreSQL.
- Prune: Removes old versions of the dataset
- Index: Optimizes the indices, adding new data to existing indices
Experimental API
----------------
The optimization process is undergoing active development and may change.
Our goal with these changes is to improve the performance of optimization and
reduce the complexity.
That being said, it is essential today to run optimize if you want the best
performance. It should be stable and safe to use in production, but it our
hope that the API may be simplified (or not even need to be called) in the
future.
The frequency an application shoudl call optimize is based on the frequency of
The frequency an application should call optimize is based on the frequency of
data modifications. If data is frequently added, deleted, or updated then
optimize should be run frequently. A good rule of thumb is to run optimize if
you have added or modified 100,000 or more records or run more than 20 data

View File

@@ -37,3 +37,12 @@ tbl.optimize({cleanupOlderThan: new Date()});
```ts
deleteUnverified: boolean;
```
Because they may be part of an in-progress transaction, files newer than
7 days old are not deleted by default. If you are sure that there are no
in-progress transactions, then you can set this to true to delete all
files older than `cleanupOlderThan`.
**WARNING**: This should only be set to true if you can guarantee that
no other process is currently working on this dataset. Otherwise the
dataset could be put into a corrupted state.

View File

@@ -1259,6 +1259,98 @@ describe("schema evolution", function () {
expect(await table.schema()).toEqual(expectedSchema);
});
it("can add columns with schema for explicit data types", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
// Define schema for new columns with explicit data types
// Note: All columns must be nullable when using addColumns with Schema
// because they are initially populated with null values
const newColumnsSchema = new Schema([
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
new Field("rating", new Int32(), true),
]);
const result = await table.addColumns(newColumnsSchema);
expect(result).toHaveProperty("version");
expect(result.version).toBe(2);
const expectedSchema = new Schema([
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
new Field("rating", new Int32(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
// Verify that new columns are populated with null values
const results = await table.query().toArray();
expect(results).toHaveLength(1);
expect(results[0].price).toBeNull();
expect(results[0].category).toBeNull();
expect(results[0].rating).toBeNull();
});
it("can add a single column using Field", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
// Add a single field
const priceField = new Field("price", new Float64(), true);
const result = await table.addColumns(priceField);
expect(result).toHaveProperty("version");
expect(result.version).toBe(2);
const expectedSchema = new Schema([
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
});
it("can add multiple columns using array of Fields", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
// Add multiple fields as array
const fields = [
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
];
const result = await table.addColumns(fields);
expect(result).toHaveProperty("version");
expect(result.version).toBe(2);
const expectedSchema = new Schema([
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
});
it("can alter the columns in the schema", async function () {
const con = await connect(tmpDir.name);
const schema = new Schema([

View File

@@ -5,12 +5,15 @@ import {
Table as ArrowTable,
Data,
DataType,
Field,
IntoVector,
MultiVector,
Schema,
dataTypeToJson,
fromDataToBuffer,
fromTableToBuffer,
isMultiVector,
makeEmptyTable,
tableFromIPC,
} from "./arrow";
@@ -391,15 +394,16 @@ export abstract class Table {
abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
/**
* Add new columns with defined values.
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
* the SQL expression to use to calculate the value of the new column. These
* expressions will be evaluated for each row in the table, and can
* reference existing columns in the table.
* @param {AddColumnsSql[] | Field | Field[] | Schema} newColumnTransforms Either:
* - An array of objects with column names and SQL expressions to calculate values
* - A single Arrow Field defining one column with its data type (column will be initialized with null values)
* - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
* - An Arrow Schema defining columns with their data types (columns will be initialized with null values)
* @returns {Promise<AddColumnsResult>} A promise that resolves to an object
* containing the new version number of the table after adding the columns.
*/
abstract addColumns(
newColumnTransforms: AddColumnsSql[],
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
): Promise<AddColumnsResult>;
/**
@@ -804,9 +808,40 @@ export class LocalTable extends Table {
// TODO: Support BatchUDF
async addColumns(
newColumnTransforms: AddColumnsSql[],
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
): Promise<AddColumnsResult> {
return await this.inner.addColumns(newColumnTransforms);
// Handle single Field -> convert to array of Fields
if (newColumnTransforms instanceof Field) {
newColumnTransforms = [newColumnTransforms];
}
// Handle array of Fields -> convert to Schema
if (
Array.isArray(newColumnTransforms) &&
newColumnTransforms.length > 0 &&
newColumnTransforms[0] instanceof Field
) {
const fields = newColumnTransforms as Field[];
newColumnTransforms = new Schema(fields);
}
// Handle Schema -> use schema-based approach
if (newColumnTransforms instanceof Schema) {
const schema = newColumnTransforms;
// Convert schema to buffer using Arrow IPC format
const emptyTable = makeEmptyTable(schema);
const schemaBuf = await fromTableToBuffer(emptyTable);
return await this.inner.addColumnsWithSchema(schemaBuf);
}
// Handle SQL expressions (existing functionality)
if (Array.isArray(newColumnTransforms)) {
return await this.inner.addColumns(
newColumnTransforms as AddColumnsSql[],
);
}
throw new Error("Invalid input type for addColumns");
}
async alterColumns(

View File

@@ -3,7 +3,7 @@
use std::collections::HashMap;
use lancedb::ipc::ipc_file_to_batches;
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
use lancedb::table::{
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
@@ -279,6 +279,23 @@ impl Table {
Ok(res.into())
}
#[napi(catch_unwind)]
pub async fn add_columns_with_schema(
&self,
schema_buf: Buffer,
) -> napi::Result<AddColumnsResult> {
let schema = ipc_file_to_schema(schema_buf.to_vec())
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC schema: {}", e)))?;
let transforms = NewColumnTransform::AllNulls(schema);
let res = self
.inner_ref()?
.add_columns(transforms, None)
.await
.default_error()?;
Ok(res.into())
}
#[napi(catch_unwind)]
pub async fn alter_columns(
&self,

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.30.0-beta.5"
current_version = "0.30.0"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.30.0-beta.5"
version = "0.30.0"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -147,7 +147,12 @@ class TrackingNamespace(LanceNamespace):
This simulates a credential rotation system where each call returns
new credentials that expire after credential_expires_in_seconds.
"""
modified = copy.deepcopy(storage_options) if storage_options else {}
# Start from base storage options (endpoint, region, allow_http, etc.)
# because DirectoryNamespace returns None for storage_options from
# describe_table/declare_table when no credential vendor is configured.
modified = copy.deepcopy(self.base_storage_options)
if storage_options:
modified.update(storage_options)
# Increment credentials to simulate rotation
modified["aws_access_key_id"] = f"AKID_{count}"

View File

@@ -316,6 +316,19 @@ impl<'py> IntoPyObject<'py> for PySelect {
Select::All => Ok(py.None().into_bound(py).into_any()),
Select::Columns(columns) => Ok(columns.into_pyobject(py)?.into_any()),
Select::Dynamic(columns) => Ok(columns.into_pyobject(py)?.into_any()),
Select::Expr(pairs) => {
// Serialize DataFusion Expr -> SQL string so Python sees the same
// format as Select::Dynamic: a list of (name, sql_string) tuples.
let sql_pairs: PyResult<Vec<(String, String)>> = pairs
.into_iter()
.map(|(name, expr)| {
lancedb::expr::expr_to_sql_string(&expr)
.map(|sql| (name, sql))
.map_err(|e| PyRuntimeError::new_err(e.to_string()))
})
.collect();
Ok(sql_pairs?.into_pyobject(py)?.into_any())
}
}
}
}

View File

@@ -339,6 +339,12 @@ impl PermutationReader {
}
Ok(false)
}
Select::Expr(columns) => {
// For Expr projections, we check if any alias is _rowid.
// We can't validate the expression itself (it may differ from _rowid)
// but we allow it through; the column will be included.
Ok(columns.iter().any(|(alias, _)| alias == ROW_ID))
}
}
}

View File

@@ -47,6 +47,25 @@ pub enum Select {
///
/// See [`Query::select`] for more details and examples
Dynamic(Vec<(String, String)>),
/// Advanced selection using type-safe DataFusion expressions
///
/// Similar to [`Select::Dynamic`] but uses [`datafusion_expr::Expr`] instead of
/// raw SQL strings. Use [`crate::expr`] helpers to build expressions:
///
/// ```
/// use lancedb::expr::{col, lit};
/// use lancedb::query::Select;
///
/// // SELECT id, id * 2 AS id2 FROM ...
/// let selection = Select::expr_projection(&[
/// ("id", col("id")),
/// ("id2", col("id") * lit(2)),
/// ]);
/// ```
///
/// Note: For remote/server-side queries the expressions are serialized to SQL strings
/// automatically (same as [`Select::Dynamic`]).
Expr(Vec<(String, datafusion_expr::Expr)>),
}
impl Select {
@@ -69,6 +88,29 @@ impl Select {
.collect(),
)
}
/// Create a typed-expression projection.
///
/// This is a convenience method for creating a [`Select::Expr`] variant from
/// a slice of `(name, expr)` pairs where each `expr` is a [`datafusion_expr::Expr`].
///
/// # Example
/// ```
/// use lancedb::expr::{col, lit};
/// use lancedb::query::Select;
///
/// let selection = Select::expr_projection(&[
/// ("id", col("id")),
/// ("id2", col("id") * lit(2)),
/// ]);
/// ```
pub fn expr_projection(columns: &[(impl AsRef<str>, datafusion_expr::Expr)]) -> Self {
Self::Expr(
columns
.iter()
.map(|(name, expr)| (name.as_ref().to_string(), expr.clone()))
.collect(),
)
}
}
/// A trait for converting a type to a query vector
@@ -1591,6 +1633,58 @@ mod tests {
});
}
#[tokio::test]
async fn test_select_with_expr_projection() {
// Mirrors test_select_with_transform but uses Select::Expr instead of Select::Dynamic
let tmp_dir = tempdir().unwrap();
let dataset_path = tmp_dir.path().join("test_expr.lance");
let uri = dataset_path.to_str().unwrap();
let batches = make_non_empty_batches();
let conn = connect(uri).execute().await.unwrap();
let table = conn
.create_table("my_table", batches)
.execute()
.await
.unwrap();
use crate::expr::{col, lit};
let query = table.query().limit(10).select(Select::expr_projection(&[
("id2", col("id") * lit(2i32)),
("id", col("id")),
]));
let schema = query.output_schema().await.unwrap();
assert_eq!(
schema,
Arc::new(ArrowSchema::new(vec![
ArrowField::new("id2", DataType::Int32, true),
ArrowField::new("id", DataType::Int32, true),
]))
);
let result = query.execute().await;
let mut batches = result
.expect("should have result")
.try_collect::<Vec<_>>()
.await
.unwrap();
assert_eq!(batches.len(), 1);
let batch = batches.pop().unwrap();
// id and id2
assert_eq!(batch.num_columns(), 2);
let id: &Int32Array = batch.column_by_name("id").unwrap().as_primitive();
let id2: &Int32Array = batch.column_by_name("id2").unwrap().as_primitive();
id.iter().zip(id2.iter()).for_each(|(id, id2)| {
let id = id.unwrap();
let id2 = id2.unwrap();
assert_eq!(id * 2, id2);
});
}
#[tokio::test]
async fn test_execute_no_vector() {
// TODO: Switch back to memory://foo after https://github.com/lancedb/lancedb/issues/1051

View File

@@ -477,6 +477,16 @@ impl<S: HttpSend> RemoteTable<S> {
}));
body["columns"] = alias_map.into();
}
Select::Expr(pairs) => {
let alias_map: Result<serde_json::Map<String, serde_json::Value>> = pairs
.iter()
.map(|(name, expr)| {
expr_to_sql_string(expr)
.map(|sql| (name.clone(), serde_json::Value::String(sql)))
})
.collect();
body["columns"] = alias_map?.into();
}
}
if params.fast_search {

View File

@@ -186,6 +186,13 @@ pub async fn create_plan(
Select::Dynamic(ref select_with_transform) => {
scanner.project_with_transform(select_with_transform.as_slice())?;
}
Select::Expr(ref expr_pairs) => {
let sql_pairs: crate::Result<Vec<(String, String)>> = expr_pairs
.iter()
.map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
.collect();
scanner.project_with_transform(sql_pairs?.as_slice())?;
}
Select::All => {}
}
@@ -340,6 +347,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
.to_string(),
});
}
Select::Expr(pairs) => {
let sql_pairs: crate::Result<Vec<(String, String)>> = pairs
.iter()
.map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
.collect();
let sql_pairs = sql_pairs?;
Some(Box::new(QueryTableRequestColumns {
column_names: None,
column_aliases: Some(sql_pairs.into_iter().collect()),
}))
}
};
// Check for unsupported features
@@ -411,6 +429,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
.to_string(),
});
}
Select::Expr(pairs) => {
let sql_pairs: crate::Result<Vec<(String, String)>> = pairs
.iter()
.map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
.collect();
let sql_pairs = sql_pairs?;
Some(Box::new(QueryTableRequestColumns {
column_names: None,
column_aliases: Some(sql_pairs.into_iter().collect()),
}))
}
};
// Handle full text search if present