Compare commits

...

2 Commits

Author SHA1 Message Date
lancedb automation
4b35c50409 chore: update lance dependency to v4.0.0-beta.10 2026-03-12 22:48:28 +00:00
Weston Pace
216c1b5f77 docs: remove experimental label from optimize and warn about delete_unverified (#3128)
## Summary
- Removes the "Experimental API" section from `optimize` method
documentation across Rust, Python, and TypeScript
- Adds a warning to `delete_unverified` documentation in all bindings:
this should only be set to true if you can guarantee no other process is
working on the dataset, otherwise it could be corrupted
- Fixes a typo ("shoudl" → "should")

Closes #3125


🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 14:37:42 +08:00
7 changed files with 90 additions and 110 deletions

64
Cargo.lock generated
View File

@@ -3070,8 +3070,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4241,8 +4241,8 @@ dependencies = [
[[package]]
name = "lance"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow",
"arrow-arith",
@@ -4308,8 +4308,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4329,8 +4329,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrayref",
"paste",
@@ -4339,8 +4339,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4377,8 +4377,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow",
"arrow-array",
@@ -4408,8 +4408,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow",
"arrow-array",
@@ -4427,8 +4427,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4465,8 +4465,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4498,8 +4498,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow",
"arrow-arith",
@@ -4562,8 +4562,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow",
"arrow-arith",
@@ -4604,8 +4604,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4621,8 +4621,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow",
"async-trait",
@@ -4634,8 +4634,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4679,8 +4679,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow",
"arrow-array",
@@ -4719,8 +4719,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "3.0.0-rc.3"
source = "git+https://github.com/lance-format/lance.git?tag=v3.0.0-rc.3#de393a26a068dd297929ca7d798e43dc31c57337"
version = "4.0.0-beta.10"
source = "git+https://github.com/lance-format/lance.git?tag=v4.0.0-beta.10#3f74834171a9ce231325e3604a0410886bc3612a"
dependencies = [
"arrow-array",
"arrow-schema",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=4.0.0-beta.10", default-features = false, "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=4.0.0-beta.10", default-features = false, "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=4.0.0-beta.10", default-features = false, "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=4.0.0-beta.10", "tag" = "v4.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }

View File

@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>3.1.0-beta.2</lance-core.version>
<lance-core.version>4.0.0-beta.10</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -84,6 +84,16 @@ export interface OptimizeOptions {
* tbl.optimize({cleanupOlderThan: new Date()});
*/
cleanupOlderThan: Date;
/**
* Because they may be part of an in-progress transaction, files less than
* 7 days old are not deleted by default. If you are sure that there are no
* in-progress transactions, then you can set this to true to delete all
* files older than `cleanupOlderThan`.
*
* **WARNING**: This should only be set to true if you can guarantee that
* no other process is currently working on this dataset. Otherwise the
* dataset could be put into a corrupted state.
*/
deleteUnverified: boolean;
}
@@ -501,19 +511,7 @@ export abstract class Table {
* - Index: Optimizes the indices, adding new data to existing indices
*
*
* Experimental API
* ----------------
*
* The optimization process is undergoing active development and may change.
* Our goal with these changes is to improve the performance of optimization and
* reduce the complexity.
*
* That being said, it is essential today to run optimize if you want the best
* performance. It should be stable and safe to use in production, but it our
* hope that the API may be simplified (or not even need to be called) in the
* future.
*
* The frequency an application shoudl call optimize is based on the frequency of
* The frequency an application should call optimize is based on the frequency of
* data modifications. If data is frequently added, deleted, or updated then
* optimize should be run frequently. A good rule of thumb is to run optimize if
* you have added or modified 100,000 or more records or run more than 20 data

View File

@@ -1506,22 +1506,17 @@ class Table(ABC):
in-progress operation (e.g. appending new data) and these files will not
be deleted unless they are at least 7 days old. If delete_unverified is True
then these files will be deleted regardless of their age.
.. warning::
This should only be set to True if you can guarantee that no other
process is currently working on this dataset. Otherwise the dataset
could be put into a corrupted state.
retrain: bool, default False
This parameter is no longer used and is deprecated.
Experimental API
----------------
The optimization process is undergoing active development and may change.
Our goal with these changes is to improve the performance of optimization and
reduce the complexity.
That being said, it is essential today to run optimize if you want the best
performance. It should be stable and safe to use in production, but it our
hope that the API may be simplified (or not even need to be called) in the
future.
The frequency an application shoudl call optimize is based on the frequency of
The frequency an application should call optimize is based on the frequency of
data modifications. If data is frequently added, deleted, or updated then
optimize should be run frequently. A good rule of thumb is to run optimize if
you have added or modified 100,000 or more records or run more than 20 data
@@ -3047,22 +3042,17 @@ class LanceTable(Table):
in-progress operation (e.g. appending new data) and these files will not
be deleted unless they are at least 7 days old. If delete_unverified is True
then these files will be deleted regardless of their age.
.. warning::
This should only be set to True if you can guarantee that no other
process is currently working on this dataset. Otherwise the dataset
could be put into a corrupted state.
retrain: bool, default False
This parameter is no longer used and is deprecated.
Experimental API
----------------
The optimization process is undergoing active development and may change.
Our goal with these changes is to improve the performance of optimization and
reduce the complexity.
That being said, it is essential today to run optimize if you want the best
performance. It should be stable and safe to use in production, but it our
hope that the API may be simplified (or not even need to be called) in the
future.
The frequency an application shoudl call optimize is based on the frequency of
The frequency an application should call optimize is based on the frequency of
data modifications. If data is frequently added, deleted, or updated then
optimize should be run frequently. A good rule of thumb is to run optimize if
you have added or modified 100,000 or more records or run more than 20 data
@@ -4630,22 +4620,17 @@ class AsyncTable:
in-progress operation (e.g. appending new data) and these files will not
be deleted unless they are at least 7 days old. If delete_unverified is True
then these files will be deleted regardless of their age.
.. warning::
This should only be set to True if you can guarantee that no other
process is currently working on this dataset. Otherwise the dataset
could be put into a corrupted state.
retrain: bool, default False
This parameter is no longer used and is deprecated.
Experimental API
----------------
The optimization process is undergoing active development and may change.
Our goal with these changes is to improve the performance of optimization and
reduce the complexity.
That being said, it is essential today to run optimize if you want the best
performance. It should be stable and safe to use in production, but it our
hope that the API may be simplified (or not even need to be called) in the
future.
The frequency an application shoudl call optimize is based on the frequency of
The frequency an application should call optimize is based on the frequency of
data modifications. If data is frequently added, deleted, or updated then
optimize should be run frequently. A good rule of thumb is to run optimize if
you have added or modified 100,000 or more records or run more than 20 data

View File

@@ -951,17 +951,7 @@ impl Table {
/// * Prune: Removes old versions of the dataset
/// * Index: Optimizes the indices, adding new data to existing indices
///
/// <section class="warning">Experimental API</section>
///
/// The optimization process is undergoing active development and may change.
/// Our goal with these changes is to improve the performance of optimization and
/// reduce the complexity.
///
/// That being said, it is essential today to run optimize if you want the best
/// performance. It should be stable and safe to use in production, but it our
/// hope that the API may be simplified (or not even need to be called) in the future.
///
/// The frequency an application shoudl call optimize is based on the frequency of
/// The frequency an application should call optimize is based on the frequency of
/// data modifications. If data is frequently added, deleted, or updated then
/// optimize should be run frequently. A good rule of thumb is to run optimize if
/// you have added or modified 100,000 or more records or run more than 20 data

View File

@@ -64,6 +64,9 @@ pub enum OptimizeAction {
older_than: Option<Duration>,
/// Because they may be part of an in-progress transaction, files less than 7 days old are not deleted by default.
/// If you are sure that there are no in-progress transactions, then you can set this to `true` to delete all files older than `older_than`.
///
/// **WARNING**: This should only be set to true if you can guarantee that no other process is
/// currently working on this dataset. Otherwise the dataset could be put into a corrupted state.
delete_unverified: Option<bool>,
/// If true, an error will be returned if there are any old versions that are still tagged.
error_if_tagged_old_versions: Option<bool>,
@@ -117,6 +120,10 @@ pub(crate) async fn optimize_indices(table: &NativeTable, options: &OptimizeOpti
/// If you are sure that there are no in-progress transactions, then you
/// can set this to `true` to delete all files older than `older_than`.
///
/// **WARNING**: This should only be set to true if you can guarantee that
/// no other process is currently working on this dataset. Otherwise the
/// dataset could be put into a corrupted state.
///
/// This calls into [lance::dataset::Dataset::cleanup_old_versions] and
/// returns the result.
pub(crate) async fn cleanup_old_versions(