From 1ab60fae7fe8a584f1c5df57b6029027d566e44e Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 23 Sep 2025 13:41:47 -0700 Subject: [PATCH] feat: upgrade Lance to v0.37.0 (#2672) Change logs: * https://github.com/lancedb/lance/releases/tag/v0.37.0 * https://github.com/lancedb/lance/releases/tag/v0.36.0 --- Cargo.lock | 100 ++++++++++++++++++++++--------- Cargo.toml | 16 ++--- ci/set_lance_version.py | 9 ++- python/python/lancedb/table.py | 26 ++++---- python/src/table.rs | 10 ++-- rust/lancedb/src/index/vector.rs | 4 +- rust/lancedb/src/table.rs | 4 ++ 7 files changed, 109 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1c4f92db..98f437c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1153,7 +1153,7 @@ dependencies = [ "bitflags 2.9.4", "cexpr", "clang-sys", - "itertools 0.11.0", + "itertools 0.12.1", "lazy_static", "lazycell", "log", @@ -2929,6 +2929,18 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" +[[package]] +name = "fastbloom" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18c1ddb9231d8554c2d6bdf4cfaabf0c59251658c68b6c95cd52dd0c513a912a" +dependencies = [ + "getrandom 0.3.3", + "libm", + "rand 0.9.2", + "siphasher", +] + [[package]] name = "fastdivide" version = "0.4.2" @@ -3028,8 +3040,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0a0b1d16ce6b863be8ab766004d89ebf0779fd6ce31b0ef3bbc7fedaaad373" dependencies = [ "arrow-array", "rand 0.9.2", @@ -4206,8 +4219,9 @@ dependencies = [ [[package]] name = "lance" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42171f2af5d377e6bbcc8a8572144ee15b73a8f78ceb6160f1adeabf0d0f3e3c" dependencies = [ "arrow", "arrow-arith", @@ -4270,8 +4284,9 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ef9499a1e581112f45fbf743fdc8e24830cda0bd13396b11c71aa6e6cba083" dependencies = [ "arrow-array", "arrow-buffer", @@ -4289,8 +4304,9 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1101fffd5b161bbdc6e932d6c0a7f94cb1752b0f8cd6d18ef9064052ab901a84" dependencies = [ "arrayref", "paste", @@ -4299,8 +4315,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527ee5e6472d058d8c66c702fbe318a3f60f971e652e60dcfc6349bdbc9b0733" dependencies = [ "arrow-array", "arrow-buffer", @@ -4335,8 +4352,9 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65a80f7f15f2d941ec7b8253625cbb8e12081ea27584dd1fbc657fb9fb377f7a" dependencies = [ "arrow", "arrow-array", @@ -4365,8 +4383,9 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0495c8afa18f246ac4b337c47d7827560283783963dd2177862d91161478fd79" dependencies = [ "arrow", "arrow-array", @@ -4383,8 +4402,9 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e80e9ae49d68b95d58e77d9177f68983dce4f0803ef42840e1631b38dd66adc" dependencies = [ "arrow-arith", "arrow-array", @@ -4412,6 +4432,7 @@ dependencies = [ "prost-types", "rand 0.9.2", "snafu", + "strum", "tokio", "tracing", "xxhash-rust", @@ -4420,8 +4441,9 @@ dependencies = [ [[package]] name = "lance-file" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1707f9f5097b36c82d3a8524bb41c762c80d5dfa5e32aa7bfc6a1c0847a1cce" dependencies = [ "arrow-arith", "arrow-array", @@ -4455,8 +4477,9 @@ dependencies = [ [[package]] name = "lance-index" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28ab52586a5a7f5371a5abf4862968231f8c0232ce0780bc456f1ec16e9370f9" dependencies = [ "arrow", "arrow-array", @@ -4477,6 +4500,7 @@ dependencies = [ "datafusion-sql", "deepsize", "dirs", + "fastbloom", "fst", "futures", "half", @@ -4491,6 +4515,7 @@ dependencies = [ "lance-io", "lance-linalg", "lance-table", + "libm", "log", "num-traits", "object_store", @@ -4507,13 +4532,15 @@ dependencies = [ "tempfile", "tokio", "tracing", + "twox-hash", "uuid", ] [[package]] name = "lance-io" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d606f9f6a7f8ec2cacf28dfce7b2fc39e7db9f0ec77f907b8e47c756e3dd163b" dependencies = [ "arrow", "arrow-arith", @@ -4553,8 +4580,9 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9f1a94a5d966ff1eae817a835e3a57b34f73300f83a43bb28e7e2806695b8ba" dependencies = [ "arrow-array", "arrow-buffer", @@ -4577,8 +4605,9 @@ dependencies = [ [[package]] name = "lance-table" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac5c0ca6e5c285645465b95fb99fc464a1fd22a6d4b32ae0e0760f06b4b8a7f" dependencies = [ "arrow", "arrow-array", @@ -4616,8 +4645,9 @@ dependencies = [ [[package]] name = "lance-testing" -version = "0.35.0" -source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384acc1dd13379a2ae24f3e3635d9c1f4fb4dc1534f7ffd2740c268f2eb73455" dependencies = [ "arrow-array", "arrow-schema", @@ -7781,6 +7811,15 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros", +] + [[package]] name = "strum_macros" version = "0.25.3" @@ -8441,6 +8480,9 @@ name = "twox-hash" version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" +dependencies = [ + "rand 0.9.2", +] [[package]] name = "typenum" diff --git a/Cargo.toml b/Cargo.toml index 044c10d4..bf72f0fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,14 +15,14 @@ categories = ["database-implementations"] rust-version = "1.78.0" [workspace.dependencies] -lance = { "version" = "=0.35.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } -lance-io = { "version" = "=0.35.0", default-features = false, "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } -lance-index = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } -lance-linalg = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } -lance-table = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } -lance-testing = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } -lance-datafusion = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } -lance-encoding = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } +lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"] } +lance-io = { "version" = "=0.37.0", default-features = false } +lance-index = "=0.37.0" +lance-linalg = "=0.37.0" +lance-table = "=0.37.0" +lance-testing = "=0.37.0" +lance-datafusion = "=0.37.0" +lance-encoding = "=0.37.0" # Note that this one does not include pyarrow arrow = { version = "55.1", optional = false } arrow-array = "55.1" diff --git a/ci/set_lance_version.py b/ci/set_lance_version.py index 51d806dc..5e110490 100644 --- a/ci/set_lance_version.py +++ b/ci/set_lance_version.py @@ -1,4 +1,5 @@ import argparse +import re import sys import json @@ -18,8 +19,12 @@ def run_command(command: str) -> str: def get_latest_stable_version() -> str: version_line = run_command("cargo info lance | grep '^version:'") - version = version_line.split(" ")[1].strip() - return version + # Example output: "version: 0.35.0 (latest 0.37.0)" + match = re.search(r'\(latest ([0-9.]+)\)', version_line) + if match: + return match.group(1) + # Fallback: use the first version after 'version:' + return version_line.split("version:")[1].split()[0].strip() def get_latest_preview_version() -> str: diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index 21333f8d..7efae5d8 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -1470,10 +1470,7 @@ class Table(ABC): be deleted unless they are at least 7 days old. If delete_unverified is True then these files will be deleted regardless of their age. retrain: bool, default False - If True, retrain the vector indices, this would refine the IVF clustering - and quantization, which may improve the search accuracy. It's faster than - re-creating the index from scratch, so it's recommended to try this first, - when the data distribution has changed significantly. + This parameter is no longer used and is deprecated. Experimental API ---------------- @@ -2835,10 +2832,7 @@ class LanceTable(Table): be deleted unless they are at least 7 days old. If delete_unverified is True then these files will be deleted regardless of their age. retrain: bool, default False - If True, retrain the vector indices, this would refine the IVF clustering - and quantization, which may improve the search accuracy. It's faster than - re-creating the index from scratch, so it's recommended to try this first, - when the data distribution has changed significantly. + This parameter is no longer used and is deprecated. Experimental API ---------------- @@ -4298,10 +4292,7 @@ class AsyncTable: be deleted unless they are at least 7 days old. If delete_unverified is True then these files will be deleted regardless of their age. retrain: bool, default False - If True, retrain the vector indices, this would refine the IVF clustering - and quantization, which may improve the search accuracy. It's faster than - re-creating the index from scratch, so it's recommended to try this first, - when the data distribution has changed significantly. + This parameter is no longer used and is deprecated. Experimental API ---------------- @@ -4324,10 +4315,19 @@ class AsyncTable: cleanup_since_ms: Optional[int] = None if cleanup_older_than is not None: cleanup_since_ms = round(cleanup_older_than.total_seconds() * 1000) + + if retrain: + import warnings + + warnings.warn( + "The 'retrain' parameter is deprecated and will be removed in a " + "future version.", + DeprecationWarning, + ) + return await self._inner.optimize( cleanup_since_ms=cleanup_since_ms, delete_unverified=delete_unverified, - retrain=retrain, ) async def list_indices(self) -> Iterable[IndexConfig]: diff --git a/python/src/table.rs b/python/src/table.rs index c73fae43..dafd79b5 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -591,12 +591,11 @@ impl Table { } /// Optimize the on-disk data by compacting and pruning old data, for better performance. - #[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None, retrain=None))] + #[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None))] pub fn optimize( self_: PyRef<'_, Self>, cleanup_since_ms: Option, delete_unverified: Option, - retrain: Option, ) -> PyResult> { let inner = self_.inner_ref()?.clone(); let older_than = if let Some(ms) = cleanup_since_ms { @@ -632,10 +631,9 @@ impl Table { .prune .unwrap(); inner - .optimize(lancedb::table::OptimizeAction::Index(match retrain { - Some(true) => OptimizeOptions::retrain(), - _ => OptimizeOptions::default(), - })) + .optimize(lancedb::table::OptimizeAction::Index( + OptimizeOptions::default(), + )) .await .infer_error()?; Ok(OptimizeStats { diff --git a/rust/lancedb/src/index/vector.rs b/rust/lancedb/src/index/vector.rs index 684c3839..bf16ec6c 100644 --- a/rust/lancedb/src/index/vector.rs +++ b/rust/lancedb/src/index/vector.rs @@ -8,7 +8,7 @@ //! values use std::cmp::max; -use lance::table::format::{Index, Manifest}; +use lance::table::format::{IndexMetadata, Manifest}; use crate::DistanceType; @@ -19,7 +19,7 @@ pub struct VectorIndex { } impl VectorIndex { - pub fn new_from_format(manifest: &Manifest, index: &Index) -> Self { + pub fn new_from_format(manifest: &Manifest, index: &IndexMetadata) -> Self { let fields = index .fields .iter() diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index 33187d1f..b4d6f4bb 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -1976,6 +1976,8 @@ impl NativeTable { /// Delete keys from the config pub async fn delete_config_keys(&self, delete_keys: &[&str]) -> Result<()> { let mut dataset = self.dataset.get_mut().await?; + // TODO: update this when we implement metadata APIs + #[allow(deprecated)] dataset.delete_config_keys(delete_keys).await?; Ok(()) } @@ -1986,6 +1988,8 @@ impl NativeTable { upsert_values: impl IntoIterator, ) -> Result<()> { let mut dataset = self.dataset.get_mut().await?; + // TODO: update this when we implement metadata APIs + #[allow(deprecated)] dataset.replace_schema_metadata(upsert_values).await?; Ok(()) }