feat: upgrade Lance to v0.37.0 (#2672)

Changelogs:

* https://github.com/lancedb/lance/releases/tag/v0.37.0
* https://github.com/lancedb/lance/releases/tag/v0.36.0
This commit is contained in:
Will Jones
2025-09-23 13:41:47 -07:00
committed by GitHub
parent e921c90c1b
commit 1ab60fae7f
7 changed files with 109 additions and 60 deletions

100
Cargo.lock generated
View File

@@ -1153,7 +1153,7 @@ dependencies = [
"bitflags 2.9.4", "bitflags 2.9.4",
"cexpr", "cexpr",
"clang-sys", "clang-sys",
"itertools 0.11.0", "itertools 0.12.1",
"lazy_static", "lazy_static",
"lazycell", "lazycell",
"log", "log",
@@ -2929,6 +2929,18 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55"
[[package]]
name = "fastbloom"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18c1ddb9231d8554c2d6bdf4cfaabf0c59251658c68b6c95cd52dd0c513a912a"
dependencies = [
"getrandom 0.3.3",
"libm",
"rand 0.9.2",
"siphasher",
]
[[package]] [[package]]
name = "fastdivide" name = "fastdivide"
version = "0.4.2" version = "0.4.2"
@@ -3028,8 +3040,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]] [[package]]
name = "fsst" name = "fsst"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe0a0b1d16ce6b863be8ab766004d89ebf0779fd6ce31b0ef3bbc7fedaaad373"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"rand 0.9.2", "rand 0.9.2",
@@ -4206,8 +4219,9 @@ dependencies = [
[[package]] [[package]]
name = "lance" name = "lance"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42171f2af5d377e6bbcc8a8572144ee15b73a8f78ceb6160f1adeabf0d0f3e3c"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-arith", "arrow-arith",
@@ -4270,8 +4284,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-arrow" name = "lance-arrow"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25ef9499a1e581112f45fbf743fdc8e24830cda0bd13396b11c71aa6e6cba083"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-buffer", "arrow-buffer",
@@ -4289,8 +4304,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-bitpacking" name = "lance-bitpacking"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1101fffd5b161bbdc6e932d6c0a7f94cb1752b0f8cd6d18ef9064052ab901a84"
dependencies = [ dependencies = [
"arrayref", "arrayref",
"paste", "paste",
@@ -4299,8 +4315,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-core" name = "lance-core"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "527ee5e6472d058d8c66c702fbe318a3f60f971e652e60dcfc6349bdbc9b0733"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-buffer", "arrow-buffer",
@@ -4335,8 +4352,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-datafusion" name = "lance-datafusion"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65a80f7f15f2d941ec7b8253625cbb8e12081ea27584dd1fbc657fb9fb377f7a"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4365,8 +4383,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-datagen" name = "lance-datagen"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0495c8afa18f246ac4b337c47d7827560283783963dd2177862d91161478fd79"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4383,8 +4402,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-encoding" name = "lance-encoding"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e80e9ae49d68b95d58e77d9177f68983dce4f0803ef42840e1631b38dd66adc"
dependencies = [ dependencies = [
"arrow-arith", "arrow-arith",
"arrow-array", "arrow-array",
@@ -4412,6 +4432,7 @@ dependencies = [
"prost-types", "prost-types",
"rand 0.9.2", "rand 0.9.2",
"snafu", "snafu",
"strum",
"tokio", "tokio",
"tracing", "tracing",
"xxhash-rust", "xxhash-rust",
@@ -4420,8 +4441,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-file" name = "lance-file"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1707f9f5097b36c82d3a8524bb41c762c80d5dfa5e32aa7bfc6a1c0847a1cce"
dependencies = [ dependencies = [
"arrow-arith", "arrow-arith",
"arrow-array", "arrow-array",
@@ -4455,8 +4477,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-index" name = "lance-index"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28ab52586a5a7f5371a5abf4862968231f8c0232ce0780bc456f1ec16e9370f9"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4477,6 +4500,7 @@ dependencies = [
"datafusion-sql", "datafusion-sql",
"deepsize", "deepsize",
"dirs", "dirs",
"fastbloom",
"fst", "fst",
"futures", "futures",
"half", "half",
@@ -4491,6 +4515,7 @@ dependencies = [
"lance-io", "lance-io",
"lance-linalg", "lance-linalg",
"lance-table", "lance-table",
"libm",
"log", "log",
"num-traits", "num-traits",
"object_store", "object_store",
@@ -4507,13 +4532,15 @@ dependencies = [
"tempfile", "tempfile",
"tokio", "tokio",
"tracing", "tracing",
"twox-hash",
"uuid", "uuid",
] ]
[[package]] [[package]]
name = "lance-io" name = "lance-io"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d606f9f6a7f8ec2cacf28dfce7b2fc39e7db9f0ec77f907b8e47c756e3dd163b"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-arith", "arrow-arith",
@@ -4553,8 +4580,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-linalg" name = "lance-linalg"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9f1a94a5d966ff1eae817a835e3a57b34f73300f83a43bb28e7e2806695b8ba"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-buffer", "arrow-buffer",
@@ -4577,8 +4605,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-table" name = "lance-table"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fac5c0ca6e5c285645465b95fb99fc464a1fd22a6d4b32ae0e0760f06b4b8a7f"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4616,8 +4645,9 @@ dependencies = [
[[package]] [[package]]
name = "lance-testing" name = "lance-testing"
version = "0.35.0" version = "0.37.0"
source = "git+https://github.com/lancedb/lance.git?tag=v0.35.0-beta.4#e842a8f922b90c298c356dd1c6afdc83ca5253f2" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384acc1dd13379a2ae24f3e3635d9c1f4fb4dc1534f7ffd2740c268f2eb73455"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-schema", "arrow-schema",
@@ -7781,6 +7811,15 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "strum"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
dependencies = [
"strum_macros",
]
[[package]] [[package]]
name = "strum_macros" name = "strum_macros"
version = "0.25.3" version = "0.25.3"
@@ -8441,6 +8480,9 @@ name = "twox-hash"
version = "2.1.1" version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56"
dependencies = [
"rand 0.9.2",
]
[[package]] [[package]]
name = "typenum" name = "typenum"

View File

@@ -15,14 +15,14 @@ categories = ["database-implementations"]
rust-version = "1.78.0" rust-version = "1.78.0"
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.35.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"] }
lance-io = { "version" = "=0.35.0", default-features = false, "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-io = { "version" = "=0.37.0", default-features = false }
lance-index = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-index = "=0.37.0"
lance-linalg = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-linalg = "=0.37.0"
lance-table = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-table = "=0.37.0"
lance-testing = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-testing = "=0.37.0"
lance-datafusion = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-datafusion = "=0.37.0"
lance-encoding = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" } lance-encoding = "=0.37.0"
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "55.1", optional = false } arrow = { version = "55.1", optional = false }
arrow-array = "55.1" arrow-array = "55.1"

View File

@@ -1,4 +1,5 @@
import argparse import argparse
import re
import sys import sys
import json import json
@@ -18,8 +19,12 @@ def run_command(command: str) -> str:
def get_latest_stable_version() -> str: def get_latest_stable_version() -> str:
version_line = run_command("cargo info lance | grep '^version:'") version_line = run_command("cargo info lance | grep '^version:'")
version = version_line.split(" ")[1].strip() # Example output: "version: 0.35.0 (latest 0.37.0)"
return version match = re.search(r'\(latest ([0-9.]+)\)', version_line)
if match:
return match.group(1)
# Fallback: use the first version after 'version:'
return version_line.split("version:")[1].split()[0].strip()
def get_latest_preview_version() -> str: def get_latest_preview_version() -> str:

View File

@@ -1470,10 +1470,7 @@ class Table(ABC):
be deleted unless they are at least 7 days old. If delete_unverified is True be deleted unless they are at least 7 days old. If delete_unverified is True
then these files will be deleted regardless of their age. then these files will be deleted regardless of their age.
retrain: bool, default False retrain: bool, default False
If True, retrain the vector indices, this would refine the IVF clustering This parameter is no longer used and is deprecated.
and quantization, which may improve the search accuracy. It's faster than
re-creating the index from scratch, so it's recommended to try this first,
when the data distribution has changed significantly.
Experimental API Experimental API
---------------- ----------------
@@ -2835,10 +2832,7 @@ class LanceTable(Table):
be deleted unless they are at least 7 days old. If delete_unverified is True be deleted unless they are at least 7 days old. If delete_unverified is True
then these files will be deleted regardless of their age. then these files will be deleted regardless of their age.
retrain: bool, default False retrain: bool, default False
If True, retrain the vector indices, this would refine the IVF clustering This parameter is no longer used and is deprecated.
and quantization, which may improve the search accuracy. It's faster than
re-creating the index from scratch, so it's recommended to try this first,
when the data distribution has changed significantly.
Experimental API Experimental API
---------------- ----------------
@@ -4298,10 +4292,7 @@ class AsyncTable:
be deleted unless they are at least 7 days old. If delete_unverified is True be deleted unless they are at least 7 days old. If delete_unverified is True
then these files will be deleted regardless of their age. then these files will be deleted regardless of their age.
retrain: bool, default False retrain: bool, default False
If True, retrain the vector indices, this would refine the IVF clustering This parameter is no longer used and is deprecated.
and quantization, which may improve the search accuracy. It's faster than
re-creating the index from scratch, so it's recommended to try this first,
when the data distribution has changed significantly.
Experimental API Experimental API
---------------- ----------------
@@ -4324,10 +4315,19 @@ class AsyncTable:
cleanup_since_ms: Optional[int] = None cleanup_since_ms: Optional[int] = None
if cleanup_older_than is not None: if cleanup_older_than is not None:
cleanup_since_ms = round(cleanup_older_than.total_seconds() * 1000) cleanup_since_ms = round(cleanup_older_than.total_seconds() * 1000)
if retrain:
import warnings
warnings.warn(
"The 'retrain' parameter is deprecated and will be removed in a "
"future version.",
DeprecationWarning,
)
return await self._inner.optimize( return await self._inner.optimize(
cleanup_since_ms=cleanup_since_ms, cleanup_since_ms=cleanup_since_ms,
delete_unverified=delete_unverified, delete_unverified=delete_unverified,
retrain=retrain,
) )
async def list_indices(self) -> Iterable[IndexConfig]: async def list_indices(self) -> Iterable[IndexConfig]:

View File

@@ -591,12 +591,11 @@ impl Table {
} }
/// Optimize the on-disk data by compacting and pruning old data, for better performance. /// Optimize the on-disk data by compacting and pruning old data, for better performance.
#[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None, retrain=None))] #[pyo3(signature = (cleanup_since_ms=None, delete_unverified=None))]
pub fn optimize( pub fn optimize(
self_: PyRef<'_, Self>, self_: PyRef<'_, Self>,
cleanup_since_ms: Option<u64>, cleanup_since_ms: Option<u64>,
delete_unverified: Option<bool>, delete_unverified: Option<bool>,
retrain: Option<bool>,
) -> PyResult<Bound<'_, PyAny>> { ) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone(); let inner = self_.inner_ref()?.clone();
let older_than = if let Some(ms) = cleanup_since_ms { let older_than = if let Some(ms) = cleanup_since_ms {
@@ -632,10 +631,9 @@ impl Table {
.prune .prune
.unwrap(); .unwrap();
inner inner
.optimize(lancedb::table::OptimizeAction::Index(match retrain { .optimize(lancedb::table::OptimizeAction::Index(
Some(true) => OptimizeOptions::retrain(), OptimizeOptions::default(),
_ => OptimizeOptions::default(), ))
}))
.await .await
.infer_error()?; .infer_error()?;
Ok(OptimizeStats { Ok(OptimizeStats {

View File

@@ -8,7 +8,7 @@
//! values //! values
use std::cmp::max; use std::cmp::max;
use lance::table::format::{Index, Manifest}; use lance::table::format::{IndexMetadata, Manifest};
use crate::DistanceType; use crate::DistanceType;
@@ -19,7 +19,7 @@ pub struct VectorIndex {
} }
impl VectorIndex { impl VectorIndex {
pub fn new_from_format(manifest: &Manifest, index: &Index) -> Self { pub fn new_from_format(manifest: &Manifest, index: &IndexMetadata) -> Self {
let fields = index let fields = index
.fields .fields
.iter() .iter()

View File

@@ -1976,6 +1976,8 @@ impl NativeTable {
/// Delete keys from the config /// Delete keys from the config
pub async fn delete_config_keys(&self, delete_keys: &[&str]) -> Result<()> { pub async fn delete_config_keys(&self, delete_keys: &[&str]) -> Result<()> {
let mut dataset = self.dataset.get_mut().await?; let mut dataset = self.dataset.get_mut().await?;
// TODO: update this when we implement metadata APIs
#[allow(deprecated)]
dataset.delete_config_keys(delete_keys).await?; dataset.delete_config_keys(delete_keys).await?;
Ok(()) Ok(())
} }
@@ -1986,6 +1988,8 @@ impl NativeTable {
upsert_values: impl IntoIterator<Item = (String, String)>, upsert_values: impl IntoIterator<Item = (String, String)>,
) -> Result<()> { ) -> Result<()> {
let mut dataset = self.dataset.get_mut().await?; let mut dataset = self.dataset.get_mut().await?;
// TODO: update this when we implement metadata APIs
#[allow(deprecated)]
dataset.replace_schema_metadata(upsert_values).await?; dataset.replace_schema_metadata(upsert_values).await?;
Ok(()) Ok(())
} }