From 1dadb2aefa8c7e20098f8bdd4fad45383a8ebab7 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Mon, 30 Jun 2025 11:10:53 -0700 Subject: [PATCH] feat: upgrade to lance 0.31.0-beta.1 (#2469) ## Summary by CodeRabbit * **Chores** * Updated dependencies to newer versions for improved compatibility and stability. * **Refactor** * Improved internal handling of data ranges and stream lifetimes for enhanced performance and reliability. * Simplified code style for Python query object conversions without affecting functionality. --- Cargo.lock | 146 +++++++----------- Cargo.toml | 18 +-- node/package-lock.json | 60 +++++++ python/src/query.rs | 27 ++-- rust/lancedb/src/io/object_store.rs | 2 +- .../src/io/object_store/io_tracking.rs | 12 +- 6 files changed, 147 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e04d091..faad3c9b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1852,7 +1852,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.12.2", + "object_store", "parking_lot", "rand 0.8.5", "regex", @@ -1884,7 +1884,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.12.2", + "object_store", "parking_lot", "tokio", ] @@ -1908,7 +1908,7 @@ dependencies = [ "datafusion-session", "futures", "log", - "object_store 0.12.2", + "object_store", "tokio", ] @@ -1927,7 +1927,7 @@ dependencies = [ "indexmap 2.9.0", "libc", "log", - "object_store 0.12.2", + "object_store", "paste", "sqlparser 0.55.0", "tokio", @@ -1967,7 +1967,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", - "object_store 0.12.2", + "object_store", "rand 0.8.5", "tokio", "url", @@ -1993,7 +1993,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.12.2", + "object_store", "regex", "tokio", ] @@ -2018,7 +2018,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "object_store 0.12.2", + "object_store", "serde_json", "tokio", ] @@ -2041,7 +2041,7 @@ dependencies = [ "datafusion-expr", "futures", "log", - "object_store 0.12.2", + "object_store", "parking_lot", "rand 0.8.5", "tempfile", @@ -2340,7 +2340,7 @@ dependencies = [ "futures", "itertools 0.14.0", "log", - "object_store 0.12.2", + "object_store", "parking_lot", "tokio", ] @@ -2813,9 +2813,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b6a55335126d20524dc83cf0638b7ca1b5d9736f9064a89c47e4d028cbaccdb" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "rand 0.8.5", ] @@ -3907,9 +3906,8 @@ dependencies = [ [[package]] name = "lance" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a9bf2cf9ff1d8b8a8c822cf4aaec7023fbe056d3348dce347957695470bd19" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow", "arrow-arith", @@ -3952,7 +3950,7 @@ dependencies = [ "lazy_static", "log", "moka", - "object_store 0.11.2", + "object_store", "permutation", "pin-project", "prost", @@ -3972,9 +3970,8 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82fc2b0dd2598f4b390445d63a3906f84d928c250b208d382d4cfc22681b23c0" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow-array", "arrow-buffer", @@ -3991,9 +3988,8 @@ dependencies = [ [[package]] name = "lance-core" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4118c6e2ac2d26ff80e55708f337c4593381a32751f2a79a03d92452885bd648" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow-array", "arrow-buffer", @@ -4013,7 +4009,7 @@ dependencies = [ "mock_instant", "moka", "num_cpus", - "object_store 0.11.2", + "object_store", "pin-project", "prost", "rand 0.8.5", @@ -4029,9 +4025,8 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf8b01e9a5f15d4975423ea1495df85cf36f9036c3ed999190d4631ffbd28b6" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow", "arrow-array", @@ -4060,9 +4055,8 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fbedb84243fb2fe255b4e9ac298019d2e93e83fcc9ce2eb67a4ac7cab427dda" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow", "arrow-array", @@ -4077,9 +4071,8 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a0e078414cce96da2e2b37290d0b38a81ba6b0ebcad6806b231c2cd8d04427a" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrayref", "arrow", @@ -4113,14 +4106,14 @@ dependencies = [ "snafu", "tokio", "tracing", + "xxhash-rust", "zstd", ] [[package]] name = "lance-file" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce7deba5b59118f7ef726859ace192b7cc7da4e6639147d2a3908a2de621ce98" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow-arith", "arrow-array", @@ -4141,7 +4134,7 @@ dependencies = [ "lance-io", "log", "num-traits", - "object_store 0.11.2", + "object_store", "prost", "prost-build", "prost-types", @@ -4154,9 +4147,8 @@ dependencies = [ [[package]] name = "lance-index" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bee1aecc60c759436d8f952e2d9c4e93d1940bfbdc1869068b4ac6b01e86b2f" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow", "arrow-array", @@ -4193,7 +4185,7 @@ dependencies = [ "log", "moka", "num-traits", - "object_store 0.11.2", + "object_store", "prost", "prost-build", "rand 0.8.5", @@ -4211,9 +4203,8 @@ dependencies = [ [[package]] name = "lance-io" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a48f6a3f5433ca5095993fcd8bb47efbf473af852b9aca1e175a3d7bbf67fd" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow", "arrow-arith", @@ -4237,7 +4228,7 @@ dependencies = [ "lance-core", "lazy_static", "log", - "object_store 0.11.2", + "object_store", "path_abs", "pin-project", "prost", @@ -4252,9 +4243,8 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "620dedc792311862fc336b2651e825d2b450bbade7bfc819b7b182c3bb585c1e" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow-array", "arrow-ord", @@ -4277,9 +4267,8 @@ dependencies = [ [[package]] name = "lance-table" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b010312330943c5e81628722a50e3679688d96065348659b7913964f13765cf" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow", "arrow-array", @@ -4300,7 +4289,7 @@ dependencies = [ "lance-io", "lazy_static", "log", - "object_store 0.11.2", + "object_store", "prost", "prost-build", "prost-types", @@ -4318,9 +4307,8 @@ dependencies = [ [[package]] name = "lance-testing" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efa10957cdadef40e853896a67282cd29898775b29715eec42dd49bc3b3c8554" +version = "0.31.0" +source = "git+https://github.com/lancedb/lance.git?tag=v0.31.0-beta.1#c594f7808f17d3b7749270085e1280372a7ce8d4" dependencies = [ "arrow-array", "arrow-schema", @@ -4378,7 +4366,7 @@ dependencies = [ "log", "moka", "num-traits", - "object_store 0.11.2", + "object_store", "pin-project", "polars", "polars-arrow", @@ -4435,7 +4423,7 @@ dependencies = [ "lancedb", "lzma-sys", "neon", - "object_store 0.11.2", + "object_store", "once_cell", "snafu", "tokio", @@ -5172,38 +5160,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "object_store" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" -dependencies = [ - "async-trait", - "base64 0.22.1", - "bytes", - "chrono", - "futures", - "httparse", - "humantime", - "hyper 1.6.0", - "itertools 0.13.0", - "md-5", - "parking_lot", - "percent-encoding", - "quick-xml", - "rand 0.8.5", - "reqwest", - "ring", - "rustls-pemfile 2.2.0", - "serde", - "serde_json", - "snafu", - "tokio", - "tracing", - "url", - "walkdir", -] - [[package]] name = "object_store" version = "0.12.2" @@ -5211,14 +5167,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7781f96d79ed0f961a7021424ab01840efbda64ae7a505aaea195efc91eaaec4" dependencies = [ "async-trait", + "base64 0.22.1", "bytes", "chrono", + "form_urlencoded", "futures", "http 1.3.1", + "http-body-util", + "httparse", "humantime", + "hyper 1.6.0", "itertools 0.14.0", + "md-5", "parking_lot", "percent-encoding", + "quick-xml", + "rand 0.9.1", + "reqwest", + "ring", + "rustls-pemfile 2.2.0", + "serde", + "serde_json", + "serde_urlencoded", "thiserror 2.0.12", "tokio", "tracing", diff --git a/Cargo.toml b/Cargo.toml index c1b658b2..568e87e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,14 +21,14 @@ categories = ["database-implementations"] rust-version = "1.78.0" [workspace.dependencies] -lance = { "version" = "=0.30.0", "features" = ["dynamodb"] } -lance-io = "=0.30.0" -lance-index = "=0.30.0" -lance-linalg = "=0.30.0" -lance-table = "=0.30.0" -lance-testing = "=0.30.0" -lance-datafusion = "=0.30.0" -lance-encoding = "=0.30.0" +lance = { "version" = "=0.31.0", tag="v0.31.0-beta.1", git="https://github.com/lancedb/lance.git", features = ["dynamodb"] } +lance-io = { "version" = "=0.31.0", tag="v0.31.0-beta.1", git="https://github.com/lancedb/lance.git" } +lance-index = { "version" = "=0.31.0", tag="v0.31.0-beta.1", git="https://github.com/lancedb/lance.git" } +lance-linalg = { "version" = "=0.31.0", tag="v0.31.0-beta.1", git="https://github.com/lancedb/lance.git" } +lance-table = { "version" = "=0.31.0", tag="v0.31.0-beta.1", git="https://github.com/lancedb/lance.git" } +lance-testing = { "version" = "=0.31.0", tag="v0.31.0-beta.1", git="https://github.com/lancedb/lance.git" } +lance-datafusion = { "version" = "=0.31.0", tag="v0.31.0-beta.1", git="https://github.com/lancedb/lance.git" } +lance-encoding = { "version" = "=0.31.0", tag="v0.31.0-beta.1", git="https://github.com/lancedb/lance.git" } # Note that this one does not include pyarrow arrow = { version = "55.1", optional = false } arrow-array = "55.1" @@ -52,7 +52,7 @@ half = { "version" = "=2.5.0", default-features = false, features = [ futures = "0" log = "0.4" moka = { version = "0.12", features = ["future"] } -object_store = "0.11.0" +object_store = "0.12.0" pin-project = "1.0.7" snafu = "0.8" url = "2" diff --git a/node/package-lock.json b/node/package-lock.json index 5057b379..3c9b51fe 100644 --- a/node/package-lock.json +++ b/node/package-lock.json @@ -326,6 +326,66 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, + "node_modules/@lancedb/vectordb-darwin-arm64": { + "version": "0.21.0", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.21.0.tgz", + "integrity": "sha512-FTKbdYG36mvQ75tId+esyRfRjIBzryRhAp/6h51tiXy8gsq/TButuiPdqIXeonNModEjhu8wkzsGFwgjCcePow==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@lancedb/vectordb-darwin-x64": { + "version": "0.21.0", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.21.0.tgz", + "integrity": "sha512-vGaFBr2sQZWE0mudg3LGTHiRE7p2Qce2ogiE2VAf1DLAJ4MrIhgVmEttf966ausIwNCgml+5AzUntw6zC0Oyuw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@lancedb/vectordb-linux-arm64-gnu": { + "version": "0.21.0", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.21.0.tgz", + "integrity": "sha512-KlxqhnX4eBN6rDqrPgf/x/vLpnHK2UcIzNLpiOZzSAhooCmKmnNpfs/EXt+KRFloEQMy25AHpMpqkSPv1Q2oDA==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@lancedb/vectordb-linux-x64-gnu": { + "version": "0.21.0", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.21.0.tgz", + "integrity": "sha512-t7dkFV6kga3rqXR1rH460GdpSVuY0tw7CIc0KqsIIkBcXzUPA1n0QDoazdwPQ1MXzG/+F5WWCTp3dYWx2vP0Lw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@lancedb/vectordb-win32-x64-msvc": { + "version": "0.21.0", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.21.0.tgz", + "integrity": "sha512-yovkW61RECBTsu0S527BX1uW0jCAZK9MAsJTknXmDjp78figx4/AyI5ajT63u/Uo4EKoheeNiiLdyU4v+A9YVw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@neon-rs/cli": { "version": "0.0.160", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", diff --git a/python/src/query.rs b/python/src/query.rs index a7e92930..8fd87e7a 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -52,7 +52,7 @@ impl FromPyObject<'_> for PyLanceDB { let operator = ob.getattr("operator")?.extract::()?; let prefix_length = ob.getattr("prefix_length")?.extract()?; - Ok(PyLanceDB( + Ok(Self( MatchQuery::new(query) .with_column(Some(column)) .with_boost(boost) @@ -70,7 +70,7 @@ impl FromPyObject<'_> for PyLanceDB { let column = ob.getattr("column")?.extract()?; let slop = ob.getattr("slop")?.extract()?; - Ok(PyLanceDB( + Ok(Self( PhraseQuery::new(query) .with_column(Some(column)) .with_slop(slop) @@ -78,10 +78,10 @@ impl FromPyObject<'_> for PyLanceDB { )) } "BoostQuery" => { - let positive: PyLanceDB = ob.getattr("positive")?.extract()?; - let negative: PyLanceDB = ob.getattr("negative")?.extract()?; + let positive: Self = ob.getattr("positive")?.extract()?; + let negative: Self = ob.getattr("negative")?.extract()?; let negative_boost = ob.getattr("negative_boost")?.extract()?; - Ok(PyLanceDB( + Ok(Self( BoostQuery::new(positive.0, negative.0, negative_boost).into(), )) } @@ -103,18 +103,17 @@ impl FromPyObject<'_> for PyLanceDB { let op = Operator::try_from(operator.as_str()) .map_err(|e| PyValueError::new_err(format!("Invalid operator: {}", e)))?; - Ok(PyLanceDB(q.with_operator(op).into())) + Ok(Self(q.with_operator(op).into())) } "BooleanQuery" => { - let queries: Vec<(String, PyLanceDB)> = - ob.getattr("queries")?.extract()?; + let queries: Vec<(String, Self)> = ob.getattr("queries")?.extract()?; let mut sub_queries = Vec::with_capacity(queries.len()); for (occur, q) in queries { let occur = Occur::try_from(occur.as_str()) .map_err(|e| PyValueError::new_err(e.to_string()))?; sub_queries.push((occur, q.0)); } - Ok(PyLanceDB(BooleanQuery::new(sub_queries).into())) + Ok(Self(BooleanQuery::new(sub_queries).into())) } name => Err(PyValueError::new_err(format!( "Unsupported FTS query type: {}", @@ -155,8 +154,8 @@ impl<'py> IntoPyObject<'py> for PyLanceDB { .call((query.terms, query.column.unwrap()), Some(&kwargs)) } FtsQuery::Boost(query) => { - let positive = PyLanceDB(query.positive.as_ref().clone()).into_pyobject(py)?; - let negative = PyLanceDB(query.negative.as_ref().clone()).into_pyobject(py)?; + let positive = Self(query.positive.as_ref().clone()).into_pyobject(py)?; + let negative = Self(query.negative.as_ref().clone()).into_pyobject(py)?; let kwargs = PyDict::new(py); kwargs.set_item("negative_boost", query.negative_boost)?; namespace @@ -182,13 +181,13 @@ impl<'py> IntoPyObject<'py> for PyLanceDB { query.should.len() + query.must.len() + query.must_not.len(), ); for q in query.should { - queries.push((Occur::Should.into(), PyLanceDB(q).into_pyobject(py)?)); + queries.push((Occur::Should.into(), Self(q).into_pyobject(py)?)); } for q in query.must { - queries.push((Occur::Must.into(), PyLanceDB(q).into_pyobject(py)?)); + queries.push((Occur::Must.into(), Self(q).into_pyobject(py)?)); } for q in query.must_not { - queries.push((Occur::MustNot.into(), PyLanceDB(q).into_pyobject(py)?)); + queries.push((Occur::MustNot.into(), Self(q).into_pyobject(py)?)); } namespace diff --git a/rust/lancedb/src/io/object_store.rs b/rust/lancedb/src/io/object_store.rs index 47e58d1e..cb76a1e0 100644 --- a/rust/lancedb/src/io/object_store.rs +++ b/rust/lancedb/src/io/object_store.rs @@ -107,7 +107,7 @@ impl ObjectStore for MirroringObjectStore { self.primary.delete(location).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.primary.list(prefix) } diff --git a/rust/lancedb/src/io/object_store/io_tracking.rs b/rust/lancedb/src/io/object_store/io_tracking.rs index 71c68068..7d580e6f 100644 --- a/rust/lancedb/src/io/object_store/io_tracking.rs +++ b/rust/lancedb/src/io/object_store/io_tracking.rs @@ -119,7 +119,7 @@ impl ObjectStore for IoTrackingStore { let result = self.target.get(location).await; if let Ok(result) = &result { let num_bytes = result.range.end - result.range.start; - self.record_read(num_bytes as u64); + self.record_read(num_bytes); } result } @@ -128,12 +128,12 @@ impl ObjectStore for IoTrackingStore { let result = self.target.get_opts(location, options).await; if let Ok(result) = &result { let num_bytes = result.range.end - result.range.start; - self.record_read(num_bytes as u64); + self.record_read(num_bytes); } result } - async fn get_range(&self, location: &Path, range: std::ops::Range) -> OSResult { + async fn get_range(&self, location: &Path, range: std::ops::Range) -> OSResult { let result = self.target.get_range(location, range).await; if let Ok(result) = &result { self.record_read(result.len() as u64); @@ -144,7 +144,7 @@ impl ObjectStore for IoTrackingStore { async fn get_ranges( &self, location: &Path, - ranges: &[std::ops::Range], + ranges: &[std::ops::Range], ) -> OSResult> { let result = self.target.get_ranges(location, ranges).await; if let Ok(result) = &result { @@ -170,7 +170,7 @@ impl ObjectStore for IoTrackingStore { self.target.delete_stream(locations) } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, OSResult> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, OSResult> { self.record_read(0); self.target.list(prefix) } @@ -179,7 +179,7 @@ impl ObjectStore for IoTrackingStore { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, OSResult> { + ) -> BoxStream<'static, OSResult> { self.record_read(0); self.target.list_with_offset(prefix, offset) }