From 2e197ef38790b2e1b4a7e3e517ba8d03982d0d68 Mon Sep 17 00:00:00 2001 From: Rob Meng Date: Tue, 21 May 2024 18:53:19 -0400 Subject: [PATCH] feat: upgrade lance to 0.11.0 (#1317) upgrade lance and make fixes for the upgrade --- .github/workflows/python.yml | 2 +- .github/workflows/rust.yml | 4 ++-- Cargo.toml | 8 ++++---- nodejs/src/table.rs | 13 ++++++++++++- python/pyproject.toml | 2 +- python/src/table.rs | 13 ++++++++++++- rust/lancedb/Cargo.toml | 4 ++-- rust/lancedb/src/connection.rs | 2 +- rust/lancedb/src/embeddings.rs | 2 +- rust/lancedb/src/table.rs | 4 +--- 10 files changed, 37 insertions(+), 17 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 0659766b..a4f19340 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -75,7 +75,7 @@ jobs: timeout-minutes: 30 strategy: matrix: - python-minor-version: ["8", "11"] + python-minor-version: ["9", "11"] runs-on: "ubuntu-22.04" defaults: run: diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 8a1201d3..c5b4e874 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -74,11 +74,11 @@ jobs: run: | sudo apt update sudo apt install -y protobuf-compiler libssl-dev - - name: Build - run: cargo build --all-features - name: Start S3 integration test environment working-directory: . 
run: docker compose up --detach --wait + - name: Build + run: cargo build --all-features - name: Run tests run: cargo test --all-features - name: Run examples diff --git a/Cargo.toml b/Cargo.toml index df6f739f..82c5cdd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"] categories = ["database-implementations"] [workspace.dependencies] -lance = { "version" = "=0.10.18", "features" = ["dynamodb"] } -lance-index = { "version" = "=0.10.18" } -lance-linalg = { "version" = "=0.10.18" } -lance-testing = { "version" = "=0.10.18" } +lance = { "version" = "=0.11.0", "features" = ["dynamodb"] } +lance-index = { "version" = "=0.11.0" } +lance-linalg = { "version" = "=0.11.0" } +lance-testing = { "version" = "=0.11.0" } # Note that this one does not include pyarrow arrow = { version = "51.0", optional = false } arrow-array = "51.0" diff --git a/nodejs/src/table.rs b/nodejs/src/table.rs index 69c4e67d..124594d3 100644 --- a/nodejs/src/table.rs +++ b/nodejs/src/table.rs @@ -267,6 +267,18 @@ impl Table { pub async fn optimize(&self, older_than_ms: Option) -> napi::Result { let inner = self.inner_ref()?; + let older_than = if let Some(ms) = older_than_ms { + if ms == i64::MIN { + return Err(napi::Error::from_reason(format!( + "older_than_ms can not be {}", + i64::MIN, + ))); + } + Duration::try_milliseconds(ms) + } else { + None + }; + let compaction_stats = inner .optimize(OptimizeAction::Compact { options: lancedb::table::CompactionOptions::default(), @@ -276,7 +288,6 @@ impl Table { .default_error()? 
.compaction .unwrap(); - let older_than = older_than_ms.map(Duration::milliseconds); let prune_stats = inner .optimize(OptimizeAction::Prune { older_than, diff --git a/python/pyproject.toml b/python/pyproject.toml index e28336ef..cd09df7b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -3,7 +3,7 @@ name = "lancedb" # version in Cargo.toml dependencies = [ "deprecation", - "pylance==0.10.12", + "pylance==0.11.0", "ratelimiter~=1.0", "requests>=2.31.0", "retry>=0.9.2", diff --git a/python/src/table.rs b/python/src/table.rs index 7b1fa632..1e12d4c0 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -230,6 +230,18 @@ impl Table { pub fn optimize(self_: PyRef<'_, Self>, cleanup_since_ms: Option) -> PyResult<&PyAny> { let inner = self_.inner_ref()?.clone(); + let older_than = if let Some(ms) = cleanup_since_ms { + if ms > i64::MAX as u64 { + return Err(PyValueError::new_err(format!( + "cleanup_since_ms must be between {} and -{}", + i64::MAX, + i64::MAX + ))); + } + Duration::try_milliseconds(ms as i64) + } else { + None + }; future_into_py(self_.py(), async move { let compaction_stats = inner .optimize(OptimizeAction::Compact { @@ -240,7 +252,6 @@ impl Table { .infer_error()? 
.compaction .unwrap(); - let older_than = cleanup_since_ms.map(|since| Duration::milliseconds(since as i64)); let prune_stats = inner .optimize(OptimizeAction::Prune { older_than, diff --git a/rust/lancedb/Cargo.toml b/rust/lancedb/Cargo.toml index d9ef3ac6..ed963445 100644 --- a/rust/lancedb/Cargo.toml +++ b/rust/lancedb/Cargo.toml @@ -40,8 +40,8 @@ serde = { version = "^1" } serde_json = { version = "1" } # For remote feature reqwest = { version = "0.11.24", features = ["gzip", "json"], optional = true } -polars-arrow = { version = ">=0.37", optional = true } -polars = { version = ">=0.37", optional = true} +polars-arrow = { version = ">=0.37,<0.40.0", optional = true } +polars = { version = ">=0.37,<0.40.0", optional = true} [dev-dependencies] tempfile = "3.5.0" diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index baea0cea..60a275f3 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -195,7 +195,7 @@ impl CreateTableBuilder { .embedding_registry() .get(&definition.embedding_name) .ok_or_else(|| Error::EmbeddingFunctionNotFound { - name: definition.embedding_name.to_string(), + name: definition.embedding_name.clone(), reason: "No embedding function found in the connection's embedding_registry" .to_string(), })?; diff --git a/rust/lancedb/src/embeddings.rs b/rust/lancedb/src/embeddings.rs index 07a5725a..007d0543 100644 --- a/rust/lancedb/src/embeddings.rs +++ b/rust/lancedb/src/embeddings.rs @@ -155,7 +155,7 @@ impl MaybeEmbedded { } None => { return Err(Error::EmbeddingFunctionNotFound { - name: embedding_def.embedding_name.to_string(), + name: embedding_def.embedding_name.clone(), reason: format!( "Table was defined with an embedding column `{}` but no embedding function was found with that name within the registry.", embedding_def.embedding_name diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index c7b56197..4485d242 100644 --- a/rust/lancedb/src/table.rs +++ 
b/rust/lancedb/src/table.rs @@ -1301,7 +1301,6 @@ impl NativeTable { num_partitions as usize, /*num_bits=*/ 8, num_sub_vectors as usize, - false, index.distance_type.into(), index.max_iterations as usize, ); @@ -1345,7 +1344,6 @@ impl NativeTable { ivf_params.max_iters = index.max_iterations as usize; let hnsw_params = HnswBuildParams::default() .num_edges(index.m as usize) - .max_num_edges(index.m as usize * 2) .ef_construction(index.ef_construction as usize); let sq_params = SQBuildParams { sample_rate: index.sample_rate as usize, @@ -1731,7 +1729,7 @@ impl TableInternal for NativeTable { } => { stats.prune = Some( self.cleanup_old_versions( - older_than.unwrap_or(Duration::days(7)), + older_than.unwrap_or(Duration::try_days(7).expect("valid delta")), delete_unverified, ) .await?,