feat: upgrade lance to 0.11.0 (#1317)

Upgrade lance to 0.11.0 and apply the fixes required by the upgrade.
This commit is contained in:
Rob Meng
2024-05-21 18:53:19 -04:00
committed by GitHub
parent 4f512af024
commit 2e197ef387
10 changed files with 37 additions and 17 deletions

View File

@@ -75,7 +75,7 @@ jobs:
timeout-minutes: 30 timeout-minutes: 30
strategy: strategy:
matrix: matrix:
python-minor-version: ["8", "11"] python-minor-version: ["9", "11"]
runs-on: "ubuntu-22.04" runs-on: "ubuntu-22.04"
defaults: defaults:
run: run:

View File

@@ -74,11 +74,11 @@ jobs:
run: | run: |
sudo apt update sudo apt update
sudo apt install -y protobuf-compiler libssl-dev sudo apt install -y protobuf-compiler libssl-dev
- name: Build
run: cargo build --all-features
- name: Start S3 integration test environment - name: Start S3 integration test environment
working-directory: . working-directory: .
run: docker compose up --detach --wait run: docker compose up --detach --wait
- name: Build
run: cargo build --all-features
- name: Run tests - name: Run tests
run: cargo test --all-features run: cargo test --all-features
- name: Run examples - name: Run examples

View File

@@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"] categories = ["database-implementations"]
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.10.18", "features" = ["dynamodb"] } lance = { "version" = "=0.11.0", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.10.18" } lance-index = { "version" = "=0.11.0" }
lance-linalg = { "version" = "=0.10.18" } lance-linalg = { "version" = "=0.11.0" }
lance-testing = { "version" = "=0.10.18" } lance-testing = { "version" = "=0.11.0" }
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "51.0", optional = false } arrow = { version = "51.0", optional = false }
arrow-array = "51.0" arrow-array = "51.0"

View File

@@ -267,6 +267,18 @@ impl Table {
pub async fn optimize(&self, older_than_ms: Option<i64>) -> napi::Result<OptimizeStats> { pub async fn optimize(&self, older_than_ms: Option<i64>) -> napi::Result<OptimizeStats> {
let inner = self.inner_ref()?; let inner = self.inner_ref()?;
let older_than = if let Some(ms) = older_than_ms {
if ms == i64::MIN {
return Err(napi::Error::from_reason(format!(
"older_than_ms can not be {}",
i32::MIN,
)));
}
Duration::try_milliseconds(ms)
} else {
None
};
let compaction_stats = inner let compaction_stats = inner
.optimize(OptimizeAction::Compact { .optimize(OptimizeAction::Compact {
options: lancedb::table::CompactionOptions::default(), options: lancedb::table::CompactionOptions::default(),
@@ -276,7 +288,6 @@ impl Table {
.default_error()? .default_error()?
.compaction .compaction
.unwrap(); .unwrap();
let older_than = older_than_ms.map(Duration::milliseconds);
let prune_stats = inner let prune_stats = inner
.optimize(OptimizeAction::Prune { .optimize(OptimizeAction::Prune {
older_than, older_than,

View File

@@ -3,7 +3,7 @@ name = "lancedb"
# version in Cargo.toml # version in Cargo.toml
dependencies = [ dependencies = [
"deprecation", "deprecation",
"pylance==0.10.12", "pylance==0.11.0",
"ratelimiter~=1.0", "ratelimiter~=1.0",
"requests>=2.31.0", "requests>=2.31.0",
"retry>=0.9.2", "retry>=0.9.2",

View File

@@ -230,6 +230,18 @@ impl Table {
pub fn optimize(self_: PyRef<'_, Self>, cleanup_since_ms: Option<u64>) -> PyResult<&PyAny> { pub fn optimize(self_: PyRef<'_, Self>, cleanup_since_ms: Option<u64>) -> PyResult<&PyAny> {
let inner = self_.inner_ref()?.clone(); let inner = self_.inner_ref()?.clone();
let older_than = if let Some(ms) = cleanup_since_ms {
if ms > i64::MAX as u64 {
return Err(PyValueError::new_err(format!(
"cleanup_since_ms must be between {} and -{}",
i32::MAX,
i32::MAX
)));
}
Duration::try_milliseconds(ms as i64)
} else {
None
};
future_into_py(self_.py(), async move { future_into_py(self_.py(), async move {
let compaction_stats = inner let compaction_stats = inner
.optimize(OptimizeAction::Compact { .optimize(OptimizeAction::Compact {
@@ -240,7 +252,6 @@ impl Table {
.infer_error()? .infer_error()?
.compaction .compaction
.unwrap(); .unwrap();
let older_than = cleanup_since_ms.map(|since| Duration::milliseconds(since as i64));
let prune_stats = inner let prune_stats = inner
.optimize(OptimizeAction::Prune { .optimize(OptimizeAction::Prune {
older_than, older_than,

View File

@@ -40,8 +40,8 @@ serde = { version = "^1" }
serde_json = { version = "1" } serde_json = { version = "1" }
# For remote feature # For remote feature
reqwest = { version = "0.11.24", features = ["gzip", "json"], optional = true } reqwest = { version = "0.11.24", features = ["gzip", "json"], optional = true }
polars-arrow = { version = ">=0.37", optional = true } polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
polars = { version = ">=0.37", optional = true} polars = { version = ">=0.37,<0.40.0", optional = true}
[dev-dependencies] [dev-dependencies]
tempfile = "3.5.0" tempfile = "3.5.0"

View File

@@ -195,7 +195,7 @@ impl<T: IntoArrow> CreateTableBuilder<true, T> {
.embedding_registry() .embedding_registry()
.get(&definition.embedding_name) .get(&definition.embedding_name)
.ok_or_else(|| Error::EmbeddingFunctionNotFound { .ok_or_else(|| Error::EmbeddingFunctionNotFound {
name: definition.embedding_name.to_string(), name: definition.embedding_name.clone(),
reason: "No embedding function found in the connection's embedding_registry" reason: "No embedding function found in the connection's embedding_registry"
.to_string(), .to_string(),
})?; })?;

View File

@@ -155,7 +155,7 @@ impl<R: RecordBatchReader> MaybeEmbedded<R> {
} }
None => { None => {
return Err(Error::EmbeddingFunctionNotFound { return Err(Error::EmbeddingFunctionNotFound {
name: embedding_def.embedding_name.to_string(), name: embedding_def.embedding_name.clone(),
reason: format!( reason: format!(
"Table was defined with an embedding column `{}` but no embedding function was found with that name within the registry.", "Table was defined with an embedding column `{}` but no embedding function was found with that name within the registry.",
embedding_def.embedding_name embedding_def.embedding_name

View File

@@ -1301,7 +1301,6 @@ impl NativeTable {
num_partitions as usize, num_partitions as usize,
/*num_bits=*/ 8, /*num_bits=*/ 8,
num_sub_vectors as usize, num_sub_vectors as usize,
false,
index.distance_type.into(), index.distance_type.into(),
index.max_iterations as usize, index.max_iterations as usize,
); );
@@ -1345,7 +1344,6 @@ impl NativeTable {
ivf_params.max_iters = index.max_iterations as usize; ivf_params.max_iters = index.max_iterations as usize;
let hnsw_params = HnswBuildParams::default() let hnsw_params = HnswBuildParams::default()
.num_edges(index.m as usize) .num_edges(index.m as usize)
.max_num_edges(index.m as usize * 2)
.ef_construction(index.ef_construction as usize); .ef_construction(index.ef_construction as usize);
let sq_params = SQBuildParams { let sq_params = SQBuildParams {
sample_rate: index.sample_rate as usize, sample_rate: index.sample_rate as usize,
@@ -1731,7 +1729,7 @@ impl TableInternal for NativeTable {
} => { } => {
stats.prune = Some( stats.prune = Some(
self.cleanup_old_versions( self.cleanup_old_versions(
older_than.unwrap_or(Duration::days(7)), older_than.unwrap_or(Duration::try_days(7).expect("valid delta")),
delete_unverified, delete_unverified,
) )
.await?, .await?,