feat: upgrade lance to 0.11.0 (#1317)

Upgrade lance and pylance to 0.11.0, and make the code, dependency, and CI fixes needed for the upgrade.
This commit is contained in:
Rob Meng
2024-05-21 18:53:19 -04:00
committed by GitHub
parent 4f512af024
commit 2e197ef387
10 changed files with 37 additions and 17 deletions

View File

@@ -75,7 +75,7 @@ jobs:
timeout-minutes: 30
strategy:
matrix:
python-minor-version: ["8", "11"]
python-minor-version: ["9", "11"]
runs-on: "ubuntu-22.04"
defaults:
run:

View File

@@ -74,11 +74,11 @@ jobs:
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build
run: cargo build --all-features
- name: Start S3 integration test environment
working-directory: .
run: docker compose up --detach --wait
- name: Build
run: cargo build --all-features
- name: Run tests
run: cargo test --all-features
- name: Run examples

View File

@@ -14,10 +14,10 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
[workspace.dependencies]
lance = { "version" = "=0.10.18", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.10.18" }
lance-linalg = { "version" = "=0.10.18" }
lance-testing = { "version" = "=0.10.18" }
lance = { "version" = "=0.11.0", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.11.0" }
lance-linalg = { "version" = "=0.11.0" }
lance-testing = { "version" = "=0.11.0" }
# Note that this one does not include pyarrow
arrow = { version = "51.0", optional = false }
arrow-array = "51.0"

View File

@@ -267,6 +267,18 @@ impl Table {
pub async fn optimize(&self, older_than_ms: Option<i64>) -> napi::Result<OptimizeStats> {
let inner = self.inner_ref()?;
let older_than = if let Some(ms) = older_than_ms {
if ms == i64::MIN {
return Err(napi::Error::from_reason(format!(
"older_than_ms can not be {}",
i32::MIN,
)));
}
Duration::try_milliseconds(ms)
} else {
None
};
let compaction_stats = inner
.optimize(OptimizeAction::Compact {
options: lancedb::table::CompactionOptions::default(),
@@ -276,7 +288,6 @@ impl Table {
.default_error()?
.compaction
.unwrap();
let older_than = older_than_ms.map(Duration::milliseconds);
let prune_stats = inner
.optimize(OptimizeAction::Prune {
older_than,

View File

@@ -3,7 +3,7 @@ name = "lancedb"
# version in Cargo.toml
dependencies = [
"deprecation",
"pylance==0.10.12",
"pylance==0.11.0",
"ratelimiter~=1.0",
"requests>=2.31.0",
"retry>=0.9.2",

View File

@@ -230,6 +230,18 @@ impl Table {
pub fn optimize(self_: PyRef<'_, Self>, cleanup_since_ms: Option<u64>) -> PyResult<&PyAny> {
let inner = self_.inner_ref()?.clone();
let older_than = if let Some(ms) = cleanup_since_ms {
if ms > i64::MAX as u64 {
return Err(PyValueError::new_err(format!(
"cleanup_since_ms must be between {} and -{}",
i32::MAX,
i32::MAX
)));
}
Duration::try_milliseconds(ms as i64)
} else {
None
};
future_into_py(self_.py(), async move {
let compaction_stats = inner
.optimize(OptimizeAction::Compact {
@@ -240,7 +252,6 @@ impl Table {
.infer_error()?
.compaction
.unwrap();
let older_than = cleanup_since_ms.map(|since| Duration::milliseconds(since as i64));
let prune_stats = inner
.optimize(OptimizeAction::Prune {
older_than,

View File

@@ -40,8 +40,8 @@ serde = { version = "^1" }
serde_json = { version = "1" }
# For remote feature
reqwest = { version = "0.11.24", features = ["gzip", "json"], optional = true }
polars-arrow = { version = ">=0.37", optional = true }
polars = { version = ">=0.37", optional = true}
polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
polars = { version = ">=0.37,<0.40.0", optional = true}
[dev-dependencies]
tempfile = "3.5.0"

View File

@@ -195,7 +195,7 @@ impl<T: IntoArrow> CreateTableBuilder<true, T> {
.embedding_registry()
.get(&definition.embedding_name)
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
name: definition.embedding_name.to_string(),
name: definition.embedding_name.clone(),
reason: "No embedding function found in the connection's embedding_registry"
.to_string(),
})?;

View File

@@ -155,7 +155,7 @@ impl<R: RecordBatchReader> MaybeEmbedded<R> {
}
None => {
return Err(Error::EmbeddingFunctionNotFound {
name: embedding_def.embedding_name.to_string(),
name: embedding_def.embedding_name.clone(),
reason: format!(
"Table was defined with an embedding column `{}` but no embedding function was found with that name within the registry.",
embedding_def.embedding_name

View File

@@ -1301,7 +1301,6 @@ impl NativeTable {
num_partitions as usize,
/*num_bits=*/ 8,
num_sub_vectors as usize,
false,
index.distance_type.into(),
index.max_iterations as usize,
);
@@ -1345,7 +1344,6 @@ impl NativeTable {
ivf_params.max_iters = index.max_iterations as usize;
let hnsw_params = HnswBuildParams::default()
.num_edges(index.m as usize)
.max_num_edges(index.m as usize * 2)
.ef_construction(index.ef_construction as usize);
let sq_params = SQBuildParams {
sample_rate: index.sample_rate as usize,
@@ -1731,7 +1729,7 @@ impl TableInternal for NativeTable {
} => {
stats.prune = Some(
self.cleanup_old_versions(
older_than.unwrap_or(Duration::days(7)),
older_than.unwrap_or(Duration::try_days(7).expect("valid delta")),
delete_unverified,
)
.await?,