diff --git a/Cargo.lock b/Cargo.lock index d4b6a611..4f044d52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -190,6 +190,7 @@ dependencies = [ "arrow-data", "arrow-schema", "flatbuffers", + "zstd", ] [[package]] @@ -654,6 +655,12 @@ version = "3.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b1ce199063694f33ffb7dd4e0ee620741495c32833cde5aa08f02a0bf96f0c8" +[[package]] +name = "bytemuck" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" + [[package]] name = "byteorder" version = "1.4.3" @@ -1646,9 +1653,9 @@ dependencies = [ [[package]] name = "lance" -version = "0.4.17" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86dda8185bd1ffae7b910c1f68035af23be9b717c52e9cc4de176cd30b47f772" +checksum = "3d6c2e7bcfc71c7167ec70cd06c6d55c644a148f6580218c5a0b66e13ac5b5cc" dependencies = [ "accelerate-src", "arrow", @@ -1657,7 +1664,9 @@ dependencies = [ "arrow-buffer", "arrow-cast", "arrow-data", + "arrow-ipc", "arrow-ord", + "arrow-row", "arrow-schema", "arrow-select", "async-recursion", @@ -1668,6 +1677,7 @@ dependencies = [ "bytes", "cblas", "chrono", + "dashmap", "datafusion", "futures", "lapack", @@ -1684,6 +1694,7 @@ dependencies = [ "prost-types", "rand", "reqwest", + "roaring", "shellexpand", "snafu", "sqlparser-lance", @@ -2598,6 +2609,12 @@ dependencies = [ "winreg", ] +[[package]] +name = "retain_mut" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" + [[package]] name = "ring" version = "0.16.20" @@ -2613,6 +2630,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "roaring" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef0fb5e826a8bde011ecae6a8539dd333884335c57ff0f003fbe27c25bbe8f71" +dependencies = [ + "bytemuck", + "byteorder", + "retain_mut", +] + [[package]] name = "rustc_version" version = "0.4.0" diff --git a/rust/vectordb/Cargo.toml b/rust/vectordb/Cargo.toml index 2e534216..06e162fe 100644 --- a/rust/vectordb/Cargo.toml +++ b/rust/vectordb/Cargo.toml @@ -14,7 +14,7 @@ arrow-data = "37.0" arrow-schema = "37.0" object_store = "0.5.6" snafu = "0.7.4" -lance = "0.4.17" +lance = "0.4.21" tokio = { version = "1.23", features = ["rt-multi-thread"] } [dev-dependencies] diff --git a/rust/vectordb/src/database.rs b/rust/vectordb/src/database.rs index 163a2baa..2d26fc36 100644 --- a/rust/vectordb/src/database.rs +++ b/rust/vectordb/src/database.rs @@ -42,7 +42,7 @@ impl Database { /// /// * A [Database] object. pub async fn connect(uri: &str) -> Result { - let object_store = ObjectStore::new(uri).await?; + let (object_store, _) = ObjectStore::from_uri(uri).await?; if object_store.is_local() { Self::try_create_dir(uri).context(CreateDirSnafu { path: uri })?; } diff --git a/rust/vectordb/src/index/vector.rs b/rust/vectordb/src/index/vector.rs index aa7491fc..36fbd100 100644 --- a/rust/vectordb/src/index/vector.rs +++ b/rust/vectordb/src/index/vector.rs @@ -20,6 +20,8 @@ pub trait VectorIndexBuilder { fn get_column(&self) -> Option; fn get_index_name(&self) -> Option; fn build(&self) -> VectorIndexParams; + + fn get_replace(&self) -> bool; } pub struct IvfPQIndexBuilder { @@ -28,6 +30,7 @@ pub struct IvfPQIndexBuilder { metric_type: Option, ivf_params: Option, pq_params: Option, + replace: bool, } impl IvfPQIndexBuilder { @@ -38,6 +41,7 @@ impl IvfPQIndexBuilder { metric_type: None, ivf_params: None, pq_params: None, + replace: true, } } } @@ -67,6 +71,11 @@ impl IvfPQIndexBuilder { self.pq_params = Some(pq_params); self } + + pub fn replace(&mut self, replace: bool) -> &mut IvfPQIndexBuilder { + self.replace = replace; + self + } } impl VectorIndexBuilder for IvfPQIndexBuilder { @@ -84,6 +93,10 @@ impl VectorIndexBuilder for IvfPQIndexBuilder { VectorIndexParams::with_ivf_pq_params(pq_params.metric_type, ivf_params, pq_params) } + + fn get_replace(&self) -> bool { + self.replace + } } #[cfg(test)] diff --git a/rust/vectordb/src/query.rs b/rust/vectordb/src/query.rs index 27361f6b..972524fa 100644 --- a/rust/vectordb/src/query.rs +++ b/rust/vectordb/src/query.rs @@ -177,7 +177,7 @@ mod tests { #[tokio::test] async fn test_setters_getters() { let mut batches: Box = Box::new(make_test_batches()); - let ds = Dataset::write(&mut batches, ":memory:", None) + let ds = Dataset::write(&mut batches, "memory://foo", None) .await .unwrap(); @@ -206,7 +206,7 @@ mod tests { #[tokio::test] async fn test_execute() { let mut batches: Box = Box::new(make_test_batches()); - let ds = Dataset::write(&mut batches, ":memory:", None) + let ds = Dataset::write(&mut batches, "memory://foo", None) .await .unwrap(); diff --git a/rust/vectordb/src/table.rs b/rust/vectordb/src/table.rs index d3ce7acb..e63b336e 100644 --- a/rust/vectordb/src/table.rs +++ b/rust/vectordb/src/table.rs @@ -130,6 +130,7 @@ impl Table { IndexType::Vector, index_builder.get_index_name(), &index_builder.build(), + index_builder.get_replace(), ) .await?; self.dataset = Arc::new(dataset); @@ -233,7 +234,7 @@ mod tests { let uri = tmp_dir.path().to_str().unwrap(); let batches: Box = Box::new(make_test_batches()); - let schema = batches.schema().clone(); + let _ = batches.schema().clone(); Table::create(&uri, "test", batches).await.unwrap(); let batches: Box = Box::new(make_test_batches());