bump lance to 0.9.7 (#826)

This commit is contained in:
Bert
2024-01-18 23:44:22 -05:00
committed by Weston Pace
parent e6bb907d81
commit 4243eaee93
5 changed files with 67 additions and 13 deletions

View File

@@ -5,10 +5,10 @@ exclude = ["python"]
resolver = "2" resolver = "2"
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.9.6", "features" = ["dynamodb"] } lance = { "version" = "=0.9.7", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.9.6" } lance-index = { "version" = "=0.9.7" }
lance-linalg = { "version" = "=0.9.6" } lance-linalg = { "version" = "=0.9.7" }
lance-testing = { "version" = "=0.9.6" } lance-testing = { "version" = "=0.9.7" }
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "49.0.0", optional = false } arrow = { version = "49.0.0", optional = false }
arrow-array = "49.0" arrow-array = "49.0"
@@ -23,6 +23,6 @@ half = { "version" = "=2.3.1", default-features = false, features = [
"num-traits", "num-traits",
] } ] }
log = "0.4" log = "0.4"
object_store = "0.8.0" object_store = "0.9.0"
snafu = "0.7.4" snafu = "0.7.4"
url = "2" url = "2"

View File

@@ -3,7 +3,7 @@ name = "lancedb"
version = "0.5.0" version = "0.5.0"
dependencies = [ dependencies = [
"deprecation", "deprecation",
"pylance==0.9.6", "pylance==0.9.7",
"ratelimiter~=1.0", "ratelimiter~=1.0",
"retry>=0.9.2", "retry>=0.9.2",
"tqdm>=4.27.0", "tqdm>=4.27.0",

View File

@@ -31,6 +31,8 @@ bytes = "1"
futures = "0" futures = "0"
num-traits = "0" num-traits = "0"
url = { workspace = true } url = { workspace = true }
serde = { version = "^1" }
serde_json = { version = "1" }
[dev-dependencies] [dev-dependencies]
tempfile = "3.5.0" tempfile = "3.5.0"

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use serde::Deserialize;
use lance::format::{Index, Manifest}; use lance::format::{Index, Manifest};
use lance::index::vector::pq::PQBuildParams; use lance::index::vector::pq::PQBuildParams;
use lance::index::vector::VectorIndexParams; use lance::index::vector::VectorIndexParams;
@@ -132,6 +134,12 @@ impl VectorIndex {
} }
} }
/// Row counts for a single index, deserialized from JSON.
///
/// `Table::load_index_stats` builds this with `serde_json::from_str` from the
/// string returned by `dataset.index_statistics(..)`, so the field names below
/// are also the JSON keys that must be present in that string.
#[derive(Debug, Deserialize)]
pub struct VectorIndexStatistics {
// Surfaced to callers by `Table::count_indexed_rows`.
// Presumably the number of rows covered by the index -- confirm against lance.
pub num_indexed_rows: usize,
// Surfaced to callers by `Table::count_unindexed_rows`.
// Presumably the number of rows not yet covered by the index -- confirm against lance.
pub num_unindexed_rows: usize,
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View File

@@ -15,6 +15,7 @@
use chrono::Duration; use chrono::Duration;
use lance::dataset::builder::DatasetBuilder; use lance::dataset::builder::DatasetBuilder;
use lance::index::scalar::ScalarIndexParams; use lance::index::scalar::ScalarIndexParams;
use lance_index::optimize::OptimizeOptions;
use lance_index::IndexType; use lance_index::IndexType;
use std::sync::Arc; use std::sync::Arc;
@@ -25,12 +26,12 @@ use lance::dataset::optimize::{
compact_files, CompactionMetrics, CompactionOptions, IndexRemapperOptions, compact_files, CompactionMetrics, CompactionOptions, IndexRemapperOptions,
}; };
use lance::dataset::{Dataset, UpdateBuilder, WriteParams}; use lance::dataset::{Dataset, UpdateBuilder, WriteParams};
use lance::index::DatasetIndexExt;
use lance::io::object_store::WrappingObjectStore; use lance::io::object_store::WrappingObjectStore;
use lance_index::DatasetIndexExt;
use std::path::Path; use std::path::Path;
use crate::error::{Error, Result}; use crate::error::{Error, Result};
use crate::index::vector::{VectorIndex, VectorIndexBuilder}; use crate::index::vector::{VectorIndex, VectorIndexBuilder, VectorIndexStatistics};
use crate::query::Query; use crate::query::Query;
use crate::utils::{PatchReadParam, PatchWriteParam}; use crate::utils::{PatchReadParam, PatchWriteParam};
use crate::WriteMode; use crate::WriteMode;
@@ -273,10 +274,9 @@ impl Table {
Ok(()) Ok(())
} }
// Side-by-side diff rendering: on fused lines the first copy is the old text and
// the second copy is the new text. The new signature adds an `options` argument.
// Both versions clone the dataset behind `self.dataset`, call `optimize_indices`
// on that clone, propagate any error with `?`, and return `Ok(())`.
// NOTE(review): in the lines shown the optimized clone is never stored back into
// `self.dataset` -- confirm whether the result is meant to be kept.
pub async fn optimize_indices(&mut self) -> Result<()> { pub async fn optimize_indices(&mut self, options: &OptimizeOptions) -> Result<()> {
let mut dataset = self.dataset.as_ref().clone(); let mut dataset = self.dataset.as_ref().clone();
// New column only: forwards the caller-supplied options.
dataset.optimize_indices(options).await?;
// Old column only: the previous call took no arguments.
dataset.optimize_indices().await?;
Ok(()) Ok(())
} }
@@ -426,11 +426,17 @@ impl Table {
} }
// Side-by-side diff rendering: on fused lines the first copy is the old text and
// the second copy is the new text.
// Old body: returned `self.dataset.count_indexed_rows(index_uuid)` directly.
// New body: calls `load_index_stats(index_uuid)` and returns the
// `num_indexed_rows` field, or `None` when no statistics were found.
// Errors from the lookup are propagated with `?` in both versions.
pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> { pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
Ok(self.dataset.count_indexed_rows(index_uuid).await?) match self.load_index_stats(index_uuid).await? {
Some(stats) => Ok(Some(stats.num_indexed_rows)),
None => Ok(None),
}
} }
// Side-by-side diff rendering: on fused lines the first copy is the old text and
// the second copy is the new text.
// Old body: returned `self.dataset.count_unindexed_rows(index_uuid)` directly.
// New body: calls `load_index_stats(index_uuid)` and returns the
// `num_unindexed_rows` field, or `None` when no statistics were found.
// Errors from the lookup are propagated with `?` in both versions.
pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> { pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
Ok(self.dataset.count_unindexed_rows(index_uuid).await?) match self.load_index_stats(index_uuid).await? {
Some(stats) => Ok(Some(stats.num_unindexed_rows)),
None => Ok(None),
}
} }
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> { pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
@@ -441,6 +447,30 @@ impl Table {
.map(|i| VectorIndex::new_from_format(&mf, i)) .map(|i| VectorIndex::new_from_format(&mf, i))
.collect()) .collect())
} }
async fn load_index_stats(&self, index_uuid: &str) -> Result<Option<VectorIndexStatistics>> {
let index = self
.load_indices()
.await?
.into_iter()
.find(|i| i.index_uuid == index_uuid);
if index.is_none() {
return Ok(None);
}
let index_stats = self
.dataset
.index_statistics(&index.unwrap().index_name)
.await?;
let index_stats: VectorIndexStatistics =
serde_json::from_str(&index_stats).map_err(|e| Error::Lance {
message: format!(
"error deserializing index statistics {}: {}",
e, index_stats
),
})?;
Ok(Some(index_stats))
}
} }
#[cfg(test)] #[cfg(test)]
@@ -963,6 +993,9 @@ mod tests {
.unwrap(); .unwrap();
let mut i = IvfPQIndexBuilder::new(); let mut i = IvfPQIndexBuilder::new();
assert_eq!(table.count_indexed_rows("my_index").await.unwrap(), None);
assert_eq!(table.count_unindexed_rows("my_index").await.unwrap(), None);
let index_builder = i let index_builder = i
.column("embeddings".to_string()) .column("embeddings".to_string())
.index_name("my_index".to_string()) .index_name("my_index".to_string())
@@ -974,6 +1007,17 @@ mod tests {
assert_eq!(table.dataset.load_indices().await.unwrap().len(), 1); assert_eq!(table.dataset.load_indices().await.unwrap().len(), 1);
assert_eq!(table.count_rows().await.unwrap(), 512); assert_eq!(table.count_rows().await.unwrap(), 512);
assert_eq!(table.name, "test"); assert_eq!(table.name, "test");
let indices = table.load_indices().await.unwrap();
let index_uuid = &indices[0].index_uuid;
assert_eq!(
table.count_indexed_rows(index_uuid).await.unwrap(),
Some(512)
);
assert_eq!(
table.count_unindexed_rows(index_uuid).await.unwrap(),
Some(0)
);
} }
fn create_fixed_size_list<T: Array>(values: T, list_size: i32) -> Result<FixedSizeListArray> { fn create_fixed_size_list<T: Array>(values: T, list_size: i32) -> Result<FixedSizeListArray> {