mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-16 08:42:57 +00:00
bump lance to 0.9.7 (#826)
This commit is contained in:
10
Cargo.toml
10
Cargo.toml
@@ -5,10 +5,10 @@ exclude = ["python"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.9.6", "features" = ["dynamodb"] }
|
||||
lance-index = { "version" = "=0.9.6" }
|
||||
lance-linalg = { "version" = "=0.9.6" }
|
||||
lance-testing = { "version" = "=0.9.6" }
|
||||
lance = { "version" = "=0.9.7", "features" = ["dynamodb"] }
|
||||
lance-index = { "version" = "=0.9.7" }
|
||||
lance-linalg = { "version" = "=0.9.7" }
|
||||
lance-testing = { "version" = "=0.9.7" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "49.0.0", optional = false }
|
||||
arrow-array = "49.0"
|
||||
@@ -23,6 +23,6 @@ half = { "version" = "=2.3.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
] }
|
||||
log = "0.4"
|
||||
object_store = "0.8.0"
|
||||
object_store = "0.9.0"
|
||||
snafu = "0.7.4"
|
||||
url = "2"
|
||||
|
||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
||||
version = "0.5.0"
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.9.6",
|
||||
"pylance==0.9.7",
|
||||
"ratelimiter~=1.0",
|
||||
"retry>=0.9.2",
|
||||
"tqdm>=4.27.0",
|
||||
|
||||
@@ -31,6 +31,8 @@ bytes = "1"
|
||||
futures = "0"
|
||||
num-traits = "0"
|
||||
url = { workspace = true }
|
||||
serde = { version = "^1" }
|
||||
serde_json = { version = "1" }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.5.0"
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use serde::Deserialize;
|
||||
|
||||
use lance::format::{Index, Manifest};
|
||||
use lance::index::vector::pq::PQBuildParams;
|
||||
use lance::index::vector::VectorIndexParams;
|
||||
@@ -132,6 +134,12 @@ impl VectorIndex {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct VectorIndexStatistics {
|
||||
pub num_indexed_rows: usize,
|
||||
pub num_unindexed_rows: usize,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
use chrono::Duration;
|
||||
use lance::dataset::builder::DatasetBuilder;
|
||||
use lance::index::scalar::ScalarIndexParams;
|
||||
use lance_index::optimize::OptimizeOptions;
|
||||
use lance_index::IndexType;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -25,12 +26,12 @@ use lance::dataset::optimize::{
|
||||
compact_files, CompactionMetrics, CompactionOptions, IndexRemapperOptions,
|
||||
};
|
||||
use lance::dataset::{Dataset, UpdateBuilder, WriteParams};
|
||||
use lance::index::DatasetIndexExt;
|
||||
use lance::io::object_store::WrappingObjectStore;
|
||||
use lance_index::DatasetIndexExt;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::index::vector::{VectorIndex, VectorIndexBuilder};
|
||||
use crate::index::vector::{VectorIndex, VectorIndexBuilder, VectorIndexStatistics};
|
||||
use crate::query::Query;
|
||||
use crate::utils::{PatchReadParam, PatchWriteParam};
|
||||
use crate::WriteMode;
|
||||
@@ -273,10 +274,9 @@ impl Table {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn optimize_indices(&mut self) -> Result<()> {
|
||||
pub async fn optimize_indices(&mut self, options: &OptimizeOptions) -> Result<()> {
|
||||
let mut dataset = self.dataset.as_ref().clone();
|
||||
|
||||
dataset.optimize_indices().await?;
|
||||
dataset.optimize_indices(options).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -426,11 +426,17 @@ impl Table {
|
||||
}
|
||||
|
||||
pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
|
||||
Ok(self.dataset.count_indexed_rows(index_uuid).await?)
|
||||
match self.load_index_stats(index_uuid).await? {
|
||||
Some(stats) => Ok(Some(stats.num_indexed_rows)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
|
||||
Ok(self.dataset.count_unindexed_rows(index_uuid).await?)
|
||||
match self.load_index_stats(index_uuid).await? {
|
||||
Some(stats) => Ok(Some(stats.num_unindexed_rows)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
|
||||
@@ -441,6 +447,30 @@ impl Table {
|
||||
.map(|i| VectorIndex::new_from_format(&mf, i))
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn load_index_stats(&self, index_uuid: &str) -> Result<Option<VectorIndexStatistics>> {
|
||||
let index = self
|
||||
.load_indices()
|
||||
.await?
|
||||
.into_iter()
|
||||
.find(|i| i.index_uuid == index_uuid);
|
||||
if index.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
let index_stats = self
|
||||
.dataset
|
||||
.index_statistics(&index.unwrap().index_name)
|
||||
.await?;
|
||||
let index_stats: VectorIndexStatistics =
|
||||
serde_json::from_str(&index_stats).map_err(|e| Error::Lance {
|
||||
message: format!(
|
||||
"error deserializing index statistics {}: {}",
|
||||
e, index_stats
|
||||
),
|
||||
})?;
|
||||
|
||||
Ok(Some(index_stats))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -963,6 +993,9 @@ mod tests {
|
||||
.unwrap();
|
||||
let mut i = IvfPQIndexBuilder::new();
|
||||
|
||||
assert_eq!(table.count_indexed_rows("my_index").await.unwrap(), None);
|
||||
assert_eq!(table.count_unindexed_rows("my_index").await.unwrap(), None);
|
||||
|
||||
let index_builder = i
|
||||
.column("embeddings".to_string())
|
||||
.index_name("my_index".to_string())
|
||||
@@ -974,6 +1007,17 @@ mod tests {
|
||||
assert_eq!(table.dataset.load_indices().await.unwrap().len(), 1);
|
||||
assert_eq!(table.count_rows().await.unwrap(), 512);
|
||||
assert_eq!(table.name, "test");
|
||||
|
||||
let indices = table.load_indices().await.unwrap();
|
||||
let index_uuid = &indices[0].index_uuid;
|
||||
assert_eq!(
|
||||
table.count_indexed_rows(index_uuid).await.unwrap(),
|
||||
Some(512)
|
||||
);
|
||||
assert_eq!(
|
||||
table.count_unindexed_rows(index_uuid).await.unwrap(),
|
||||
Some(0)
|
||||
);
|
||||
}
|
||||
|
||||
fn create_fixed_size_list<T: Array>(values: T, list_size: i32) -> Result<FixedSizeListArray> {
|
||||
|
||||
Reference in New Issue
Block a user