mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-04 02:42:57 +00:00
fix: parse index_stats for scalar index (#1319)
parse the index stats for scalar index - it is different from the index stats for vector index
This commit is contained in:
@@ -38,6 +38,7 @@ url.workspace = true
|
||||
regex.workspace = true
|
||||
serde = { version = "^1" }
|
||||
serde_json = { version = "1" }
|
||||
serde_with = { version = "3.8.1" }
|
||||
# For remote feature
|
||||
reqwest = { version = "0.11.24", features = ["gzip", "json"], optional = true }
|
||||
polars-arrow = { version = ">=0.37,<0.40.0", optional = true }
|
||||
|
||||
@@ -14,6 +14,9 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde_with::skip_serializing_none;
|
||||
|
||||
use crate::{table::TableInternal, Result};
|
||||
|
||||
use self::{
|
||||
@@ -83,3 +86,19 @@ pub struct IndexConfig {
|
||||
/// be more columns to represent composite indices.
|
||||
pub columns: Vec<String>,
|
||||
}
|
||||
|
||||
#[skip_serializing_none]
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct IndexMetadata {
|
||||
pub metric_type: Option<String>,
|
||||
pub index_type: Option<String>,
|
||||
}
|
||||
|
||||
#[skip_serializing_none]
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct IndexStatistics {
|
||||
pub num_indexed_rows: usize,
|
||||
pub num_unindexed_rows: usize,
|
||||
pub index_type: Option<String>,
|
||||
pub indices: Vec<IndexMetadata>,
|
||||
}
|
||||
|
||||
@@ -19,8 +19,6 @@
|
||||
//! values
|
||||
use std::cmp::max;
|
||||
|
||||
use serde::Deserialize;
|
||||
|
||||
use lance::table::format::{Index, Manifest};
|
||||
|
||||
use crate::DistanceType;
|
||||
@@ -46,20 +44,6 @@ impl VectorIndex {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct VectorIndexMetadata {
|
||||
pub metric_type: String,
|
||||
pub index_type: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct VectorIndexStatistics {
|
||||
pub num_indexed_rows: usize,
|
||||
pub num_unindexed_rows: usize,
|
||||
pub index_type: String,
|
||||
pub indices: Vec<VectorIndexMetadata>,
|
||||
}
|
||||
|
||||
/// Builder for an IVF PQ index.
|
||||
///
|
||||
/// This index stores a compressed (quantized) copy of every vector. These vectors
|
||||
|
||||
@@ -48,10 +48,9 @@ use crate::arrow::IntoArrow;
|
||||
use crate::connection::NoData;
|
||||
use crate::embeddings::{EmbeddingDefinition, EmbeddingRegistry, MaybeEmbedded, MemoryRegistry};
|
||||
use crate::error::{Error, Result};
|
||||
use crate::index::vector::{
|
||||
IvfHnswSqIndexBuilder, IvfPqIndexBuilder, VectorIndex, VectorIndexStatistics,
|
||||
};
|
||||
use crate::index::vector::{IvfHnswSqIndexBuilder, IvfPqIndexBuilder, VectorIndex};
|
||||
use crate::index::IndexConfig;
|
||||
use crate::index::IndexStatistics;
|
||||
use crate::index::{
|
||||
vector::{suggested_num_partitions, suggested_num_sub_vectors},
|
||||
Index, IndexBuilder,
|
||||
@@ -1217,7 +1216,7 @@ impl NativeTable {
|
||||
|
||||
pub async fn get_index_type(&self, index_uuid: &str) -> Result<Option<String>> {
|
||||
match self.load_index_stats(index_uuid).await? {
|
||||
Some(stats) => Ok(Some(stats.index_type)),
|
||||
Some(stats) => Ok(Some(stats.index_type.unwrap_or_default())),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
@@ -1228,7 +1227,7 @@ impl NativeTable {
|
||||
stats
|
||||
.indices
|
||||
.iter()
|
||||
.map(|i| i.metric_type.clone())
|
||||
.filter_map(|i| i.metric_type.clone())
|
||||
.collect(),
|
||||
)),
|
||||
None => Ok(None),
|
||||
@@ -1244,7 +1243,7 @@ impl NativeTable {
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn load_index_stats(&self, index_uuid: &str) -> Result<Option<VectorIndexStatistics>> {
|
||||
async fn load_index_stats(&self, index_uuid: &str) -> Result<Option<IndexStatistics>> {
|
||||
let index = self
|
||||
.load_indices()
|
||||
.await?
|
||||
@@ -1255,7 +1254,7 @@ impl NativeTable {
|
||||
}
|
||||
let dataset = self.dataset.get().await?;
|
||||
let index_stats = dataset.index_statistics(&index.unwrap().index_name).await?;
|
||||
let index_stats: VectorIndexStatistics = whatever!(
|
||||
let index_stats: IndexStatistics = whatever!(
|
||||
serde_json::from_str(&index_stats),
|
||||
"error deserializing index statistics {index_stats}",
|
||||
);
|
||||
@@ -2475,6 +2474,25 @@ mod tests {
|
||||
.unwrap(),
|
||||
Some(0)
|
||||
);
|
||||
assert_eq!(
|
||||
table
|
||||
.as_native()
|
||||
.unwrap()
|
||||
.get_index_type(index_uuid)
|
||||
.await
|
||||
.unwrap()
|
||||
.map(|index_type| index_type.to_string()),
|
||||
Some("IVF".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
table
|
||||
.as_native()
|
||||
.unwrap()
|
||||
.get_distance_type(index_uuid)
|
||||
.await
|
||||
.unwrap(),
|
||||
Some(crate::DistanceType::L2.to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -2644,6 +2662,27 @@ mod tests {
|
||||
let index = index_configs.into_iter().next().unwrap();
|
||||
assert_eq!(index.index_type, crate::index::IndexType::BTree);
|
||||
assert_eq!(index.columns, vec!["i".to_string()]);
|
||||
|
||||
let indices = table.as_native().unwrap().load_indices().await.unwrap();
|
||||
let index_uuid = &indices[0].index_uuid;
|
||||
assert_eq!(
|
||||
table
|
||||
.as_native()
|
||||
.unwrap()
|
||||
.count_indexed_rows(index_uuid)
|
||||
.await
|
||||
.unwrap(),
|
||||
Some(1)
|
||||
);
|
||||
assert_eq!(
|
||||
table
|
||||
.as_native()
|
||||
.unwrap()
|
||||
.count_unindexed_rows(index_uuid)
|
||||
.await
|
||||
.unwrap(),
|
||||
Some(0)
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
Reference in New Issue
Block a user