feat: upgrade to lance v0.25.0-beta.5 (#2248)

- adds `loss` to the index statistics for vector indices
- `optimize` can now retrain the vector index

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
This commit is contained in:
BubbleCal
2025-03-22 01:12:23 +08:00
committed by GitHub
parent ba1ded933a
commit 7ff6ec7fe3
11 changed files with 89 additions and 25 deletions

View File

@@ -174,6 +174,7 @@ pub(crate) struct IndexMetadata {
pub metric_type: Option<DistanceType>,
// Sometimes the index type is provided at this level.
pub index_type: Option<IndexType>,
pub loss: Option<f64>,
}
// This struct is used to deserialize the JSON data returned from the Lance API
@@ -205,4 +206,6 @@ pub struct IndexStatistics {
pub distance_type: Option<DistanceType>,
/// The number of parts this index is split into.
pub num_indices: Option<u32>,
/// The loss value reported for the index, if one is available.
pub loss: Option<f64>,
}

View File

@@ -1884,6 +1884,7 @@ mod tests {
index_type: IndexType::IvfPq,
distance_type: Some(DistanceType::L2),
num_indices: None,
loss: None,
};
assert_eq!(indices, expected);

View File

@@ -2373,12 +2373,20 @@ impl BaseTable for NativeTable {
.ok_or_else(|| Error::InvalidInput {
message: "index statistics was missing index type".to_string(),
})?;
let loss = stats
.indices
.iter()
.map(|index| index.loss.unwrap_or_default())
.sum::<f64>();
let loss = first_index.loss.map(|first_loss| first_loss + loss);
Ok(Some(IndexStatistics {
num_indexed_rows: stats.num_indexed_rows,
num_unindexed_rows: stats.num_unindexed_rows,
index_type,
distance_type: first_index.metric_type,
num_indices: stats.num_indices,
loss,
}))
}
}
@@ -3045,6 +3053,7 @@ mod tests {
assert_eq!(stats.num_unindexed_rows, 0);
assert_eq!(stats.index_type, crate::index::IndexType::IvfPq);
assert_eq!(stats.distance_type, Some(crate::DistanceType::L2));
assert!(stats.loss.is_some());
table.drop_index(index_name).await.unwrap();
assert_eq!(table.list_indices().await.unwrap().len(), 0);