mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-07 04:12:59 +00:00
bump lance to 0.9.7 (#826)
This commit is contained in:
10
Cargo.toml
10
Cargo.toml
@@ -5,10 +5,10 @@ exclude = ["python"]
|
|||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.9.6", "features" = ["dynamodb"] }
|
lance = { "version" = "=0.9.7", "features" = ["dynamodb"] }
|
||||||
lance-index = { "version" = "=0.9.6" }
|
lance-index = { "version" = "=0.9.7" }
|
||||||
lance-linalg = { "version" = "=0.9.6" }
|
lance-linalg = { "version" = "=0.9.7" }
|
||||||
lance-testing = { "version" = "=0.9.6" }
|
lance-testing = { "version" = "=0.9.7" }
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "49.0.0", optional = false }
|
arrow = { version = "49.0.0", optional = false }
|
||||||
arrow-array = "49.0"
|
arrow-array = "49.0"
|
||||||
@@ -23,6 +23,6 @@ half = { "version" = "=2.3.1", default-features = false, features = [
|
|||||||
"num-traits",
|
"num-traits",
|
||||||
] }
|
] }
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
object_store = "0.8.0"
|
object_store = "0.9.0"
|
||||||
snafu = "0.7.4"
|
snafu = "0.7.4"
|
||||||
url = "2"
|
url = "2"
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
|||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"pylance==0.9.6",
|
"pylance==0.9.7",
|
||||||
"ratelimiter~=1.0",
|
"ratelimiter~=1.0",
|
||||||
"retry>=0.9.2",
|
"retry>=0.9.2",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
|
|||||||
@@ -31,6 +31,8 @@ bytes = "1"
|
|||||||
futures = "0"
|
futures = "0"
|
||||||
num-traits = "0"
|
num-traits = "0"
|
||||||
url = { workspace = true }
|
url = { workspace = true }
|
||||||
|
serde = { version = "^1" }
|
||||||
|
serde_json = { version = "1" }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3.5.0"
|
tempfile = "3.5.0"
|
||||||
|
|||||||
@@ -12,6 +12,8 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
use lance::format::{Index, Manifest};
|
use lance::format::{Index, Manifest};
|
||||||
use lance::index::vector::pq::PQBuildParams;
|
use lance::index::vector::pq::PQBuildParams;
|
||||||
use lance::index::vector::VectorIndexParams;
|
use lance::index::vector::VectorIndexParams;
|
||||||
@@ -132,6 +134,12 @@ impl VectorIndex {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize)]
|
||||||
|
pub struct VectorIndexStatistics {
|
||||||
|
pub num_indexed_rows: usize,
|
||||||
|
pub num_unindexed_rows: usize,
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
use chrono::Duration;
|
use chrono::Duration;
|
||||||
use lance::dataset::builder::DatasetBuilder;
|
use lance::dataset::builder::DatasetBuilder;
|
||||||
use lance::index::scalar::ScalarIndexParams;
|
use lance::index::scalar::ScalarIndexParams;
|
||||||
|
use lance_index::optimize::OptimizeOptions;
|
||||||
use lance_index::IndexType;
|
use lance_index::IndexType;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
@@ -25,12 +26,12 @@ use lance::dataset::optimize::{
|
|||||||
compact_files, CompactionMetrics, CompactionOptions, IndexRemapperOptions,
|
compact_files, CompactionMetrics, CompactionOptions, IndexRemapperOptions,
|
||||||
};
|
};
|
||||||
use lance::dataset::{Dataset, UpdateBuilder, WriteParams};
|
use lance::dataset::{Dataset, UpdateBuilder, WriteParams};
|
||||||
use lance::index::DatasetIndexExt;
|
|
||||||
use lance::io::object_store::WrappingObjectStore;
|
use lance::io::object_store::WrappingObjectStore;
|
||||||
|
use lance_index::DatasetIndexExt;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{Error, Result};
|
||||||
use crate::index::vector::{VectorIndex, VectorIndexBuilder};
|
use crate::index::vector::{VectorIndex, VectorIndexBuilder, VectorIndexStatistics};
|
||||||
use crate::query::Query;
|
use crate::query::Query;
|
||||||
use crate::utils::{PatchReadParam, PatchWriteParam};
|
use crate::utils::{PatchReadParam, PatchWriteParam};
|
||||||
use crate::WriteMode;
|
use crate::WriteMode;
|
||||||
@@ -273,10 +274,9 @@ impl Table {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn optimize_indices(&mut self) -> Result<()> {
|
pub async fn optimize_indices(&mut self, options: &OptimizeOptions) -> Result<()> {
|
||||||
let mut dataset = self.dataset.as_ref().clone();
|
let mut dataset = self.dataset.as_ref().clone();
|
||||||
|
dataset.optimize_indices(options).await?;
|
||||||
dataset.optimize_indices().await?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -426,11 +426,17 @@ impl Table {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
|
pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
|
||||||
Ok(self.dataset.count_indexed_rows(index_uuid).await?)
|
match self.load_index_stats(index_uuid).await? {
|
||||||
|
Some(stats) => Ok(Some(stats.num_indexed_rows)),
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
|
pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
|
||||||
Ok(self.dataset.count_unindexed_rows(index_uuid).await?)
|
match self.load_index_stats(index_uuid).await? {
|
||||||
|
Some(stats) => Ok(Some(stats.num_unindexed_rows)),
|
||||||
|
None => Ok(None),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
|
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
|
||||||
@@ -441,6 +447,30 @@ impl Table {
|
|||||||
.map(|i| VectorIndex::new_from_format(&mf, i))
|
.map(|i| VectorIndex::new_from_format(&mf, i))
|
||||||
.collect())
|
.collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn load_index_stats(&self, index_uuid: &str) -> Result<Option<VectorIndexStatistics>> {
|
||||||
|
let index = self
|
||||||
|
.load_indices()
|
||||||
|
.await?
|
||||||
|
.into_iter()
|
||||||
|
.find(|i| i.index_uuid == index_uuid);
|
||||||
|
if index.is_none() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
let index_stats = self
|
||||||
|
.dataset
|
||||||
|
.index_statistics(&index.unwrap().index_name)
|
||||||
|
.await?;
|
||||||
|
let index_stats: VectorIndexStatistics =
|
||||||
|
serde_json::from_str(&index_stats).map_err(|e| Error::Lance {
|
||||||
|
message: format!(
|
||||||
|
"error deserializing index statistics {}: {}",
|
||||||
|
e, index_stats
|
||||||
|
),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Some(index_stats))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -963,6 +993,9 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
let mut i = IvfPQIndexBuilder::new();
|
let mut i = IvfPQIndexBuilder::new();
|
||||||
|
|
||||||
|
assert_eq!(table.count_indexed_rows("my_index").await.unwrap(), None);
|
||||||
|
assert_eq!(table.count_unindexed_rows("my_index").await.unwrap(), None);
|
||||||
|
|
||||||
let index_builder = i
|
let index_builder = i
|
||||||
.column("embeddings".to_string())
|
.column("embeddings".to_string())
|
||||||
.index_name("my_index".to_string())
|
.index_name("my_index".to_string())
|
||||||
@@ -974,6 +1007,17 @@ mod tests {
|
|||||||
assert_eq!(table.dataset.load_indices().await.unwrap().len(), 1);
|
assert_eq!(table.dataset.load_indices().await.unwrap().len(), 1);
|
||||||
assert_eq!(table.count_rows().await.unwrap(), 512);
|
assert_eq!(table.count_rows().await.unwrap(), 512);
|
||||||
assert_eq!(table.name, "test");
|
assert_eq!(table.name, "test");
|
||||||
|
|
||||||
|
let indices = table.load_indices().await.unwrap();
|
||||||
|
let index_uuid = &indices[0].index_uuid;
|
||||||
|
assert_eq!(
|
||||||
|
table.count_indexed_rows(index_uuid).await.unwrap(),
|
||||||
|
Some(512)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
table.count_unindexed_rows(index_uuid).await.unwrap(),
|
||||||
|
Some(0)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_fixed_size_list<T: Array>(values: T, list_size: i32) -> Result<FixedSizeListArray> {
|
fn create_fixed_size_list<T: Array>(values: T, list_size: i32) -> Result<FixedSizeListArray> {
|
||||||
|
|||||||
Reference in New Issue
Block a user