mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-05 19:32:56 +00:00
feat: added data stats apis (#596)
This commit is contained in:
@@ -5,9 +5,9 @@ exclude = ["python"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.8.7", "features" = ["dynamodb"] }
|
||||
lance-linalg = { "version" = "=0.8.7" }
|
||||
lance-testing = { "version" = "=0.8.7" }
|
||||
lance = { "version" = "=0.8.8", "features" = ["dynamodb"] }
|
||||
lance-linalg = { "version" = "=0.8.8" }
|
||||
lance-testing = { "version" = "=0.8.8" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "47.0.0", optional = false }
|
||||
arrow-array = "47.0"
|
||||
@@ -19,7 +19,7 @@ arrow-arith = "47.0"
|
||||
arrow-cast = "47.0"
|
||||
chrono = "0.4.23"
|
||||
half = { "version" = "=2.3.1", default-features = false, features = [
|
||||
"num-traits"
|
||||
"num-traits",
|
||||
] }
|
||||
log = "0.4"
|
||||
object_store = "0.7.1"
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use lance::format::{Index, Manifest};
|
||||
use lance::index::vector::ivf::IvfBuildParams;
|
||||
use lance::index::vector::pq::PQBuildParams;
|
||||
use lance::index::vector::VectorIndexParams;
|
||||
@@ -106,6 +107,27 @@ impl VectorIndexBuilder for IvfPQIndexBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Summary of a vector index attached to a table.
///
/// Plain data holder built by `VectorIndex::new_from_format` from the
/// on-disk index metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VectorIndex {
    /// Names of the schema fields (columns) the index refers to.
    pub columns: Vec<String>,
    /// Name of the index.
    pub index_name: String,
    /// String form of the index UUID.
    pub index_uuid: String,
}
|
||||
|
||||
impl VectorIndex {
|
||||
pub fn new_from_format(manifest: &Manifest, index: &Index) -> VectorIndex {
|
||||
let fields = index
|
||||
.fields
|
||||
.iter()
|
||||
.map(|i| manifest.schema.fields[*i as usize].name.clone())
|
||||
.collect();
|
||||
VectorIndex {
|
||||
columns: fields,
|
||||
index_name: index.name.clone(),
|
||||
index_uuid: index.uuid.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -27,7 +27,7 @@ use lance::io::object_store::WrappingObjectStore;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::index::vector::VectorIndexBuilder;
|
||||
use crate::index::vector::{VectorIndexBuilder, VectorIndex};
|
||||
use crate::query::Query;
|
||||
use crate::utils::{PatchReadParam, PatchWriteParam};
|
||||
use crate::WriteMode;
|
||||
@@ -371,6 +371,34 @@ impl Table {
|
||||
self.dataset = Arc::new(dataset);
|
||||
Ok(metrics)
|
||||
}
|
||||
|
||||
pub fn count_fragments(&self) -> usize {
|
||||
self.dataset.count_fragments()
|
||||
}
|
||||
|
||||
pub fn count_deleted_rows(&self) -> usize {
|
||||
self.dataset.count_deleted_rows()
|
||||
}
|
||||
|
||||
pub fn num_small_files(&self, max_rows_per_group: usize) -> usize {
|
||||
self.dataset.num_small_files(max_rows_per_group)
|
||||
}
|
||||
|
||||
/// Asks the underlying dataset for the indexed-row count of the index
/// identified by `index_uuid`.
///
/// The dataset's `Option<usize>` result is returned as-is; presumably
/// `None` means the index could not be found — confirm.
///
/// # Errors
///
/// Propagates any error returned by the dataset call, converted into this
/// crate's error type.
pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
    let indexed = self.dataset.count_indexed_rows(index_uuid).await?;
    Ok(indexed)
}
|
||||
|
||||
/// Asks the underlying dataset for the unindexed-row count of the index
/// identified by `index_uuid`.
///
/// The dataset's `Option<usize>` result is returned as-is; presumably
/// `None` means the index could not be found — confirm.
///
/// # Errors
///
/// Propagates any error returned by the dataset call, converted into this
/// crate's error type.
pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
    let unindexed = self.dataset.count_unindexed_rows(index_uuid).await?;
    Ok(unindexed)
}
|
||||
|
||||
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
|
||||
let (indices, mf) = futures::try_join!(
|
||||
self.dataset.load_indices(),
|
||||
self.dataset.latest_manifest()
|
||||
)?;
|
||||
Ok(indices.iter().map(|i| VectorIndex::new_from_format(&mf, i)).collect())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
Reference in New Issue
Block a user