feat: added data stats apis (#596)

This commit is contained in:
Bert
2023-10-26 13:10:17 -04:00
committed by Weston Pace
parent 178e016ff2
commit f5e9c073f0
3 changed files with 55 additions and 5 deletions

View File

@@ -5,9 +5,9 @@ exclude = ["python"]
resolver = "2"
[workspace.dependencies]
lance = { "version" = "=0.8.7", "features" = ["dynamodb"] }
lance-linalg = { "version" = "=0.8.7" }
lance-testing = { "version" = "=0.8.7" }
lance = { "version" = "=0.8.8", "features" = ["dynamodb"] }
lance-linalg = { "version" = "=0.8.8" }
lance-testing = { "version" = "=0.8.8" }
# Note that this one does not include pyarrow
arrow = { version = "47.0.0", optional = false }
arrow-array = "47.0"
@@ -19,7 +19,7 @@ arrow-arith = "47.0"
arrow-cast = "47.0"
chrono = "0.4.23"
half = { "version" = "=2.3.1", default-features = false, features = [
"num-traits"
"num-traits",
] }
log = "0.4"
object_store = "0.7.1"

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use lance::format::{Index, Manifest};
use lance::index::vector::ivf::IvfBuildParams;
use lance::index::vector::pq::PQBuildParams;
use lance::index::vector::VectorIndexParams;
@@ -106,6 +107,27 @@ impl VectorIndexBuilder for IvfPQIndexBuilder {
}
}
/// Lightweight, owned description of one index on a table.
///
/// Instances are built by `VectorIndex::new_from_format` from the dataset
/// manifest and the on-disk index metadata, so callers do not have to handle
/// those format types directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VectorIndex {
    /// Names of the schema fields the index refers to.
    pub columns: Vec<String>,
    /// Name of the index, copied from the index metadata.
    pub index_name: String,
    /// String form of the index's UUID.
    pub index_uuid: String,
}
impl VectorIndex {
    /// Builds a `VectorIndex` from format-level metadata.
    ///
    /// Each entry of `index.fields` is used as a position into
    /// `manifest.schema.fields`, and the name of the field found there becomes
    /// one entry of `columns`. The index name is cloned and the UUID is
    /// rendered with `to_string()`.
    ///
    /// NOTE(review): the lookup uses an `as usize` cast followed by direct
    /// indexing, so a value that is negative or past the end of
    /// `manifest.schema.fields` will presumably panic. It also assumes the
    /// values in `index.fields` are valid positions in the top-level field
    /// list; confirm this holds for schemas with nested fields.
    pub fn new_from_format(manifest: &Manifest, index: &Index) -> VectorIndex {
        let mut columns = Vec::with_capacity(index.fields.len());
        for field_pos in index.fields.iter() {
            let field = &manifest.schema.fields[*field_pos as usize];
            columns.push(field.name.clone());
        }

        let index_name = index.name.clone();
        let index_uuid = index.uuid.to_string();
        VectorIndex {
            columns,
            index_name,
            index_uuid,
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -27,7 +27,7 @@ use lance::io::object_store::WrappingObjectStore;
use std::path::Path;
use crate::error::{Error, Result};
use crate::index::vector::VectorIndexBuilder;
use crate::index::vector::{VectorIndexBuilder, VectorIndex};
use crate::query::Query;
use crate::utils::{PatchReadParam, PatchWriteParam};
use crate::WriteMode;
@@ -371,6 +371,34 @@ impl Table {
self.dataset = Arc::new(dataset);
Ok(metrics)
}
/// Returns the fragment count reported by the underlying dataset.
///
/// This is a direct pass-through to the dataset's `count_fragments()`.
pub fn count_fragments(&self) -> usize {
self.dataset.count_fragments()
}
/// Returns the deleted-row count reported by the underlying dataset.
///
/// This is a direct pass-through to the dataset's `count_deleted_rows()`.
pub fn count_deleted_rows(&self) -> usize {
self.dataset.count_deleted_rows()
}
/// Returns the number of "small" files reported by the underlying dataset.
///
/// `max_rows_per_group` is forwarded unchanged to the dataset's
/// `num_small_files()`.
///
/// NOTE(review): presumably a file counts as small when it holds fewer rows
/// than `max_rows_per_group` — confirm against the lance documentation.
pub fn num_small_files(&self, max_rows_per_group: usize) -> usize {
self.dataset.num_small_files(max_rows_per_group)
}
/// Asks the underlying dataset how many rows the index identified by
/// `index_uuid` covers.
///
/// # Errors
///
/// Any error from the dataset call is converted into this crate's error type
/// and returned.
///
/// NOTE(review): the meaning of a `None` result is decided by the dataset
/// (presumably "no such index") — confirm against the lance documentation.
pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
    let indexed = self.dataset.count_indexed_rows(index_uuid).await?;
    Ok(indexed)
}
/// Asks the underlying dataset how many rows are not covered by the index
/// identified by `index_uuid`.
///
/// # Errors
///
/// Any error from the dataset call is converted into this crate's error type
/// and returned.
///
/// NOTE(review): the meaning of a `None` result is decided by the dataset
/// (presumably "no such index") — confirm against the lance documentation.
pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
    let unindexed = self.dataset.count_unindexed_rows(index_uuid).await?;
    Ok(unindexed)
}
/// Loads the dataset's index metadata and returns one `VectorIndex` summary
/// per entry.
///
/// The index list and the latest manifest are awaited together with
/// `futures::try_join!`; if either fails, that error is returned.
///
/// NOTE(review): the manifest comes from `latest_manifest()`, not from the
/// version this table currently has loaded. If the two can differ, field
/// lookups in `VectorIndex::new_from_format` may not match — confirm.
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
    let (index_metadata, manifest) = futures::try_join!(
        self.dataset.load_indices(),
        self.dataset.latest_manifest()
    )?;

    let mut summaries = Vec::with_capacity(index_metadata.len());
    for entry in index_metadata.iter() {
        summaries.push(VectorIndex::new_from_format(&manifest, entry));
    }
    Ok(summaries)
}
}
#[cfg(test)]