mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-16 11:30:41 +00:00
feat: a utility for creating "permutation views" (#2552)
I'm working on a lancedb version of pytorch data loading (and hopefully addressing https://github.com/lancedb/lance/issues/3727). However, rather than rely on pytorch for everything I'm moving some of the things that pytorch does into rust. This gives us more control over data loading (e.g. using shards or a hash-based split) and it allows permutations to be persistent. In particular I hope to be able to: * Create a persistent permutation * This permutation can handle splits, filtering, shuffling, and sharding * Create a rust data loader that can read a permutation (one or more splits), or a subset of a permutation (for DDP) * Create a python data loader that delegates to the rust data loader Eventually create integrations for other data loading libraries, including rust & node
This commit is contained in:
@@ -4,7 +4,10 @@
|
||||
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||
|
||||
use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow};
|
||||
use lancedb::{connection::Connection as LanceConnection, database::CreateTableMode};
|
||||
use lancedb::{
|
||||
connection::Connection as LanceConnection,
|
||||
database::{CreateTableMode, ReadConsistency},
|
||||
};
|
||||
use pyo3::{
|
||||
exceptions::{PyRuntimeError, PyValueError},
|
||||
pyclass, pyfunction, pymethods, Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||
@@ -23,7 +26,7 @@ impl Connection {
|
||||
Self { inner: Some(inner) }
|
||||
}
|
||||
|
||||
fn get_inner(&self) -> PyResult<&LanceConnection> {
|
||||
pub(crate) fn get_inner(&self) -> PyResult<&LanceConnection> {
|
||||
self.inner
|
||||
.as_ref()
|
||||
.ok_or_else(|| PyRuntimeError::new_err("Connection is closed"))
|
||||
@@ -63,6 +66,18 @@ impl Connection {
|
||||
self.get_inner().map(|inner| inner.uri().to_string())
|
||||
}
|
||||
|
||||
#[pyo3(signature = ())]
|
||||
pub fn get_read_consistency_interval(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
Ok(match inner.read_consistency().await.infer_error()? {
|
||||
ReadConsistency::Manual => None,
|
||||
ReadConsistency::Eventual(duration) => Some(duration.as_secs_f64()),
|
||||
ReadConsistency::Strong => Some(0.0_f64),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (namespace=vec![], start_after=None, limit=None))]
|
||||
pub fn table_names(
|
||||
self_: PyRef<'_, Self>,
|
||||
|
||||
Reference in New Issue
Block a user