mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-01 03:10:43 +00:00
Compare commits
1 Commits
python-v0.
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
054d09abe9 |
2486
Cargo.lock
generated
2486
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
56
Cargo.toml
56
Cargo.toml
@@ -15,37 +15,37 @@ categories = ["database-implementations"]
|
|||||||
rust-version = "1.88.0"
|
rust-version = "1.88.0"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance = { "version" = "=2.0.0-rc.4", default-features = false, "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-core = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-core = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-datagen = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-datagen = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-file = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-file = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-io = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-io = { "version" = "=2.0.0-rc.4", default-features = false, "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-index = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-index = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-linalg = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-linalg = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-namespace = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-namespace = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-namespace-impls = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-namespace-impls = { "version" = "=2.0.0-rc.4", default-features = false, "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-table = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-table = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-testing = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-testing = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-datafusion = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-datafusion = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-encoding = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-encoding = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-arrow = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
lance-arrow = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
ahash = "0.8"
|
ahash = "0.8"
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "56.2", optional = false }
|
arrow = { version = "57.2", optional = false }
|
||||||
arrow-array = "56.2"
|
arrow-array = "57.2"
|
||||||
arrow-data = "56.2"
|
arrow-data = "57.2"
|
||||||
arrow-ipc = "56.2"
|
arrow-ipc = "57.2"
|
||||||
arrow-ord = "56.2"
|
arrow-ord = "57.2"
|
||||||
arrow-schema = "56.2"
|
arrow-schema = "57.2"
|
||||||
arrow-select = "56.2"
|
arrow-select = "57.2"
|
||||||
arrow-cast = "56.2"
|
arrow-cast = "57.2"
|
||||||
async-trait = "0"
|
async-trait = "0"
|
||||||
datafusion = { version = "50.1", default-features = false }
|
datafusion = { version = "51.0", default-features = false }
|
||||||
datafusion-catalog = "50.1"
|
datafusion-catalog = "51.0"
|
||||||
datafusion-common = { version = "50.1", default-features = false }
|
datafusion-common = { version = "51.0", default-features = false }
|
||||||
datafusion-execution = "50.1"
|
datafusion-execution = "51.0"
|
||||||
datafusion-expr = "50.1"
|
datafusion-expr = "51.0"
|
||||||
datafusion-physical-plan = "50.1"
|
datafusion-physical-plan = "51.0"
|
||||||
env_logger = "0.11"
|
env_logger = "0.11"
|
||||||
half = { "version" = "2.6.0", default-features = false, features = [
|
half = { "version" = "2.6.0", default-features = false, features = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
|||||||
@@ -14,15 +14,15 @@ name = "_lancedb"
|
|||||||
crate-type = ["cdylib"]
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
arrow = { version = "56.2", features = ["pyarrow"] }
|
arrow = { version = "57.2", features = ["pyarrow"] }
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||||
lance-core.workspace = true
|
lance-core.workspace = true
|
||||||
lance-namespace.workspace = true
|
lance-namespace.workspace = true
|
||||||
lance-io.workspace = true
|
lance-io.workspace = true
|
||||||
env_logger.workspace = true
|
env_logger.workspace = true
|
||||||
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
|
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
||||||
pyo3-async-runtimes = { version = "0.25", features = [
|
pyo3-async-runtimes = { version = "0.26", features = [
|
||||||
"attributes",
|
"attributes",
|
||||||
"tokio-runtime",
|
"tokio-runtime",
|
||||||
] }
|
] }
|
||||||
@@ -32,7 +32,7 @@ snafu.workspace = true
|
|||||||
tokio = { version = "1.40", features = ["sync"] }
|
tokio = { version = "1.40", features = ["sync"] }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
pyo3-build-config = { version = "0.25", features = [
|
pyo3-build-config = { version = "0.26", features = [
|
||||||
"extension-module",
|
"extension-module",
|
||||||
"abi3-py39",
|
"abi3-py39",
|
||||||
] }
|
] }
|
||||||
|
|||||||
@@ -10,8 +10,7 @@ use arrow::{
|
|||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use lancedb::arrow::SendableRecordBatchStream;
|
use lancedb::arrow::SendableRecordBatchStream;
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
|
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
|
||||||
Python,
|
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
@@ -36,8 +35,11 @@ impl RecordBatchStream {
|
|||||||
#[pymethods]
|
#[pymethods]
|
||||||
impl RecordBatchStream {
|
impl RecordBatchStream {
|
||||||
#[getter]
|
#[getter]
|
||||||
pub fn schema(&self, py: Python) -> PyResult<PyObject> {
|
pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> {
|
||||||
(*self.schema).clone().into_pyarrow(py)
|
(*self.schema)
|
||||||
|
.clone()
|
||||||
|
.into_pyarrow(py)
|
||||||
|
.map(|obj| obj.unbind())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
|
pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
|
||||||
@@ -53,7 +55,12 @@ impl RecordBatchStream {
|
|||||||
.next()
|
.next()
|
||||||
.await
|
.await
|
||||||
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
|
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
|
||||||
Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
|
Python::attach(|py| {
|
||||||
|
inner_next
|
||||||
|
.infer_error()?
|
||||||
|
.to_pyarrow(py)
|
||||||
|
.map(|obj| obj.unbind())
|
||||||
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ use pyo3::{
|
|||||||
exceptions::{PyRuntimeError, PyValueError},
|
exceptions::{PyRuntimeError, PyValueError},
|
||||||
pyclass, pyfunction, pymethods,
|
pyclass, pyfunction, pymethods,
|
||||||
types::{PyDict, PyDictMethods},
|
types::{PyDict, PyDictMethods},
|
||||||
Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
|
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
@@ -114,7 +114,7 @@ impl Connection {
|
|||||||
data: Bound<'_, PyAny>,
|
data: Bound<'_, PyAny>,
|
||||||
namespace: Vec<String>,
|
namespace: Vec<String>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
storage_options_provider: Option<PyObject>,
|
storage_options_provider: Option<Py<PyAny>>,
|
||||||
location: Option<String>,
|
location: Option<String>,
|
||||||
) -> PyResult<Bound<'a, PyAny>> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
@@ -152,7 +152,7 @@ impl Connection {
|
|||||||
schema: Bound<'_, PyAny>,
|
schema: Bound<'_, PyAny>,
|
||||||
namespace: Vec<String>,
|
namespace: Vec<String>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
storage_options_provider: Option<PyObject>,
|
storage_options_provider: Option<Py<PyAny>>,
|
||||||
location: Option<String>,
|
location: Option<String>,
|
||||||
) -> PyResult<Bound<'a, PyAny>> {
|
) -> PyResult<Bound<'a, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
@@ -187,7 +187,7 @@ impl Connection {
|
|||||||
name: String,
|
name: String,
|
||||||
namespace: Vec<String>,
|
namespace: Vec<String>,
|
||||||
storage_options: Option<HashMap<String, String>>,
|
storage_options: Option<HashMap<String, String>>,
|
||||||
storage_options_provider: Option<PyObject>,
|
storage_options_provider: Option<Py<PyAny>>,
|
||||||
index_cache_size: Option<u32>,
|
index_cache_size: Option<u32>,
|
||||||
location: Option<String>,
|
location: Option<String>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
@@ -307,7 +307,7 @@ impl Connection {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let response = inner.list_namespaces(request).await.infer_error()?;
|
let response = inner.list_namespaces(request).await.infer_error()?;
|
||||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||||
let dict = PyDict::new(py);
|
let dict = PyDict::new(py);
|
||||||
dict.set_item("namespaces", response.namespaces)?;
|
dict.set_item("namespaces", response.namespaces)?;
|
||||||
dict.set_item("page_token", response.page_token)?;
|
dict.set_item("page_token", response.page_token)?;
|
||||||
@@ -345,7 +345,7 @@ impl Connection {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let response = inner.create_namespace(request).await.infer_error()?;
|
let response = inner.create_namespace(request).await.infer_error()?;
|
||||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||||
let dict = PyDict::new(py);
|
let dict = PyDict::new(py);
|
||||||
dict.set_item("properties", response.properties)?;
|
dict.set_item("properties", response.properties)?;
|
||||||
Ok(dict.unbind())
|
Ok(dict.unbind())
|
||||||
@@ -386,7 +386,7 @@ impl Connection {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let response = inner.drop_namespace(request).await.infer_error()?;
|
let response = inner.drop_namespace(request).await.infer_error()?;
|
||||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||||
let dict = PyDict::new(py);
|
let dict = PyDict::new(py);
|
||||||
dict.set_item("properties", response.properties)?;
|
dict.set_item("properties", response.properties)?;
|
||||||
dict.set_item("transaction_id", response.transaction_id)?;
|
dict.set_item("transaction_id", response.transaction_id)?;
|
||||||
@@ -413,7 +413,7 @@ impl Connection {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let response = inner.describe_namespace(request).await.infer_error()?;
|
let response = inner.describe_namespace(request).await.infer_error()?;
|
||||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||||
let dict = PyDict::new(py);
|
let dict = PyDict::new(py);
|
||||||
dict.set_item("properties", response.properties)?;
|
dict.set_item("properties", response.properties)?;
|
||||||
Ok(dict.unbind())
|
Ok(dict.unbind())
|
||||||
@@ -443,7 +443,7 @@ impl Connection {
|
|||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let response = inner.list_tables(request).await.infer_error()?;
|
let response = inner.list_tables(request).await.infer_error()?;
|
||||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||||
let dict = PyDict::new(py);
|
let dict = PyDict::new(py);
|
||||||
dict.set_item("tables", response.tables)?;
|
dict.set_item("tables", response.tables)?;
|
||||||
dict.set_item("page_token", response.page_token)?;
|
dict.set_item("page_token", response.page_token)?;
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
|||||||
request_id,
|
request_id,
|
||||||
source,
|
source,
|
||||||
status_code,
|
status_code,
|
||||||
} => Python::with_gil(|py| {
|
} => Python::attach(|py| {
|
||||||
let message = err.to_string();
|
let message = err.to_string();
|
||||||
let http_err_cls = py
|
let http_err_cls = py
|
||||||
.import(intern!(py, "lancedb.remote.errors"))?
|
.import(intern!(py, "lancedb.remote.errors"))?
|
||||||
@@ -75,7 +75,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
|||||||
max_read_failures,
|
max_read_failures,
|
||||||
source,
|
source,
|
||||||
status_code,
|
status_code,
|
||||||
} => Python::with_gil(|py| {
|
} => Python::attach(|py| {
|
||||||
let cause_err = http_from_rust_error(
|
let cause_err = http_from_rust_error(
|
||||||
py,
|
py,
|
||||||
source.as_ref(),
|
source.as_ref(),
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ pub struct PyHeaderProvider {
|
|||||||
|
|
||||||
impl Clone for PyHeaderProvider {
|
impl Clone for PyHeaderProvider {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
Python::with_gil(|py| Self {
|
Python::attach(|py| Self {
|
||||||
provider: self.provider.clone_ref(py),
|
provider: self.provider.clone_ref(py),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -25,7 +25,7 @@ impl PyHeaderProvider {
|
|||||||
|
|
||||||
/// Get headers from the Python provider (internal implementation)
|
/// Get headers from the Python provider (internal implementation)
|
||||||
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
|
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
|
||||||
Python::with_gil(|py| {
|
Python::attach(|py| {
|
||||||
// Call the get_headers method
|
// Call the get_headers method
|
||||||
let result = self.provider.call_method0(py, "get_headers");
|
let result = self.provider.call_method0(py, "get_headers");
|
||||||
|
|
||||||
|
|||||||
@@ -281,7 +281,7 @@ impl PyPermutationReader {
|
|||||||
let reader = slf.reader.clone();
|
let reader = slf.reader.clone();
|
||||||
future_into_py(slf.py(), async move {
|
future_into_py(slf.py(), async move {
|
||||||
let schema = reader.output_schema(selection).await.infer_error()?;
|
let schema = reader.output_schema(selection).await.infer_error()?;
|
||||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -453,7 +453,7 @@ impl Query {
|
|||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let schema = inner.output_schema().await.infer_error()?;
|
let schema = inner.output_schema().await.infer_error()?;
|
||||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -532,7 +532,7 @@ impl TakeQuery {
|
|||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let schema = inner.output_schema().await.infer_error()?;
|
let schema = inner.output_schema().await.infer_error()?;
|
||||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -627,7 +627,7 @@ impl FTSQuery {
|
|||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let schema = inner.output_schema().await.infer_error()?;
|
let schema = inner.output_schema().await.infer_error()?;
|
||||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -806,7 +806,7 @@ impl VectorQuery {
|
|||||||
let inner = self_.inner.clone();
|
let inner = self_.inner.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let schema = inner.output_schema().await.infer_error()?;
|
let schema = inner.output_schema().await.infer_error()?;
|
||||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -17,20 +17,20 @@ use pyo3::types::PyDict;
|
|||||||
/// Internal wrapper around a Python object implementing StorageOptionsProvider
|
/// Internal wrapper around a Python object implementing StorageOptionsProvider
|
||||||
pub struct PyStorageOptionsProvider {
|
pub struct PyStorageOptionsProvider {
|
||||||
/// The Python object implementing fetch_storage_options()
|
/// The Python object implementing fetch_storage_options()
|
||||||
inner: PyObject,
|
inner: Py<PyAny>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for PyStorageOptionsProvider {
|
impl Clone for PyStorageOptionsProvider {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
Python::with_gil(|py| Self {
|
Python::attach(|py| Self {
|
||||||
inner: self.inner.clone_ref(py),
|
inner: self.inner.clone_ref(py),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PyStorageOptionsProvider {
|
impl PyStorageOptionsProvider {
|
||||||
pub fn new(obj: PyObject) -> PyResult<Self> {
|
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
|
||||||
Python::with_gil(|py| {
|
Python::attach(|py| {
|
||||||
// Verify the object has a fetch_storage_options method
|
// Verify the object has a fetch_storage_options method
|
||||||
if !obj.bind(py).hasattr("fetch_storage_options")? {
|
if !obj.bind(py).hasattr("fetch_storage_options")? {
|
||||||
return Err(pyo3::exceptions::PyTypeError::new_err(
|
return Err(pyo3::exceptions::PyTypeError::new_err(
|
||||||
@@ -60,7 +60,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
|||||||
let py_provider = self.py_provider.clone();
|
let py_provider = self.py_provider.clone();
|
||||||
|
|
||||||
tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
Python::with_gil(|py| {
|
Python::attach(|py| {
|
||||||
// Call the Python fetch_storage_options method
|
// Call the Python fetch_storage_options method
|
||||||
let result = py_provider
|
let result = py_provider
|
||||||
.inner
|
.inner
|
||||||
@@ -119,7 +119,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn provider_id(&self) -> String {
|
fn provider_id(&self) -> String {
|
||||||
Python::with_gil(|py| {
|
Python::attach(|py| {
|
||||||
// Call provider_id() method on the Python object
|
// Call provider_id() method on the Python object
|
||||||
let obj = self.py_provider.inner.bind(py);
|
let obj = self.py_provider.inner.bind(py);
|
||||||
obj.call_method0("provider_id")
|
obj.call_method0("provider_id")
|
||||||
@@ -143,7 +143,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
|
|||||||
/// This is the main entry point for converting Python StorageOptionsProvider objects
|
/// This is the main entry point for converting Python StorageOptionsProvider objects
|
||||||
/// to Rust trait objects that can be used by the Lance ecosystem.
|
/// to Rust trait objects that can be used by the Lance ecosystem.
|
||||||
pub fn py_object_to_storage_options_provider(
|
pub fn py_object_to_storage_options_provider(
|
||||||
py_obj: PyObject,
|
py_obj: Py<PyAny>,
|
||||||
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
|
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
|
||||||
let py_provider = PyStorageOptionsProvider::new(py_obj)?;
|
let py_provider = PyStorageOptionsProvider::new(py_obj)?;
|
||||||
Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))
|
Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))
|
||||||
|
|||||||
@@ -287,7 +287,7 @@ impl Table {
|
|||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let schema = inner.schema().await.infer_error()?;
|
let schema = inner.schema().await.infer_error()?;
|
||||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -437,7 +437,7 @@ impl Table {
|
|||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let stats = inner.index_stats(&index_name).await.infer_error()?;
|
let stats = inner.index_stats(&index_name).await.infer_error()?;
|
||||||
if let Some(stats) = stats {
|
if let Some(stats) = stats {
|
||||||
Python::with_gil(|py| {
|
Python::attach(|py| {
|
||||||
let dict = PyDict::new(py);
|
let dict = PyDict::new(py);
|
||||||
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
|
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
|
||||||
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
|
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
|
||||||
@@ -467,7 +467,7 @@ impl Table {
|
|||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let stats = inner.stats().await.infer_error()?;
|
let stats = inner.stats().await.infer_error()?;
|
||||||
Python::with_gil(|py| {
|
Python::attach(|py| {
|
||||||
let dict = PyDict::new(py);
|
let dict = PyDict::new(py);
|
||||||
dict.set_item("total_bytes", stats.total_bytes)?;
|
dict.set_item("total_bytes", stats.total_bytes)?;
|
||||||
dict.set_item("num_rows", stats.num_rows)?;
|
dict.set_item("num_rows", stats.num_rows)?;
|
||||||
@@ -521,7 +521,7 @@ impl Table {
|
|||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let versions = inner.list_versions().await.infer_error()?;
|
let versions = inner.list_versions().await.infer_error()?;
|
||||||
let versions_as_dict = Python::with_gil(|py| {
|
let versions_as_dict = Python::attach(|py| {
|
||||||
versions
|
versions
|
||||||
.iter()
|
.iter()
|
||||||
.map(|v| {
|
.map(|v| {
|
||||||
@@ -872,7 +872,7 @@ impl Tags {
|
|||||||
let tags = inner.tags().await.infer_error()?;
|
let tags = inner.tags().await.infer_error()?;
|
||||||
let res = tags.list().await.infer_error()?;
|
let res = tags.list().await.infer_error()?;
|
||||||
|
|
||||||
Python::with_gil(|py| {
|
Python::attach(|py| {
|
||||||
let py_dict = PyDict::new(py);
|
let py_dict = PyDict::new(py);
|
||||||
for (key, contents) in res {
|
for (key, contents) in res {
|
||||||
let value_dict = PyDict::new(py);
|
let value_dict = PyDict::new(py);
|
||||||
|
|||||||
@@ -1,14 +1,22 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
use std::{collections::HashMap, sync::Arc};
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
sync::{Arc, Mutex},
|
||||||
|
};
|
||||||
|
|
||||||
use datafusion::prelude::{SessionConfig, SessionContext};
|
use datafusion::prelude::{SessionConfig, SessionContext};
|
||||||
|
use datafusion_catalog::streaming::StreamingTable;
|
||||||
|
use datafusion_common::DataFusionError;
|
||||||
use datafusion_execution::{disk_manager::DiskManagerBuilder, runtime_env::RuntimeEnvBuilder};
|
use datafusion_execution::{disk_manager::DiskManagerBuilder, runtime_env::RuntimeEnvBuilder};
|
||||||
use datafusion_expr::col;
|
use datafusion_expr::col;
|
||||||
|
use datafusion_physical_plan::{
|
||||||
|
stream::RecordBatchStreamAdapter, streaming::PartitionStream,
|
||||||
|
SendableRecordBatchStream as DataFusionRecordBatchStream,
|
||||||
|
};
|
||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
use lance_core::ROW_ID;
|
use lance_core::ROW_ID;
|
||||||
use lance_datafusion::exec::SessionContextExt;
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream},
|
arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream},
|
||||||
@@ -29,6 +37,49 @@ pub const SPLIT_NAMES_CONFIG_KEY: &str = "split_names";
|
|||||||
|
|
||||||
pub const DEFAULT_MEMORY_LIMIT: usize = 100 * 1024 * 1024;
|
pub const DEFAULT_MEMORY_LIMIT: usize = 100 * 1024 * 1024;
|
||||||
|
|
||||||
|
struct OneShotPartitionStream {
|
||||||
|
schema: arrow_schema::SchemaRef,
|
||||||
|
stream: Mutex<Option<DataFusionRecordBatchStream>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OneShotPartitionStream {
|
||||||
|
fn new(schema: arrow_schema::SchemaRef, stream: DataFusionRecordBatchStream) -> Self {
|
||||||
|
Self {
|
||||||
|
schema,
|
||||||
|
stream: Mutex::new(Some(stream)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartitionStream for OneShotPartitionStream {
|
||||||
|
fn schema(&self) -> &arrow_schema::SchemaRef {
|
||||||
|
&self.schema
|
||||||
|
}
|
||||||
|
|
||||||
|
fn execute(&self, _ctx: Arc<datafusion_execution::TaskContext>) -> DataFusionRecordBatchStream {
|
||||||
|
self.stream
|
||||||
|
.lock()
|
||||||
|
.ok()
|
||||||
|
.and_then(|mut stream| stream.take())
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
Box::pin(RecordBatchStreamAdapter::new(
|
||||||
|
Arc::clone(&self.schema),
|
||||||
|
futures::stream::empty::<
|
||||||
|
std::result::Result<arrow_array::RecordBatch, DataFusionError>,
|
||||||
|
>(),
|
||||||
|
))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for OneShotPartitionStream {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("OneShotPartitionStream")
|
||||||
|
.field("schema", &self.schema)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Where to store the permutation table
|
/// Where to store the permutation table
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
enum PermutationDestination {
|
enum PermutationDestination {
|
||||||
@@ -190,12 +241,17 @@ impl PermutationBuilder {
|
|||||||
.build_arc()
|
.build_arc()
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
);
|
);
|
||||||
let df = ctx
|
let df_stream = data.into_df_stream();
|
||||||
.read_one_shot(data.into_df_stream())
|
let schema = df_stream.schema();
|
||||||
.map_err(|e| Error::Other {
|
let partition = Arc::new(OneShotPartitionStream::new(schema.clone(), df_stream));
|
||||||
message: format!("Failed to setup sort by split id: {}", e),
|
let table = StreamingTable::try_new(schema, vec![partition]).map_err(|e| Error::Other {
|
||||||
source: Some(e.into()),
|
message: format!("Failed to create streaming table: {}", e),
|
||||||
})?;
|
source: Some(e.into()),
|
||||||
|
})?;
|
||||||
|
let df = ctx.read_table(Arc::new(table)).map_err(|e| Error::Other {
|
||||||
|
message: format!("Failed to setup sort by split id: {}", e),
|
||||||
|
source: Some(e.into()),
|
||||||
|
})?;
|
||||||
let df_stream = df
|
let df_stream = df
|
||||||
.sort_by(vec![col(SPLIT_ID_COLUMN)])
|
.sort_by(vec![col(SPLIT_ID_COLUMN)])
|
||||||
.map_err(|e| Error::Other {
|
.map_err(|e| Error::Other {
|
||||||
|
|||||||
@@ -1425,7 +1425,10 @@ impl Table {
|
|||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let unioned = Arc::new(UnionExec::new(projected_plans));
|
let unioned = UnionExec::try_new(projected_plans).map_err(|e| Error::Other {
|
||||||
|
message: format!("Failed to union query plans: {}", e),
|
||||||
|
source: Some(e.into()),
|
||||||
|
})?;
|
||||||
// We require 1 partition in the final output
|
// We require 1 partition in the final output
|
||||||
let repartitioned = RepartitionExec::try_new(
|
let repartitioned = RepartitionExec::try_new(
|
||||||
unioned,
|
unioned,
|
||||||
|
|||||||
@@ -101,6 +101,7 @@ impl DatasetRef {
|
|||||||
refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
|
refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
|
||||||
refs::Ref::Version(_, None) => true, // No specific version, always checkout
|
refs::Ref::Version(_, None) => true, // No specific version, always checkout
|
||||||
refs::Ref::Tag(_) => true, // Always checkout for tags
|
refs::Ref::Tag(_) => true, // Always checkout for tags
|
||||||
|
refs::Ref::VersionNumber(target_ver) => version != target_ver,
|
||||||
};
|
};
|
||||||
|
|
||||||
if should_checkout {
|
if should_checkout {
|
||||||
|
|||||||
Reference in New Issue
Block a user