mirror of
https://github.com/lancedb/lancedb.git
synced 2026-03-26 02:20:40 +00:00
Compare commits
2 Commits
python-v0.
...
jack/upgra
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e6465552fb | ||
|
|
5baaa92782 |
831
Cargo.lock
generated
831
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
58
Cargo.toml
58
Cargo.toml
@@ -15,39 +15,39 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.88.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=2.0.0-rc.4", default-features = false, "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=2.0.0-rc.4", default-features = false, "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=2.0.0-rc.4", default-features = false, "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=2.0.0-rc.4", "tag" = "v2.0.0-rc.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "56.2", optional = false }
|
||||
arrow-array = "56.2"
|
||||
arrow-data = "56.2"
|
||||
arrow-ipc = "56.2"
|
||||
arrow-ord = "56.2"
|
||||
arrow-schema = "56.2"
|
||||
arrow-select = "56.2"
|
||||
arrow-cast = "56.2"
|
||||
arrow = { version = "57.2", optional = false }
|
||||
arrow-array = "57.2"
|
||||
arrow-data = "57.2"
|
||||
arrow-ipc = "57.2"
|
||||
arrow-ord = "57.2"
|
||||
arrow-schema = "57.2"
|
||||
arrow-select = "57.2"
|
||||
arrow-cast = "57.2"
|
||||
async-trait = "0"
|
||||
datafusion = { version = "50.1", default-features = false }
|
||||
datafusion-catalog = "50.1"
|
||||
datafusion-common = { version = "50.1", default-features = false }
|
||||
datafusion-execution = "50.1"
|
||||
datafusion-expr = "50.1"
|
||||
datafusion-physical-plan = "50.1"
|
||||
datafusion = { version = "51.0", default-features = false }
|
||||
datafusion-catalog = "51.0"
|
||||
datafusion-common = { version = "51.0", default-features = false }
|
||||
datafusion-execution = "51.0"
|
||||
datafusion-expr = "51.0"
|
||||
datafusion-physical-plan = "51.0"
|
||||
env_logger = "0.11"
|
||||
half = { "version" = "2.6.0", default-features = false, features = [
|
||||
half = { "version" = "2.7.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
] }
|
||||
futures = "0"
|
||||
|
||||
@@ -1520,9 +1520,9 @@ describe("when optimizing a dataset", () => {
|
||||
|
||||
it("delete unverified", async () => {
|
||||
const version = await table.version();
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
|
||||
version - 1
|
||||
}.manifest`;
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${String(
|
||||
18446744073709551615n - (BigInt(version) - 1n),
|
||||
).padStart(20, "0")}.manifest`;
|
||||
fs.rmSync(versionFile);
|
||||
|
||||
let stats = await table.optimize({ deleteUnverified: false });
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.28.0-beta.0"
|
||||
version = "0.27.0"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -14,15 +14,15 @@ name = "_lancedb"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
arrow = { version = "56.2", features = ["pyarrow"] }
|
||||
arrow = { version = "57.2", features = ["pyarrow"] }
|
||||
async-trait = "0.1"
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
lance-core.workspace = true
|
||||
lance-namespace.workspace = true
|
||||
lance-io.workspace = true
|
||||
env_logger.workspace = true
|
||||
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py310"] }
|
||||
pyo3-async-runtimes = { version = "0.25", features = [
|
||||
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3-async-runtimes = { version = "0.26", features = [
|
||||
"attributes",
|
||||
"tokio-runtime",
|
||||
] }
|
||||
@@ -32,9 +32,9 @@ snafu.workspace = true
|
||||
tokio = { version = "1.40", features = ["sync"] }
|
||||
|
||||
[build-dependencies]
|
||||
pyo3-build-config = { version = "0.25", features = [
|
||||
pyo3-build-config = { version = "0.26", features = [
|
||||
"extension-module",
|
||||
"abi3-py310",
|
||||
"abi3-py39",
|
||||
] }
|
||||
|
||||
[features]
|
||||
|
||||
@@ -961,22 +961,27 @@ class LanceQueryBuilder(ABC):
|
||||
>>> query = [100, 100]
|
||||
>>> plan = table.search(query).analyze_plan()
|
||||
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
|
||||
TracedExec, metrics=[], cumulative_cpu=...
|
||||
ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
|
||||
GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
|
||||
FilterExec: _distance@2 IS NOT NULL,
|
||||
metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
|
||||
SortExec: TopK(fetch=10), expr=[...],
|
||||
AnalyzeExec verbose=true, elapsed=..., metrics=...
|
||||
TracedExec, elapsed=..., metrics=...
|
||||
ProjectionExec: elapsed=..., expr=[...],
|
||||
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
|
||||
GlobalLimitExec: elapsed=..., skip=0, fetch=10,
|
||||
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
|
||||
FilterExec: elapsed=..., _distance@2 IS NOT NULL, metrics=[...]
|
||||
SortExec: elapsed=..., TopK(fetch=10), expr=[...],
|
||||
preserve_partitioning=[...],
|
||||
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
|
||||
cumulative_cpu=...
|
||||
KNNVectorDistance: metric=l2,
|
||||
metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
|
||||
cumulative_cpu=...
|
||||
LanceRead: uri=..., projection=[vector], ...
|
||||
metrics=[output_rows=..., elapsed_compute=...,
|
||||
bytes_read=..., iops=..., requests=...], cumulative_cpu=...
|
||||
metrics=[output_rows=..., elapsed_compute=...,
|
||||
output_bytes=..., row_replacements=...]
|
||||
KNNVectorDistance: elapsed=..., metric=l2,
|
||||
metrics=[output_rows=..., elapsed_compute=...,
|
||||
output_bytes=..., output_batches=...]
|
||||
LanceRead: elapsed=..., uri=..., projection=[vector],
|
||||
num_fragments=..., range_before=None, range_after=None,
|
||||
row_id=true, row_addr=false,
|
||||
full_filter=--, refine_filter=--,
|
||||
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...,
|
||||
fragments_scanned=..., ranges_scanned=1, rows_scanned=1,
|
||||
bytes_read=..., iops=..., requests=..., task_wait_time=...]
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
@@ -601,7 +601,6 @@ def test_head():
|
||||
def test_query_sync_minimal():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"refine_factor": None,
|
||||
@@ -685,7 +684,6 @@ def test_query_sync_maximal():
|
||||
def test_query_sync_nprobes():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"fast_search": True,
|
||||
@@ -715,7 +713,6 @@ def test_query_sync_nprobes():
|
||||
def test_query_sync_no_max_nprobes():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"fast_search": True,
|
||||
@@ -838,7 +835,6 @@ def test_query_sync_hybrid():
|
||||
else:
|
||||
# Vector query
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 42,
|
||||
"prefilter": True,
|
||||
"refine_factor": None,
|
||||
|
||||
@@ -1880,8 +1880,13 @@ async def test_optimize_delete_unverified(tmp_db_async: AsyncConnection, tmp_pat
|
||||
],
|
||||
)
|
||||
version = await table.version()
|
||||
path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
|
||||
assert version == 2
|
||||
|
||||
# By removing a manifest file, we make the data files we just inserted unverified
|
||||
version_name = 18446744073709551615 - (version - 1)
|
||||
path = tmp_path / "test.lance" / "_versions" / f"{version_name:020}.manifest"
|
||||
os.remove(path)
|
||||
|
||||
stats = await table.optimize(delete_unverified=False)
|
||||
assert stats.prune.old_versions_removed == 0
|
||||
stats = await table.optimize(
|
||||
|
||||
@@ -10,8 +10,7 @@ use arrow::{
|
||||
use futures::stream::StreamExt;
|
||||
use lancedb::arrow::SendableRecordBatchStream;
|
||||
use pyo3::{
|
||||
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
|
||||
Python,
|
||||
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
|
||||
};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
|
||||
@@ -36,8 +35,11 @@ impl RecordBatchStream {
|
||||
#[pymethods]
|
||||
impl RecordBatchStream {
|
||||
#[getter]
|
||||
pub fn schema(&self, py: Python) -> PyResult<PyObject> {
|
||||
(*self.schema).clone().into_pyarrow(py)
|
||||
pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> {
|
||||
(*self.schema)
|
||||
.clone()
|
||||
.into_pyarrow(py)
|
||||
.map(|obj| obj.unbind())
|
||||
}
|
||||
|
||||
pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
|
||||
@@ -53,7 +55,12 @@ impl RecordBatchStream {
|
||||
.next()
|
||||
.await
|
||||
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
|
||||
Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
|
||||
Python::attach(|py| {
|
||||
inner_next
|
||||
.infer_error()?
|
||||
.to_pyarrow(py)
|
||||
.map(|obj| obj.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ use pyo3::{
|
||||
exceptions::{PyRuntimeError, PyValueError},
|
||||
pyclass, pyfunction, pymethods,
|
||||
types::{PyDict, PyDictMethods},
|
||||
Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
|
||||
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||
};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
|
||||
@@ -114,7 +114,7 @@ impl Connection {
|
||||
data: Bound<'_, PyAny>,
|
||||
namespace: Vec<String>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
storage_options_provider: Option<PyObject>,
|
||||
storage_options_provider: Option<Py<PyAny>>,
|
||||
location: Option<String>,
|
||||
) -> PyResult<Bound<'a, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
@@ -152,7 +152,7 @@ impl Connection {
|
||||
schema: Bound<'_, PyAny>,
|
||||
namespace: Vec<String>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
storage_options_provider: Option<PyObject>,
|
||||
storage_options_provider: Option<Py<PyAny>>,
|
||||
location: Option<String>,
|
||||
) -> PyResult<Bound<'a, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
@@ -187,7 +187,7 @@ impl Connection {
|
||||
name: String,
|
||||
namespace: Vec<String>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
storage_options_provider: Option<PyObject>,
|
||||
storage_options_provider: Option<Py<PyAny>>,
|
||||
index_cache_size: Option<u32>,
|
||||
location: Option<String>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
@@ -307,7 +307,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.list_namespaces(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("namespaces", response.namespaces)?;
|
||||
dict.set_item("page_token", response.page_token)?;
|
||||
@@ -345,7 +345,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.create_namespace(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
Ok(dict.unbind())
|
||||
@@ -386,7 +386,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.drop_namespace(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
dict.set_item("transaction_id", response.transaction_id)?;
|
||||
@@ -413,7 +413,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.describe_namespace(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
Ok(dict.unbind())
|
||||
@@ -443,7 +443,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.list_tables(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("tables", response.tables)?;
|
||||
dict.set_item("page_token", response.page_token)?;
|
||||
|
||||
@@ -40,7 +40,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
request_id,
|
||||
source,
|
||||
status_code,
|
||||
} => Python::with_gil(|py| {
|
||||
} => Python::attach(|py| {
|
||||
let message = err.to_string();
|
||||
let http_err_cls = py
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
@@ -75,7 +75,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
max_read_failures,
|
||||
source,
|
||||
status_code,
|
||||
} => Python::with_gil(|py| {
|
||||
} => Python::attach(|py| {
|
||||
let cause_err = http_from_rust_error(
|
||||
py,
|
||||
source.as_ref(),
|
||||
|
||||
@@ -12,7 +12,7 @@ pub struct PyHeaderProvider {
|
||||
|
||||
impl Clone for PyHeaderProvider {
|
||||
fn clone(&self) -> Self {
|
||||
Python::with_gil(|py| Self {
|
||||
Python::attach(|py| Self {
|
||||
provider: self.provider.clone_ref(py),
|
||||
})
|
||||
}
|
||||
@@ -25,7 +25,7 @@ impl PyHeaderProvider {
|
||||
|
||||
/// Get headers from the Python provider (internal implementation)
|
||||
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
// Call the get_headers method
|
||||
let result = self.provider.call_method0(py, "get_headers");
|
||||
|
||||
|
||||
@@ -281,7 +281,7 @@ impl PyPermutationReader {
|
||||
let reader = slf.reader.clone();
|
||||
future_into_py(slf.py(), async move {
|
||||
let schema = reader.output_schema(selection).await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -453,7 +453,7 @@ impl Query {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -532,7 +532,7 @@ impl TakeQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -627,7 +627,7 @@ impl FTSQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -806,7 +806,7 @@ impl VectorQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -17,20 +17,20 @@ use pyo3::types::PyDict;
|
||||
/// Internal wrapper around a Python object implementing StorageOptionsProvider
|
||||
pub struct PyStorageOptionsProvider {
|
||||
/// The Python object implementing fetch_storage_options()
|
||||
inner: PyObject,
|
||||
inner: Py<PyAny>,
|
||||
}
|
||||
|
||||
impl Clone for PyStorageOptionsProvider {
|
||||
fn clone(&self) -> Self {
|
||||
Python::with_gil(|py| Self {
|
||||
Python::attach(|py| Self {
|
||||
inner: self.inner.clone_ref(py),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl PyStorageOptionsProvider {
|
||||
pub fn new(obj: PyObject) -> PyResult<Self> {
|
||||
Python::with_gil(|py| {
|
||||
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
|
||||
Python::attach(|py| {
|
||||
// Verify the object has a fetch_storage_options method
|
||||
if !obj.bind(py).hasattr("fetch_storage_options")? {
|
||||
return Err(pyo3::exceptions::PyTypeError::new_err(
|
||||
@@ -60,7 +60,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
let py_provider = self.py_provider.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
// Call the Python fetch_storage_options method
|
||||
let result = py_provider
|
||||
.inner
|
||||
@@ -119,7 +119,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
}
|
||||
|
||||
fn provider_id(&self) -> String {
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
// Call provider_id() method on the Python object
|
||||
let obj = self.py_provider.inner.bind(py);
|
||||
obj.call_method0("provider_id")
|
||||
@@ -143,7 +143,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
|
||||
/// This is the main entry point for converting Python StorageOptionsProvider objects
|
||||
/// to Rust trait objects that can be used by the Lance ecosystem.
|
||||
pub fn py_object_to_storage_options_provider(
|
||||
py_obj: PyObject,
|
||||
py_obj: Py<PyAny>,
|
||||
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
|
||||
let py_provider = PyStorageOptionsProvider::new(py_obj)?;
|
||||
Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))
|
||||
|
||||
@@ -287,7 +287,7 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.schema().await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -437,7 +437,7 @@ impl Table {
|
||||
future_into_py(self_.py(), async move {
|
||||
let stats = inner.index_stats(&index_name).await.infer_error()?;
|
||||
if let Some(stats) = stats {
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
|
||||
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
|
||||
@@ -467,7 +467,7 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let stats = inner.stats().await.infer_error()?;
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("total_bytes", stats.total_bytes)?;
|
||||
dict.set_item("num_rows", stats.num_rows)?;
|
||||
@@ -521,7 +521,7 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let versions = inner.list_versions().await.infer_error()?;
|
||||
let versions_as_dict = Python::with_gil(|py| {
|
||||
let versions_as_dict = Python::attach(|py| {
|
||||
versions
|
||||
.iter()
|
||||
.map(|v| {
|
||||
@@ -872,7 +872,7 @@ impl Tags {
|
||||
let tags = inner.tags().await.infer_error()?;
|
||||
let res = tags.list().await.infer_error()?;
|
||||
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
let py_dict = PyDict::new(py);
|
||||
for (key, contents) in res {
|
||||
let value_dict = PyDict::new(py);
|
||||
|
||||
@@ -468,7 +468,9 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
self.apply_query_params(&mut body, &query.base)?;
|
||||
|
||||
// Apply general parameters, before we dispatch based on number of query vectors.
|
||||
body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
|
||||
if let Some(distance_type) = query.distance_type {
|
||||
body["distance_type"] = serde_json::json!(distance_type);
|
||||
}
|
||||
// In 0.23.1 we migrated from `nprobes` to `minimum_nprobes` and `maximum_nprobes`.
|
||||
// Old client / new server: since minimum_nprobes is missing, fallback to nprobes
|
||||
// New client / old server: old server will only see nprobes, make sure to set both
|
||||
@@ -2230,7 +2232,6 @@ mod tests {
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
let mut expected_body = serde_json::json!({
|
||||
"prefilter": true,
|
||||
"distance_type": "l2",
|
||||
"nprobes": 20,
|
||||
"minimum_nprobes": 20,
|
||||
"maximum_nprobes": 20,
|
||||
|
||||
@@ -1417,7 +1417,9 @@ impl Table {
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let unioned = Arc::new(UnionExec::new(projected_plans));
|
||||
let unioned = UnionExec::try_new(projected_plans).map_err(|err| Error::Runtime {
|
||||
message: err.to_string(),
|
||||
})?;
|
||||
// We require 1 partition in the final output
|
||||
let repartitioned = RepartitionExec::try_new(
|
||||
unioned,
|
||||
|
||||
@@ -100,7 +100,8 @@ impl DatasetRef {
|
||||
let should_checkout = match &target_ref {
|
||||
refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
|
||||
refs::Ref::Version(_, None) => true, // No specific version, always checkout
|
||||
refs::Ref::Tag(_) => true, // Always checkout for tags
|
||||
refs::Ref::VersionNumber(target_ver) => version != target_ver,
|
||||
refs::Ref::Tag(_) => true, // Always checkout for tags
|
||||
};
|
||||
|
||||
if should_checkout {
|
||||
|
||||
Reference in New Issue
Block a user