chore(revert): revert update lance dependency to v2.0.0-rc.1 (#2936) (#2941)

This reverts commit bd84bba14d so that we
can bump the version to 1.0.4-rc.1.
This commit is contained in:
Jack Ye
2026-01-26 11:13:59 -08:00
committed by GitHub
parent f979a902ad
commit e4552e577a
18 changed files with 576 additions and 460 deletions

View File

@@ -14,15 +14,15 @@ name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "57.2", features = ["pyarrow"] }
arrow = { version = "56.2", features = ["pyarrow"] }
async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false }
lance-core.workspace = true
lance-namespace.workspace = true
lance-io.workspace = true
env_logger.workspace = true
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.26", features = [
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.25", features = [
"attributes",
"tokio-runtime",
] }
@@ -32,7 +32,7 @@ snafu.workspace = true
tokio = { version = "1.40", features = ["sync"] }
[build-dependencies]
pyo3-build-config = { version = "0.26", features = [
pyo3-build-config = { version = "0.25", features = [
"extension-module",
"abi3-py39",
] }

View File

@@ -961,27 +961,22 @@ class LanceQueryBuilder(ABC):
>>> query = [100, 100]
>>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, elapsed=..., metrics=...
TracedExec, elapsed=..., metrics=...
ProjectionExec: elapsed=..., expr=[...],
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
GlobalLimitExec: elapsed=..., skip=0, fetch=10,
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
FilterExec: elapsed=..., _distance@2 IS NOT NULL, metrics=[...]
SortExec: elapsed=..., TopK(fetch=10), expr=[...],
AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
TracedExec, metrics=[], cumulative_cpu=...
ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=...,
output_bytes=..., row_replacements=...]
KNNVectorDistance: elapsed=..., metric=l2,
metrics=[output_rows=..., elapsed_compute=...,
output_bytes=..., output_batches=...]
LanceRead: elapsed=..., uri=..., projection=[vector],
num_fragments=..., range_before=None, range_after=None,
row_id=true, row_addr=false,
full_filter=--, refine_filter=--,
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...,
fragments_scanned=..., ranges_scanned=1, rows_scanned=1,
bytes_read=..., iops=..., requests=..., task_wait_time=...]
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
cumulative_cpu=...
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
cumulative_cpu=...
LanceRead: uri=..., projection=[vector], ...
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...], cumulative_cpu=...
Returns
-------

View File

@@ -601,6 +601,7 @@ def test_head():
def test_query_sync_minimal():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"refine_factor": None,
@@ -684,6 +685,7 @@ def test_query_sync_maximal():
def test_query_sync_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
@@ -713,6 +715,7 @@ def test_query_sync_nprobes():
def test_query_sync_no_max_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
@@ -835,6 +838,7 @@ def test_query_sync_hybrid():
else:
# Vector query
assert body == {
"distance_type": "l2",
"k": 42,
"prefilter": True,
"refine_factor": None,

View File

@@ -1880,13 +1880,8 @@ async def test_optimize_delete_unverified(tmp_db_async: AsyncConnection, tmp_pat
],
)
version = await table.version()
assert version == 2
# By removing a manifest file, we make the data files we just inserted unverified
version_name = 18446744073709551615 - (version - 1)
path = tmp_path / "test.lance" / "_versions" / f"{version_name:020}.manifest"
path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
os.remove(path)
stats = await table.optimize(delete_unverified=False)
assert stats.prune.old_versions_removed == 0
stats = await table.optimize(

View File

@@ -10,7 +10,8 @@ use arrow::{
use futures::stream::StreamExt;
use lancedb::arrow::SendableRecordBatchStream;
use pyo3::{
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -35,11 +36,8 @@ impl RecordBatchStream {
#[pymethods]
impl RecordBatchStream {
#[getter]
pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> {
(*self.schema)
.clone()
.into_pyarrow(py)
.map(|obj| obj.unbind())
pub fn schema(&self, py: Python) -> PyResult<PyObject> {
(*self.schema).clone().into_pyarrow(py)
}
pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
@@ -55,12 +53,7 @@ impl RecordBatchStream {
.next()
.await
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
Python::attach(|py| {
inner_next
.infer_error()?
.to_pyarrow(py)
.map(|obj| obj.unbind())
})
Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
})
}
}

View File

@@ -12,7 +12,7 @@ use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods,
types::{PyDict, PyDictMethods},
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -114,7 +114,7 @@ impl Connection {
data: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>,
storage_options_provider: Option<PyObject>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -152,7 +152,7 @@ impl Connection {
schema: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>,
storage_options_provider: Option<PyObject>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -187,7 +187,7 @@ impl Connection {
name: String,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>,
storage_options_provider: Option<PyObject>,
index_cache_size: Option<u32>,
location: Option<String>,
) -> PyResult<Bound<'_, PyAny>> {
@@ -307,7 +307,7 @@ impl Connection {
..Default::default()
};
let response = inner.list_namespaces(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("namespaces", response.namespaces)?;
dict.set_item("page_token", response.page_token)?;
@@ -345,7 +345,7 @@ impl Connection {
..Default::default()
};
let response = inner.create_namespace(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
@@ -386,7 +386,7 @@ impl Connection {
..Default::default()
};
let response = inner.drop_namespace(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
dict.set_item("transaction_id", response.transaction_id)?;
@@ -413,7 +413,7 @@ impl Connection {
..Default::default()
};
let response = inner.describe_namespace(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
@@ -443,7 +443,7 @@ impl Connection {
..Default::default()
};
let response = inner.list_tables(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("tables", response.tables)?;
dict.set_item("page_token", response.page_token)?;

View File

@@ -40,7 +40,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
request_id,
source,
status_code,
} => Python::attach(|py| {
} => Python::with_gil(|py| {
let message = err.to_string();
let http_err_cls = py
.import(intern!(py, "lancedb.remote.errors"))?
@@ -75,7 +75,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
max_read_failures,
source,
status_code,
} => Python::attach(|py| {
} => Python::with_gil(|py| {
let cause_err = http_from_rust_error(
py,
source.as_ref(),

View File

@@ -12,7 +12,7 @@ pub struct PyHeaderProvider {
impl Clone for PyHeaderProvider {
fn clone(&self) -> Self {
Python::attach(|py| Self {
Python::with_gil(|py| Self {
provider: self.provider.clone_ref(py),
})
}
@@ -25,7 +25,7 @@ impl PyHeaderProvider {
/// Get headers from the Python provider (internal implementation)
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
Python::attach(|py| {
Python::with_gil(|py| {
// Call the get_headers method
let result = self.provider.call_method0(py, "get_headers");

View File

@@ -281,7 +281,7 @@ impl PyPermutationReader {
let reader = slf.reader.clone();
future_into_py(slf.py(), async move {
let schema = reader.output_schema(selection).await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py))
})
}

View File

@@ -453,7 +453,7 @@ impl Query {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py))
})
}
@@ -532,7 +532,7 @@ impl TakeQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py))
})
}
@@ -627,7 +627,7 @@ impl FTSQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py))
})
}
@@ -806,7 +806,7 @@ impl VectorQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py))
})
}

View File

@@ -17,20 +17,20 @@ use pyo3::types::PyDict;
/// Internal wrapper around a Python object implementing StorageOptionsProvider
pub struct PyStorageOptionsProvider {
/// The Python object implementing fetch_storage_options()
inner: Py<PyAny>,
inner: PyObject,
}
impl Clone for PyStorageOptionsProvider {
fn clone(&self) -> Self {
Python::attach(|py| Self {
Python::with_gil(|py| Self {
inner: self.inner.clone_ref(py),
})
}
}
impl PyStorageOptionsProvider {
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
Python::attach(|py| {
pub fn new(obj: PyObject) -> PyResult<Self> {
Python::with_gil(|py| {
// Verify the object has a fetch_storage_options method
if !obj.bind(py).hasattr("fetch_storage_options")? {
return Err(pyo3::exceptions::PyTypeError::new_err(
@@ -60,7 +60,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
let py_provider = self.py_provider.clone();
tokio::task::spawn_blocking(move || {
Python::attach(|py| {
Python::with_gil(|py| {
// Call the Python fetch_storage_options method
let result = py_provider
.inner
@@ -119,7 +119,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
}
fn provider_id(&self) -> String {
Python::attach(|py| {
Python::with_gil(|py| {
// Call provider_id() method on the Python object
let obj = self.py_provider.inner.bind(py);
obj.call_method0("provider_id")
@@ -143,7 +143,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
/// This is the main entry point for converting Python StorageOptionsProvider objects
/// to Rust trait objects that can be used by the Lance ecosystem.
pub fn py_object_to_storage_options_provider(
py_obj: Py<PyAny>,
py_obj: PyObject,
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
let py_provider = PyStorageOptionsProvider::new(py_obj)?;
Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))

View File

@@ -287,7 +287,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let schema = inner.schema().await.infer_error()?;
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py))
})
}
@@ -437,7 +437,7 @@ impl Table {
future_into_py(self_.py(), async move {
let stats = inner.index_stats(&index_name).await.infer_error()?;
if let Some(stats) = stats {
Python::attach(|py| {
Python::with_gil(|py| {
let dict = PyDict::new(py);
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
@@ -467,7 +467,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let stats = inner.stats().await.infer_error()?;
Python::attach(|py| {
Python::with_gil(|py| {
let dict = PyDict::new(py);
dict.set_item("total_bytes", stats.total_bytes)?;
dict.set_item("num_rows", stats.num_rows)?;
@@ -521,7 +521,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let versions = inner.list_versions().await.infer_error()?;
let versions_as_dict = Python::attach(|py| {
let versions_as_dict = Python::with_gil(|py| {
versions
.iter()
.map(|v| {
@@ -872,7 +872,7 @@ impl Tags {
let tags = inner.tags().await.infer_error()?;
let res = tags.list().await.infer_error()?;
Python::attach(|py| {
Python::with_gil(|py| {
let py_dict = PyDict::new(py);
for (key, contents) in res {
let value_dict = PyDict::new(py);