feat: upgrade lance to 0.15.0 (#1477)

Changelog: https://github.com/lancedb/lance/releases/tag/v0.15.0

* Fixes #1466
* Closes #1475
* Fixes #1446
This commit is contained in:
Will Jones
2024-07-26 09:13:49 -07:00
committed by GitHub
parent 513926960d
commit 9555efacf9
10 changed files with 114 additions and 243 deletions

View File

@@ -9,8 +9,8 @@ use arrow::{
};
use futures::stream::StreamExt;
use lancedb::arrow::SendableRecordBatchStream;
use pyo3::{pyclass, pymethods, PyAny, PyObject, PyRef, PyResult, Python};
use pyo3_asyncio::tokio::future_into_py;
use pyo3::{pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult, Python};
use pyo3_asyncio_0_21::tokio::future_into_py;
use crate::error::PythonErrorExt;
@@ -36,7 +36,7 @@ impl RecordBatchStream {
(*self.schema).clone().into_pyarrow(py)
}
pub fn next(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
pub fn next(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let inner_next = inner.lock().await.next().await;

View File

@@ -18,9 +18,9 @@ use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::From
use lancedb::connection::{Connection as LanceConnection, CreateTableMode};
use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods, PyAny, PyRef, PyResult, Python,
pyclass, pyfunction, pymethods, Bound, PyAny, PyRef, PyResult, Python,
};
use pyo3_asyncio::tokio::future_into_py;
use pyo3_asyncio_0_21::tokio::future_into_py;
use crate::{error::PythonErrorExt, table::Table};
@@ -73,7 +73,7 @@ impl Connection {
self_: PyRef<'_, Self>,
start_after: Option<String>,
limit: Option<u32>,
) -> PyResult<&PyAny> {
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
let mut op = inner.table_names();
if let Some(start_after) = start_after {
@@ -89,15 +89,15 @@ impl Connection {
self_: PyRef<'a, Self>,
name: String,
mode: &str,
data: &PyAny,
data: Bound<'_, PyAny>,
storage_options: Option<HashMap<String, String>>,
use_legacy_format: Option<bool>,
) -> PyResult<&'a PyAny> {
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
let mode = Self::parse_create_mode_str(mode)?;
let batches = ArrowArrayStreamReader::from_pyarrow(data)?;
let batches = ArrowArrayStreamReader::from_pyarrow_bound(&data)?;
let mut builder = inner.create_table(name, batches).mode(mode);
if let Some(storage_options) = storage_options {
@@ -118,15 +118,15 @@ impl Connection {
self_: PyRef<'a, Self>,
name: String,
mode: &str,
schema: &PyAny,
schema: Bound<'_, PyAny>,
storage_options: Option<HashMap<String, String>>,
use_legacy_format: Option<bool>,
) -> PyResult<&'a PyAny> {
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
let mode = Self::parse_create_mode_str(mode)?;
let schema = Schema::from_pyarrow(schema)?;
let schema = Schema::from_pyarrow_bound(&schema)?;
let mut builder = inner.create_empty_table(name, Arc::new(schema)).mode(mode);
@@ -150,7 +150,7 @@ impl Connection {
name: String,
storage_options: Option<HashMap<String, String>>,
index_cache_size: Option<u32>,
) -> PyResult<&PyAny> {
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
let mut builder = inner.open_table(name);
if let Some(storage_options) = storage_options {
@@ -165,14 +165,14 @@ impl Connection {
})
}
pub fn drop_table(self_: PyRef<'_, Self>, name: String) -> PyResult<&PyAny> {
pub fn drop_table(self_: PyRef<'_, Self>, name: String) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
future_into_py(self_.py(), async move {
inner.drop_table(name).await.infer_error()
})
}
pub fn drop_db(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
pub fn drop_db(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
future_into_py(
self_.py(),
@@ -190,7 +190,7 @@ pub fn connect(
host_override: Option<String>,
read_consistency_interval: Option<f64>,
storage_options: Option<HashMap<String, String>>,
) -> PyResult<&PyAny> {
) -> PyResult<Bound<'_, PyAny>> {
future_into_py(py, async move {
let mut builder = lancedb::connect(&uri);
if let Some(api_key) = api_key {

View File

@@ -22,10 +22,11 @@ use lancedb::query::{
use pyo3::exceptions::PyRuntimeError;
use pyo3::pyclass;
use pyo3::pymethods;
use pyo3::Bound;
use pyo3::PyAny;
use pyo3::PyRef;
use pyo3::PyResult;
use pyo3_asyncio::tokio::future_into_py;
use pyo3_asyncio_0_21::tokio::future_into_py;
use crate::arrow::RecordBatchStream;
use crate::error::PythonErrorExt;
@@ -60,14 +61,17 @@ impl Query {
self.inner = self.inner.clone().limit(limit as usize);
}
pub fn nearest_to(&mut self, vector: &PyAny) -> PyResult<VectorQuery> {
let data: ArrayData = ArrayData::from_pyarrow(vector)?;
pub fn nearest_to(&mut self, vector: Bound<'_, PyAny>) -> PyResult<VectorQuery> {
let data: ArrayData = ArrayData::from_pyarrow_bound(&vector)?;
let array = make_array(data);
let inner = self.inner.clone().nearest_to(array).infer_error()?;
Ok(VectorQuery { inner })
}
pub fn execute(self_: PyRef<'_, Self>, max_batch_length: Option<u32>) -> PyResult<&PyAny> {
pub fn execute(
self_: PyRef<'_, Self>,
max_batch_length: Option<u32>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let mut opts = QueryExecutionOptions::default();
@@ -79,7 +83,7 @@ impl Query {
})
}
fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<&PyAny> {
fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
inner
@@ -139,7 +143,10 @@ impl VectorQuery {
self.inner = self.inner.clone().bypass_vector_index()
}
pub fn execute(self_: PyRef<'_, Self>, max_batch_length: Option<u32>) -> PyResult<&PyAny> {
pub fn execute(
self_: PyRef<'_, Self>,
max_batch_length: Option<u32>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let mut opts = QueryExecutionOptions::default();
@@ -151,7 +158,7 @@ impl VectorQuery {
})
}
fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<&PyAny> {
fn explain_plan(self_: PyRef<'_, Self>, verbose: bool) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
inner

View File

@@ -9,9 +9,9 @@ use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pymethods,
types::{PyDict, PyString},
PyAny, PyRef, PyResult, Python,
Bound, PyAny, PyRef, PyResult, Python,
};
use pyo3_asyncio::tokio::future_into_py;
use pyo3_asyncio_0_21::tokio::future_into_py;
use crate::{
error::PythonErrorExt,
@@ -91,7 +91,7 @@ impl Table {
self.inner.take();
}
pub fn schema(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
pub fn schema(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let schema = inner.schema().await.infer_error()?;
@@ -99,8 +99,12 @@ impl Table {
})
}
pub fn add<'a>(self_: PyRef<'a, Self>, data: &PyAny, mode: String) -> PyResult<&'a PyAny> {
let batches = ArrowArrayStreamReader::from_pyarrow(data)?;
pub fn add<'a>(
self_: PyRef<'a, Self>,
data: Bound<'_, PyAny>,
mode: String,
) -> PyResult<Bound<'a, PyAny>> {
let batches = ArrowArrayStreamReader::from_pyarrow_bound(&data)?;
let mut op = self_.inner_ref()?.add(batches);
if mode == "append" {
op = op.mode(AddDataMode::Append);
@@ -116,7 +120,7 @@ impl Table {
})
}
pub fn delete(self_: PyRef<'_, Self>, condition: String) -> PyResult<&PyAny> {
pub fn delete(self_: PyRef<'_, Self>, condition: String) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
inner.delete(&condition).await.infer_error()
@@ -127,7 +131,7 @@ impl Table {
self_: PyRef<'a, Self>,
updates: &PyDict,
r#where: Option<String>,
) -> PyResult<&'a PyAny> {
) -> PyResult<Bound<'a, PyAny>> {
let mut op = self_.inner_ref()?.update();
if let Some(only_if) = r#where {
op = op.only_if(only_if);
@@ -145,7 +149,10 @@ impl Table {
})
}
pub fn count_rows(self_: PyRef<'_, Self>, filter: Option<String>) -> PyResult<&PyAny> {
pub fn count_rows(
self_: PyRef<'_, Self>,
filter: Option<String>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
inner.count_rows(filter).await.infer_error()
@@ -157,7 +164,7 @@ impl Table {
column: String,
index: Option<&Index>,
replace: Option<bool>,
) -> PyResult<&'a PyAny> {
) -> PyResult<Bound<'a, PyAny>> {
let index = if let Some(index) = index {
index.consume()?
} else {
@@ -174,7 +181,7 @@ impl Table {
})
}
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
Ok(inner
@@ -194,7 +201,7 @@ impl Table {
}
}
pub fn version(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
pub fn version(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(
self_.py(),
@@ -202,21 +209,21 @@ impl Table {
)
}
pub fn checkout(self_: PyRef<'_, Self>, version: u64) -> PyResult<&PyAny> {
pub fn checkout(self_: PyRef<'_, Self>, version: u64) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
inner.checkout(version).await.infer_error()
})
}
pub fn checkout_latest(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
pub fn checkout_latest(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
inner.checkout_latest().await.infer_error()
})
}
pub fn restore(self_: PyRef<'_, Self>) -> PyResult<&PyAny> {
pub fn restore(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(
self_.py(),
@@ -228,7 +235,10 @@ impl Table {
Query::new(self.inner_ref().unwrap().query())
}
pub fn optimize(self_: PyRef<'_, Self>, cleanup_since_ms: Option<u64>) -> PyResult<&PyAny> {
pub fn optimize(
self_: PyRef<'_, Self>,
cleanup_since_ms: Option<u64>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
let older_than = if let Some(ms) = cleanup_since_ms {
if ms > i64::MAX as u64 {