Compare commits

..

1 Commits

Author SHA1 Message Date
lancedb automation
edf8159780 chore: update lance dependency to v2.0.0-beta.6 2026-01-09 23:54:25 +00:00
15 changed files with 839 additions and 782 deletions

1358
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -15,37 +15,37 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=2.0.0-beta.7", default-features = false, "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-beta.7", default-features = false, "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-beta.7", default-features = false, "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-beta.7", "tag" = "v2.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=2.0.0-beta.6", default-features = false, "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=2.0.0-beta.6", default-features = false, "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=2.0.0-beta.6", default-features = false, "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=2.0.0-beta.6", "tag" = "v2.0.0-beta.6", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2.0", optional = false }
arrow-array = "57.2.0"
arrow-data = "57.2.0"
arrow-ipc = "57.2.0"
arrow-ord = "57.2.0"
arrow-schema = "57.2.0"
arrow-select = "57.2.0"
arrow-cast = "57.2.0"
arrow = { version = "57.1", optional = false }
arrow-array = "57.1"
arrow-data = "57.1"
arrow-ipc = "57.1"
arrow-ord = "57.1"
arrow-schema = "57.1"
arrow-select = "57.1"
arrow-cast = "57.1"
async-trait = "0"
datafusion = { version = "51.0.0", default-features = false }
datafusion-catalog = "51.0.0"
datafusion-common = { version = "51.0.0", default-features = false }
datafusion-execution = "51.0.0"
datafusion-expr = "51.0.0"
datafusion-physical-plan = "51.0.0"
datafusion = { version = "51.0", default-features = false }
datafusion-catalog = "51.0"
datafusion-common = { version = "51.0", default-features = false }
datafusion-execution = "51.0"
datafusion-expr = "51.0"
datafusion-physical-plan = "51.0"
env_logger = "0.11"
half = { "version" = "2.7.1", default-features = false, features = [
"num-traits",
@@ -59,7 +59,7 @@ rand = "0.9"
snafu = "0.8"
url = "2"
num-traits = "0.2"
regex = "1.12"
regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
chrono = "0.4"

View File

@@ -14,7 +14,7 @@ name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "57.2.0", features = ["pyarrow"] }
arrow = { workspace = true, features = ["pyarrow"] }
async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false }
lance-core.workspace = true

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -56,7 +55,7 @@ impl RecordBatchStream {
.next()
.await
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
Python::with_gil(|py| {
Python::attach(|py| {
inner_next
.infer_error()?
.to_pyarrow(py)

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -13,7 +12,7 @@ use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods,
types::{PyDict, PyDictMethods},
Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -115,7 +114,7 @@ impl Connection {
data: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
storage_options_provider: Option<Py<PyAny>>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -153,7 +152,7 @@ impl Connection {
schema: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
storage_options_provider: Option<Py<PyAny>>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -188,7 +187,7 @@ impl Connection {
name: String,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
storage_options_provider: Option<Py<PyAny>>,
index_cache_size: Option<u32>,
location: Option<String>,
) -> PyResult<Bound<'_, PyAny>> {
@@ -328,13 +327,19 @@ impl Connection {
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::CreateNamespaceRequest;
let mode_enum = mode.map(|m| match m.to_lowercase().as_str() {
"create" => "Create".to_string(),
"exist_ok" => "ExistOk".to_string(),
"overwrite" => "Overwrite".to_string(),
other => other.to_string(),
});
let request = CreateNamespaceRequest {
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
mode,
mode: mode_enum,
properties,
..Default::default()
};
@@ -358,14 +363,24 @@ impl Connection {
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::DropNamespaceRequest;
let mode_enum = mode.map(|m| match m.to_uppercase().as_str() {
"SKIP" => "Skip".to_string(),
"FAIL" => "Fail".to_string(),
other => other.to_string(),
});
let behavior_enum = behavior.map(|b| match b.to_uppercase().as_str() {
"RESTRICT" => "Restrict".to_string(),
"CASCADE" => "Cascade".to_string(),
other => other.to_string(),
});
let request = DropNamespaceRequest {
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
mode,
behavior,
mode: mode_enum,
behavior: behavior_enum,
..Default::default()
};
let response = inner.drop_namespace(request).await.infer_error()?;

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

View File

@@ -1,3 +1,4 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -282,7 +281,7 @@ impl PyPermutationReader {
let reader = slf.reader.clone();
future_into_py(slf.py(), async move {
let schema = reader.output_schema(selection).await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py).map(|bound| bound.unbind()))
})
}

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -217,7 +216,7 @@ impl<'py> IntoPyObject<'py> for PyQueryVectors {
let py_objs = self
.0
.into_iter()
.map(|v| v.to_data().into_pyarrow(py).map(|b| b.unbind()))
.map(|v| v.to_data().into_pyarrow(py).map(|bound| bound.unbind()))
.collect::<Result<Vec<_>, _>>()?;
PyList::new(py, py_objs)
}
@@ -454,7 +453,7 @@ impl Query {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py).map(|bound| bound.unbind()))
})
}
@@ -533,7 +532,7 @@ impl TakeQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py).map(|bound| bound.unbind()))
})
}
@@ -628,7 +627,7 @@ impl FTSQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py).map(|bound| bound.unbind()))
})
}
@@ -807,7 +806,7 @@ impl VectorQuery {
let inner = self_.inner.clone();
future_into_py(self_.py(), async move {
let schema = inner.output_schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py).map(|bound| bound.unbind()))
})
}

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
@@ -18,7 +17,7 @@ use pyo3::types::PyDict;
/// Internal wrapper around a Python object implementing StorageOptionsProvider
pub struct PyStorageOptionsProvider {
/// The Python object implementing fetch_storage_options()
inner: PyObject,
inner: Py<PyAny>,
}
impl Clone for PyStorageOptionsProvider {
@@ -30,7 +29,7 @@ impl Clone for PyStorageOptionsProvider {
}
impl PyStorageOptionsProvider {
pub fn new(obj: PyObject) -> PyResult<Self> {
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
Python::with_gil(|py| {
// Verify the object has a fetch_storage_options method
if !obj.bind(py).hasattr("fetch_storage_options")? {
@@ -144,7 +143,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
/// This is the main entry point for converting Python StorageOptionsProvider objects
/// to Rust trait objects that can be used by the Lance ecosystem.
pub fn py_object_to_storage_options_provider(
py_obj: PyObject,
py_obj: Py<PyAny>,
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
let py_provider = PyStorageOptionsProvider::new(py_obj)?;
Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))

View File

@@ -1,4 +1,3 @@
#![allow(deprecated)]
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::{collections::HashMap, sync::Arc};
@@ -288,7 +287,7 @@ impl Table {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let schema = inner.schema().await.infer_error()?;
Python::with_gil(|py| schema.to_pyarrow(py).map(|b| b.unbind()))
Python::with_gil(|py| schema.to_pyarrow(py).map(|bound| bound.unbind()))
})
}

View File

@@ -8,9 +8,10 @@ use datafusion_execution::{disk_manager::DiskManagerBuilder, runtime_env::Runtim
use datafusion_expr::col;
use futures::TryStreamExt;
use lance_core::ROW_ID;
use lance_datafusion::exec::SessionContextExt;
use crate::{
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream},
connect,
database::{CreateTableData, CreateTableRequest, Database},
dataloader::permutation::{
@@ -177,17 +178,12 @@ impl PermutationBuilder {
.build_arc()
.unwrap(),
);
let batches = data
let df = ctx
.read_one_shot(data.into_df_stream())
.map_err(|e| Error::Other {
message: format!("Failed to setup sort by split id: {}", e),
source: Some(e.into()),
})
.try_collect::<Vec<_>>()
.await?;
let df = ctx.read_batches(batches).map_err(|e| Error::Other {
message: format!("Failed to setup sort by split id: {}", e),
source: Some(e.into()),
})?;
})?;
let df_stream = df
.sort_by(vec![col(SPLIT_ID_COLUMN)])
.map_err(|e| Error::Other {

View File

@@ -1411,35 +1411,26 @@ impl Table {
let projected_plans = plans
.into_iter()
.enumerate()
.map(
|(plan_i, plan)| -> Result<Arc<dyn datafusion_physical_plan::ExecutionPlan>> {
let query_index = datafusion_common::ScalarValue::Int32(Some(plan_i as i32));
let query_index_expr =
datafusion_physical_plan::expressions::Literal::new(query_index);
let query_index_expr = Arc::new(query_index_expr)
as Arc<dyn datafusion_physical_plan::PhysicalExpr>;
let mut projections = vec![(query_index_expr, "query_index".to_string())];
projections.extend_from_slice(&project_all_columns);
let projection =
ProjectionExec::try_new(projections, plan).map_err(|e| Error::Runtime {
message: format!("Failed to build projection plan: {e}"),
})?;
Ok(Arc::new(projection) as Arc<dyn datafusion_physical_plan::ExecutionPlan>)
},
)
.collect::<Result<Vec<_>>>()?;
.map(|(plan_i, plan)| {
let query_index = datafusion_common::ScalarValue::Int32(Some(plan_i as i32));
let query_index_expr =
datafusion_physical_plan::expressions::Literal::new(query_index);
let query_index_expr =
Arc::new(query_index_expr) as Arc<dyn datafusion_physical_plan::PhysicalExpr>;
let mut projections = vec![(query_index_expr, "query_index".to_string())];
projections.extend_from_slice(&project_all_columns);
let projection = ProjectionExec::try_new(projections, plan).unwrap();
Arc::new(projection) as Arc<dyn datafusion_physical_plan::ExecutionPlan>
})
.collect::<Vec<_>>();
let unioned = UnionExec::try_new(projected_plans).map_err(|e| Error::Runtime {
message: format!("Failed to union query plans: {e}"),
})?;
let unioned = UnionExec::try_new(projected_plans).unwrap();
// We require 1 partition in the final output
let repartitioned = RepartitionExec::try_new(
unioned,
datafusion_physical_plan::Partitioning::RoundRobinBatch(1),
)
.map_err(|e| Error::Runtime {
message: format!("Failed to repartition query plans: {e}"),
})?;
.unwrap();
Ok(Arc::new(repartitioned))
}
@@ -2341,25 +2332,20 @@ impl NativeTable {
}
}
/// Convert selected columns into the namespace request format.
fn namespace_columns(&self, columns: &[String]) -> Option<Box<QueryTableRequestColumns>> {
if columns.is_empty() {
None
} else {
Some(Box::new(QueryTableRequestColumns {
column_names: Some(columns.to_vec()),
column_aliases: None,
}))
}
}
/// Convert an AnyQuery to the namespace QueryTableRequest format.
fn convert_to_namespace_query(&self, query: &AnyQuery) -> Result<NsQueryTableRequest> {
let to_namespace_columns =
|select: &Select| -> Result<Option<Box<QueryTableRequestColumns>>> {
match select {
Select::All => Ok(None),
Select::Columns(cols) => {
let mut columns = QueryTableRequestColumns::new();
columns.column_names = Some(cols.clone());
Ok(Some(Box::new(columns)))
}
Select::Dynamic(_) => Err(Error::NotSupported {
message:
"Dynamic column selection is not supported for server-side queries"
.to_string(),
}),
}
};
match query {
AnyQuery::VectorQuery(vq) => {
// Extract the query vector(s)
@@ -2371,6 +2357,19 @@ impl NativeTable {
None => None,
};
// Convert select to columns list
let columns = match &vq.base.select {
Select::All => None,
Select::Columns(cols) => self.namespace_columns(cols),
Select::Dynamic(_) => {
return Err(Error::NotSupported {
message:
"Dynamic column selection is not supported for server-side queries"
.to_string(),
});
}
};
// Check for unsupported features
if vq.base.reranker.is_some() {
return Err(Error::NotSupported {
@@ -2378,8 +2377,6 @@ impl NativeTable {
});
}
let columns = to_namespace_columns(&vq.base.select)?;
// Convert FTS query if present
let full_text_query = vq.base.full_text_search.as_ref().map(|fts| {
let columns = fts.columns();
@@ -2398,6 +2395,8 @@ impl NativeTable {
});
Ok(NsQueryTableRequest {
identity: None,
context: None,
id: None, // Will be set in namespace_query
k: vq.base.limit.unwrap_or(10) as i32,
vector: Box::new(vector),
@@ -2417,7 +2416,6 @@ impl NativeTable {
bypass_vector_index: Some(!vq.use_index),
full_text_query,
version: None,
..Default::default()
})
}
AnyQuery::Query(q) => {
@@ -2435,7 +2433,16 @@ impl NativeTable {
.map(|f| self.filter_to_sql(f))
.transpose()?;
let columns = to_namespace_columns(&q.select)?;
let columns = match &q.select {
Select::All => None,
Select::Columns(cols) => self.namespace_columns(cols),
Select::Dynamic(_) => {
return Err(Error::NotSupported {
message: "Dynamic columns are not supported for server-side query"
.to_string(),
});
}
};
// Handle full text search if present
let full_text_query = q.full_text_search.as_ref().map(|fts| {
@@ -2460,6 +2467,8 @@ impl NativeTable {
});
Ok(NsQueryTableRequest {
identity: None,
context: None,
id: None, // Will be set by caller
vector,
k: q.limit.unwrap_or(10) as i32,
@@ -2479,7 +2488,6 @@ impl NativeTable {
fast_search: None,
lower_bound: None,
upper_bound: None,
..Default::default()
})
}
}
@@ -5154,13 +5162,13 @@ mod tests {
assert_eq!(ns_request.k, 10);
assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 0".to_string()));
assert_eq!(
ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone()),
Some(vec!["id".to_string()])
);
let column_names = ns_request
.columns
.as_ref()
.and_then(|c| c.column_names.as_ref())
.cloned()
.unwrap();
assert_eq!(column_names, vec!["id".to_string()]);
assert_eq!(ns_request.vector_column, Some("vector".to_string()));
assert_eq!(ns_request.distance_type, Some("l2".to_string()));
assert!(ns_request.vector.single_vector.is_some());
@@ -5201,13 +5209,13 @@ mod tests {
assert_eq!(ns_request.k, 20);
assert_eq!(ns_request.offset, Some(5));
assert_eq!(ns_request.filter, Some("id > 5".to_string()));
assert_eq!(
ns_request
.columns
.as_ref()
.and_then(|cols| cols.column_names.clone()),
Some(vec!["id".to_string()])
);
let column_names = ns_request
.columns
.as_ref()
.and_then(|c| c.column_names.as_ref())
.cloned()
.unwrap();
assert_eq!(column_names, vec!["id".to_string()]);
assert_eq!(ns_request.with_row_id, Some(true));
assert_eq!(ns_request.bypass_vector_index, Some(true));
assert!(ns_request.vector_column.is_none()); // No vector column for plain queries

View File

@@ -100,8 +100,8 @@ impl DatasetRef {
let should_checkout = match &target_ref {
refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
refs::Ref::Version(_, None) => true, // No specific version, always checkout
refs::Ref::Tag(_) => true, // Always checkout for tags
refs::Ref::VersionNumber(target_ver) => version != target_ver,
refs::Ref::Tag(_) => true, // Always checkout for tags
};
if should_checkout {