From c2e543f1b7a4d8eeb00673fcabf2fcb51c18c1bd Mon Sep 17 00:00:00 2001 From: Mesut-Doner <81925277+Mesut-Doner@users.noreply.github.com> Date: Fri, 13 Mar 2026 22:54:26 +0300 Subject: [PATCH] feat(rust): support Expr in projection query (#3069) Referred and followed [`Select::Dynamic`] implementation. Closes #3039 --- python/src/query.rs | 13 +++ .../src/dataloader/permutation/reader.rs | 6 ++ rust/lancedb/src/query.rs | 94 +++++++++++++++++++ rust/lancedb/src/remote/table.rs | 10 ++ rust/lancedb/src/table/query.rs | 29 ++++++ 5 files changed, 152 insertions(+) diff --git a/python/src/query.rs b/python/src/query.rs index 0c17cf703..6a22f53a9 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -316,6 +316,19 @@ impl<'py> IntoPyObject<'py> for PySelect { Select::All => Ok(py.None().into_bound(py).into_any()), Select::Columns(columns) => Ok(columns.into_pyobject(py)?.into_any()), Select::Dynamic(columns) => Ok(columns.into_pyobject(py)?.into_any()), + Select::Expr(pairs) => { + // Serialize DataFusion Expr -> SQL string so Python sees the same + // format as Select::Dynamic: a list of (name, sql_string) tuples. + let sql_pairs: PyResult> = pairs + .into_iter() + .map(|(name, expr)| { + lancedb::expr::expr_to_sql_string(&expr) + .map(|sql| (name, sql)) + .map_err(|e| PyRuntimeError::new_err(e.to_string())) + }) + .collect(); + Ok(sql_pairs?.into_pyobject(py)?.into_any()) + } } } } diff --git a/rust/lancedb/src/dataloader/permutation/reader.rs b/rust/lancedb/src/dataloader/permutation/reader.rs index 72f146995..43c229a0a 100644 --- a/rust/lancedb/src/dataloader/permutation/reader.rs +++ b/rust/lancedb/src/dataloader/permutation/reader.rs @@ -339,6 +339,12 @@ impl PermutationReader { } Ok(false) } + Select::Expr(columns) => { + // For Expr projections, we check if any alias is _rowid. + // We can't validate the expression itself (it may differ from _rowid) + // but we allow it through; the column will be included. + Ok(columns.iter().any(|(alias, _)| alias == ROW_ID)) + } } } diff --git a/rust/lancedb/src/query.rs b/rust/lancedb/src/query.rs index aa77dcc39..a1804a79c 100644 --- a/rust/lancedb/src/query.rs +++ b/rust/lancedb/src/query.rs @@ -47,6 +47,25 @@ pub enum Select { /// /// See [`Query::select`] for more details and examples Dynamic(Vec<(String, String)>), + /// Advanced selection using type-safe DataFusion expressions + /// + /// Similar to [`Select::Dynamic`] but uses [`datafusion_expr::Expr`] instead of + /// raw SQL strings. Use [`crate::expr`] helpers to build expressions: + /// + /// ``` + /// use lancedb::expr::{col, lit}; + /// use lancedb::query::Select; + /// + /// // SELECT id, id * 2 AS id2 FROM ... + /// let selection = Select::expr_projection(&[ + /// ("id", col("id")), + /// ("id2", col("id") * lit(2)), + /// ]); + /// ``` + /// + /// Note: For remote/server-side queries the expressions are serialized to SQL strings + /// automatically (same as [`Select::Dynamic`]). + Expr(Vec<(String, datafusion_expr::Expr)>), } impl Select { @@ -69,6 +88,29 @@ impl Select { .collect(), ) } + /// Create a typed-expression projection. + /// + /// This is a convenience method for creating a [`Select::Expr`] variant from + /// a slice of `(name, expr)` pairs where each `expr` is a [`datafusion_expr::Expr`]. + /// + /// # Example + /// ``` + /// use lancedb::expr::{col, lit}; + /// use lancedb::query::Select; + /// + /// let selection = Select::expr_projection(&[ + /// ("id", col("id")), + /// ("id2", col("id") * lit(2)), + /// ]); + /// ``` + pub fn expr_projection(columns: &[(impl AsRef, datafusion_expr::Expr)]) -> Self { + Self::Expr( + columns + .iter() + .map(|(name, expr)| (name.as_ref().to_string(), expr.clone())) + .collect(), + ) + } } /// A trait for converting a type to a query vector @@ -1591,6 +1633,58 @@ mod tests { }); } + #[tokio::test] + async fn test_select_with_expr_projection() { + // Mirrors test_select_with_transform but uses Select::Expr instead of Select::Dynamic + let tmp_dir = tempdir().unwrap(); + let dataset_path = tmp_dir.path().join("test_expr.lance"); + let uri = dataset_path.to_str().unwrap(); + + let batches = make_non_empty_batches(); + let conn = connect(uri).execute().await.unwrap(); + let table = conn + .create_table("my_table", batches) + .execute() + .await + .unwrap(); + + use crate::expr::{col, lit}; + let query = table.query().limit(10).select(Select::expr_projection(&[ + ("id2", col("id") * lit(2i32)), + ("id", col("id")), + ])); + + let schema = query.output_schema().await.unwrap(); + assert_eq!( + schema, + Arc::new(ArrowSchema::new(vec![ + ArrowField::new("id2", DataType::Int32, true), + ArrowField::new("id", DataType::Int32, true), + ])) + ); + + let result = query.execute().await; + let mut batches = result + .expect("should have result") + .try_collect::>() + .await + .unwrap(); + assert_eq!(batches.len(), 1); + let batch = batches.pop().unwrap(); + + // id and id2 + assert_eq!(batch.num_columns(), 2); + + let id: &Int32Array = batch.column_by_name("id").unwrap().as_primitive(); + let id2: &Int32Array = batch.column_by_name("id2").unwrap().as_primitive(); + + id.iter().zip(id2.iter()).for_each(|(id, id2)| { + let id = id.unwrap(); + let id2 = id2.unwrap(); + assert_eq!(id * 2, id2); + }); + } + #[tokio::test] async fn test_execute_no_vector() { // TODO: Switch back to memory://foo after https://github.com/lancedb/lancedb/issues/1051 diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index 2532a9962..13edae94f 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -477,6 +477,16 @@ impl RemoteTable { })); body["columns"] = alias_map.into(); } + Select::Expr(pairs) => { + let alias_map: Result> = pairs + .iter() + .map(|(name, expr)| { + expr_to_sql_string(expr) + .map(|sql| (name.clone(), serde_json::Value::String(sql))) + }) + .collect(); + body["columns"] = alias_map?.into(); + } } if params.fast_search { diff --git a/rust/lancedb/src/table/query.rs b/rust/lancedb/src/table/query.rs index 529551269..abce6d325 100644 --- a/rust/lancedb/src/table/query.rs +++ b/rust/lancedb/src/table/query.rs @@ -186,6 +186,13 @@ pub async fn create_plan( Select::Dynamic(ref select_with_transform) => { scanner.project_with_transform(select_with_transform.as_slice())?; } + Select::Expr(ref expr_pairs) => { + let sql_pairs: crate::Result> = expr_pairs + .iter() + .map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql))) + .collect(); + scanner.project_with_transform(sql_pairs?.as_slice())?; + } Select::All => {} } @@ -340,6 +347,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result { .to_string(), }); } + Select::Expr(pairs) => { + let sql_pairs: crate::Result> = pairs + .iter() + .map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql))) + .collect(); + let sql_pairs = sql_pairs?; + Some(Box::new(QueryTableRequestColumns { + column_names: None, + column_aliases: Some(sql_pairs.into_iter().collect()), + })) + } }; // Check for unsupported features @@ -411,6 +429,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result { .to_string(), }); } + Select::Expr(pairs) => { + let sql_pairs: crate::Result> = pairs + .iter() + .map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql))) + .collect(); + let sql_pairs = sql_pairs?; + Some(Box::new(QueryTableRequestColumns { + column_names: None, + column_aliases: Some(sql_pairs.into_iter().collect()), + })) + } }; // Handle full text search if present