mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-28 01:10:45 +00:00
feat(rust): support Expr in projection query (#3069)
Referred and followed [`Select::Dynamic`] implementation. Closes #3039
This commit is contained in:
@@ -339,6 +339,12 @@ impl PermutationReader {
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
Select::Expr(columns) => {
|
||||
// For Expr projections, we check if any alias is _rowid.
|
||||
// We can't validate the expression itself (it may differ from _rowid)
|
||||
// but we allow it through; the column will be included.
|
||||
Ok(columns.iter().any(|(alias, _)| alias == ROW_ID))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -47,6 +47,25 @@ pub enum Select {
|
||||
///
|
||||
/// See [`Query::select`] for more details and examples
|
||||
Dynamic(Vec<(String, String)>),
|
||||
/// Advanced selection using type-safe DataFusion expressions
|
||||
///
|
||||
/// Similar to [`Select::Dynamic`] but uses [`datafusion_expr::Expr`] instead of
|
||||
/// raw SQL strings. Use [`crate::expr`] helpers to build expressions:
|
||||
///
|
||||
/// ```
|
||||
/// use lancedb::expr::{col, lit};
|
||||
/// use lancedb::query::Select;
|
||||
///
|
||||
/// // SELECT id, id * 2 AS id2 FROM ...
|
||||
/// let selection = Select::expr_projection(&[
|
||||
/// ("id", col("id")),
|
||||
/// ("id2", col("id") * lit(2)),
|
||||
/// ]);
|
||||
/// ```
|
||||
///
|
||||
/// Note: For remote/server-side queries the expressions are serialized to SQL strings
|
||||
/// automatically (same as [`Select::Dynamic`]).
|
||||
Expr(Vec<(String, datafusion_expr::Expr)>),
|
||||
}
|
||||
|
||||
impl Select {
|
||||
@@ -69,6 +88,29 @@ impl Select {
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
/// Create a typed-expression projection.
|
||||
///
|
||||
/// This is a convenience method for creating a [`Select::Expr`] variant from
|
||||
/// a slice of `(name, expr)` pairs where each `expr` is a [`datafusion_expr::Expr`].
|
||||
///
|
||||
/// # Example
|
||||
/// ```
|
||||
/// use lancedb::expr::{col, lit};
|
||||
/// use lancedb::query::Select;
|
||||
///
|
||||
/// let selection = Select::expr_projection(&[
|
||||
/// ("id", col("id")),
|
||||
/// ("id2", col("id") * lit(2)),
|
||||
/// ]);
|
||||
/// ```
|
||||
pub fn expr_projection(columns: &[(impl AsRef<str>, datafusion_expr::Expr)]) -> Self {
|
||||
Self::Expr(
|
||||
columns
|
||||
.iter()
|
||||
.map(|(name, expr)| (name.as_ref().to_string(), expr.clone()))
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait for converting a type to a query vector
|
||||
@@ -1591,6 +1633,58 @@ mod tests {
|
||||
});
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_select_with_expr_projection() {
|
||||
// Mirrors test_select_with_transform but uses Select::Expr instead of Select::Dynamic
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let dataset_path = tmp_dir.path().join("test_expr.lance");
|
||||
let uri = dataset_path.to_str().unwrap();
|
||||
|
||||
let batches = make_non_empty_batches();
|
||||
let conn = connect(uri).execute().await.unwrap();
|
||||
let table = conn
|
||||
.create_table("my_table", batches)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
use crate::expr::{col, lit};
|
||||
let query = table.query().limit(10).select(Select::expr_projection(&[
|
||||
("id2", col("id") * lit(2i32)),
|
||||
("id", col("id")),
|
||||
]));
|
||||
|
||||
let schema = query.output_schema().await.unwrap();
|
||||
assert_eq!(
|
||||
schema,
|
||||
Arc::new(ArrowSchema::new(vec![
|
||||
ArrowField::new("id2", DataType::Int32, true),
|
||||
ArrowField::new("id", DataType::Int32, true),
|
||||
]))
|
||||
);
|
||||
|
||||
let result = query.execute().await;
|
||||
let mut batches = result
|
||||
.expect("should have result")
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(batches.len(), 1);
|
||||
let batch = batches.pop().unwrap();
|
||||
|
||||
// id and id2
|
||||
assert_eq!(batch.num_columns(), 2);
|
||||
|
||||
let id: &Int32Array = batch.column_by_name("id").unwrap().as_primitive();
|
||||
let id2: &Int32Array = batch.column_by_name("id2").unwrap().as_primitive();
|
||||
|
||||
id.iter().zip(id2.iter()).for_each(|(id, id2)| {
|
||||
let id = id.unwrap();
|
||||
let id2 = id2.unwrap();
|
||||
assert_eq!(id * 2, id2);
|
||||
});
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute_no_vector() {
|
||||
// TODO: Switch back to memory://foo after https://github.com/lancedb/lancedb/issues/1051
|
||||
|
||||
@@ -477,6 +477,16 @@ impl<S: HttpSend> RemoteTable<S> {
|
||||
}));
|
||||
body["columns"] = alias_map.into();
|
||||
}
|
||||
Select::Expr(pairs) => {
|
||||
let alias_map: Result<serde_json::Map<String, serde_json::Value>> = pairs
|
||||
.iter()
|
||||
.map(|(name, expr)| {
|
||||
expr_to_sql_string(expr)
|
||||
.map(|sql| (name.clone(), serde_json::Value::String(sql)))
|
||||
})
|
||||
.collect();
|
||||
body["columns"] = alias_map?.into();
|
||||
}
|
||||
}
|
||||
|
||||
if params.fast_search {
|
||||
|
||||
@@ -186,6 +186,13 @@ pub async fn create_plan(
|
||||
Select::Dynamic(ref select_with_transform) => {
|
||||
scanner.project_with_transform(select_with_transform.as_slice())?;
|
||||
}
|
||||
Select::Expr(ref expr_pairs) => {
|
||||
let sql_pairs: crate::Result<Vec<(String, String)>> = expr_pairs
|
||||
.iter()
|
||||
.map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
|
||||
.collect();
|
||||
scanner.project_with_transform(sql_pairs?.as_slice())?;
|
||||
}
|
||||
Select::All => {}
|
||||
}
|
||||
|
||||
@@ -340,6 +347,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
Select::Expr(pairs) => {
|
||||
let sql_pairs: crate::Result<Vec<(String, String)>> = pairs
|
||||
.iter()
|
||||
.map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
|
||||
.collect();
|
||||
let sql_pairs = sql_pairs?;
|
||||
Some(Box::new(QueryTableRequestColumns {
|
||||
column_names: None,
|
||||
column_aliases: Some(sql_pairs.into_iter().collect()),
|
||||
}))
|
||||
}
|
||||
};
|
||||
|
||||
// Check for unsupported features
|
||||
@@ -411,6 +429,17 @@ fn convert_to_namespace_query(query: &AnyQuery) -> Result<NsQueryTableRequest> {
|
||||
.to_string(),
|
||||
});
|
||||
}
|
||||
Select::Expr(pairs) => {
|
||||
let sql_pairs: crate::Result<Vec<(String, String)>> = pairs
|
||||
.iter()
|
||||
.map(|(name, expr)| expr_to_sql_string(expr).map(|sql| (name.clone(), sql)))
|
||||
.collect();
|
||||
let sql_pairs = sql_pairs?;
|
||||
Some(Box::new(QueryTableRequestColumns {
|
||||
column_names: None,
|
||||
column_aliases: Some(sql_pairs.into_iter().collect()),
|
||||
}))
|
||||
}
|
||||
};
|
||||
|
||||
// Handle full text search if present
|
||||
|
||||
Reference in New Issue
Block a user