From 76083892cd440c255adc9b556afd05d8953ffea5 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Thu, 20 Feb 2025 20:53:56 -0800 Subject: [PATCH] feat: support UNNEST (#5580) * feat: support UNNEST Signed-off-by: Ruihang Xia * fix clippy and sqlness Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- src/query/src/dist_plan/analyzer.rs | 9 +- .../src/optimizer/string_normalization.rs | 8 +- .../standalone/common/function/geo.result | 24 ++++++ .../cases/standalone/common/function/geo.sql | 9 ++ .../standalone/common/select/unnest.result | 85 +++++++++++++++++++ .../cases/standalone/common/select/unnest.sql | 67 +++++++++++++++ 6 files changed, 198 insertions(+), 4 deletions(-) create mode 100644 tests/cases/standalone/common/select/unnest.result create mode 100644 tests/cases/standalone/common/select/unnest.sql diff --git a/src/query/src/dist_plan/analyzer.rs b/src/query/src/dist_plan/analyzer.rs index 0ec32bcaa2..7b07870dcb 100644 --- a/src/query/src/dist_plan/analyzer.rs +++ b/src/query/src/dist_plan/analyzer.rs @@ -77,8 +77,13 @@ impl DistPlannerAnalyzer { .map(|e| e.transform(&Self::transform_subquery).map(|x| x.data)) .collect::>>()?; - let inputs = plan.inputs().into_iter().cloned().collect::>(); - Ok(Transformed::yes(plan.with_new_exprs(exprs, inputs)?)) + // Some plans that are special treated (should not call `with_new_exprs` on them) + if !matches!(plan, LogicalPlan::Unnest(_)) { + let inputs = plan.inputs().into_iter().cloned().collect::>(); + Ok(Transformed::yes(plan.with_new_exprs(exprs, inputs)?)) + } else { + Ok(Transformed::no(plan)) + } } fn transform_subquery(expr: Expr) -> DfResult> { diff --git a/src/query/src/optimizer/string_normalization.rs b/src/query/src/optimizer/string_normalization.rs index d9e5efe7de..c62f5862ee 100644 --- a/src/query/src/optimizer/string_normalization.rs +++ b/src/query/src/optimizer/string_normalization.rs @@ -49,7 +49,6 @@ impl AnalyzerRule for StringNormalizationRule { | LogicalPlan::Distinct(_) | LogicalPlan::Dml(_) | LogicalPlan::Copy(_) - | LogicalPlan::Unnest(_) | LogicalPlan::RecursiveQuery(_) => { let mut converter = StringNormalizationConverter; let inputs = plan.inputs().into_iter().cloned().collect::>(); @@ -58,10 +57,15 @@ impl AnalyzerRule for StringNormalizationRule { .into_iter() .map(|e| e.rewrite(&mut converter).map(|x| x.data)) .collect::>>()?; - plan.with_new_exprs(expr, inputs).map(Transformed::yes) + if expr != plan.expressions_consider_join() { + plan.with_new_exprs(expr, inputs).map(Transformed::yes) + } else { + Ok(Transformed::no(plan)) + } } LogicalPlan::Limit(_) | LogicalPlan::Explain(_) + | LogicalPlan::Unnest(_) | LogicalPlan::Ddl(_) | LogicalPlan::DescribeTable(_) => Ok(Transformed::no(plan)), }) diff --git a/tests/cases/standalone/common/function/geo.result b/tests/cases/standalone/common/function/geo.result index e251cdd0a1..b9ae2ba580 100644 --- a/tests/cases/standalone/common/function/geo.result +++ b/tests/cases/standalone/common/function/geo.result @@ -157,6 +157,30 @@ FROM | 9 | [604189371209351167, 604189371075133439, 604189375235883007, 604189375101665279, 604189638034194431, 604189638571065343, 604189638436847615, 604189642597597183, 604189642463379455, 604189641255419903] | 55.05017 | 0.54939243697098 | +----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------+--------------------+ +SELECT + unnest(h3_grid_path_cells(cell1, cell2)) AS path_cells, +FROM + ( + SELECT + h3_string_to_cell('86283082fffffff') AS cell1, + h3_string_to_cell('86283470fffffff') AS cell2 + ); + ++--------------------+ +| path_cells | ++--------------------+ +| 604189371209351167 | +| 604189371075133439 | +| 604189375235883007 | +| 604189375101665279 | +| 604189638034194431 | +| 604189638571065343 | +| 604189638436847615 | +| 604189642597597183 | +| 604189642463379455 | +| 604189641255419903 | ++--------------------+ + SELECT h3_cells_contains('86283470fffffff,862834777ffffff, 862834757ffffff, 86283471fffffff, 862834707ffffff', '8b283470d112fff') AS R00, h3_cells_contains('86283470fffffff,862834777ffffff, 862834757ffffff, 86283471fffffff, 862834707ffffff', 604189641792290815) AS R01, diff --git a/tests/cases/standalone/common/function/geo.sql b/tests/cases/standalone/common/function/geo.sql index d2b2460c50..fe424eb228 100644 --- a/tests/cases/standalone/common/function/geo.sql +++ b/tests/cases/standalone/common/function/geo.sql @@ -57,6 +57,15 @@ FROM h3_string_to_cell('86283470fffffff') AS cell2 ); +SELECT + unnest(h3_grid_path_cells(cell1, cell2)) AS path_cells, +FROM + ( + SELECT + h3_string_to_cell('86283082fffffff') AS cell1, + h3_string_to_cell('86283470fffffff') AS cell2 + ); + SELECT h3_cells_contains('86283470fffffff,862834777ffffff, 862834757ffffff, 86283471fffffff, 862834707ffffff', '8b283470d112fff') AS R00, h3_cells_contains('86283470fffffff,862834777ffffff, 862834757ffffff, 86283471fffffff, 862834707ffffff', 604189641792290815) AS R01, diff --git a/tests/cases/standalone/common/select/unnest.result b/tests/cases/standalone/common/select/unnest.result new file mode 100644 index 0000000000..e24a140b9a --- /dev/null +++ b/tests/cases/standalone/common/select/unnest.result @@ -0,0 +1,85 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- +-- Unnest tests from Apache DataFusion +-- Basic unnest tests +SELECT unnest([1,2,3]); + ++------------------------------------------------+ +| UNNEST(make_array(Int64(1),Int64(2),Int64(3))) | ++------------------------------------------------+ +| 1 | +| 2 | +| 3 | ++------------------------------------------------+ + +SELECT unnest(struct(1,2,3)); + ++-----------------------------------------------------------+-----------------------------------------------------------+-----------------------------------------------------------+ +| unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c0 | unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c1 | unnest_placeholder(struct(Int64(1),Int64(2),Int64(3))).c2 | ++-----------------------------------------------------------+-----------------------------------------------------------+-----------------------------------------------------------+ +| 1 | 2 | 3 | ++-----------------------------------------------------------+-----------------------------------------------------------+-----------------------------------------------------------+ + +-- Table function is not supported for now +-- SELECT * FROM unnest([1,2,3]); +-- SELECT * FROM unnest(struct(1,2,3)); +-- SELECT * FROM unnest(struct(1,2,3)), unnest([4,5,6]); +-- Multiple unnest levels +SELECT unnest([1,2,3]), unnest(unnest([[1,2,3]])); + ++------------------------------------------------+--------------------------------------------------------------------+ +| UNNEST(make_array(Int64(1),Int64(2),Int64(3))) | UNNEST(UNNEST(make_array(make_array(Int64(1),Int64(2),Int64(3))))) | ++------------------------------------------------+--------------------------------------------------------------------+ +| 1 | 1 | +| 2 | 2 | +| 3 | 3 | ++------------------------------------------------+--------------------------------------------------------------------+ + +SELECT unnest([1,2,3]) + unnest([1,2,3]), unnest([1,2,3]) + unnest([4,5]); + ++-------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------+ +| UNNEST(make_array(Int64(1),Int64(2),Int64(3))) + UNNEST(make_array(Int64(1),Int64(2),Int64(3))) | UNNEST(make_array(Int64(1),Int64(2),Int64(3))) + UNNEST(make_array(Int64(4),Int64(5))) | ++-------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------+ +| 2 | 5 | +| 4 | 7 | +| 6 | | ++-------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------+ + +SELECT unnest(unnest([[1,2,3]])) + unnest(unnest([[1,2,3]])), + unnest(unnest([[1,2,3]])) + unnest([4,5]), + unnest([4,5]); + ++-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+---------------------------------------+ +| UNNEST(UNNEST(make_array(make_array(Int64(1),Int64(2),Int64(3))))) + UNNEST(UNNEST(make_array(make_array(Int64(1),Int64(2),Int64(3))))) | UNNEST(UNNEST(make_array(make_array(Int64(1),Int64(2),Int64(3))))) + UNNEST(make_array(Int64(4),Int64(5))) | UNNEST(make_array(Int64(4),Int64(5))) | ++-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+---------------------------------------+ +| 2 | 5 | 4 | +| 4 | 7 | 5 | +| 6 | | | ++-----------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------+---------------------------------------+ + +-- Unnest with expressions +SELECT unnest([1,2,3]) + 1, unnest(unnest([[1,2,3]]))* 2; + ++-----------------------------------------------------------+-------------------------------------------------------------------------------+ +| UNNEST(make_array(Int64(1),Int64(2),Int64(3))) + Int64(1) | UNNEST(UNNEST(make_array(make_array(Int64(1),Int64(2),Int64(3))))) * Int64(2) | ++-----------------------------------------------------------+-------------------------------------------------------------------------------+ +| 2 | 2 | +| 3 | 4 | +| 4 | 6 | ++-----------------------------------------------------------+-------------------------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/select/unnest.sql b/tests/cases/standalone/common/select/unnest.sql new file mode 100644 index 0000000000..af34937ae2 --- /dev/null +++ b/tests/cases/standalone/common/select/unnest.sql @@ -0,0 +1,67 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. +-- +-- Unnest tests from Apache DataFusion + +-- Basic unnest tests +SELECT unnest([1,2,3]); + +SELECT unnest(struct(1,2,3)); + +-- Table function is not supported for now +-- SELECT * FROM unnest([1,2,3]); + +-- SELECT * FROM unnest(struct(1,2,3)); + +-- SELECT * FROM unnest(struct(1,2,3)), unnest([4,5,6]); + +-- Multiple unnest levels +SELECT unnest([1,2,3]), unnest(unnest([[1,2,3]])); + +SELECT unnest([1,2,3]) + unnest([1,2,3]), unnest([1,2,3]) + unnest([4,5]); + +SELECT unnest(unnest([[1,2,3]])) + unnest(unnest([[1,2,3]])), + unnest(unnest([[1,2,3]])) + unnest([4,5]), + unnest([4,5]); + +-- Unnest with expressions +SELECT unnest([1,2,3]) + 1, unnest(unnest([[1,2,3]]))* 2; + + +-- Complex subquery with unnest +-- table function is not supported for now +-- WITH t AS ( +-- SELECT +-- left1, +-- width1, +-- min(column3) as min_height +-- FROM +-- unnest(ARRAY[1,2,3,4,5,6,7,8,9,10]) AS t(left1) +-- CROSS JOIN unnest(ARRAY[1,2,3,4,5,6,7,8,9,10]) AS t1(width1) +-- WHERE +-- left1 + width1 - 1 <= 10 +-- AND column3 BETWEEN left1 AND left1 + width1 - 1 +-- GROUP BY +-- left1, width1 +-- ) +-- SELECT +-- left1, width1, min_height, min_height * width1 as area +-- FROM t +-- WHERE min_height * width1 = ( +-- SELECT max(min_height * width1) FROM t +-- ); +