build(deps): update datafusion to latest and arrow to 51.0 (#3661)

* chore: update datafusion * update sqlness case of time.sql Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix: adjust range query partition * fix: histogram incorrect result Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix: ignore filter pushdown temporarily Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix: update limit sqlness result Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix: histogram with wrong distribution Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix: update negative ordinal sqlness case Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * feat: bump df to cd7a00b Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * resolve conflicts * ignore test_range_filter Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix promql exec panic * fix "select count(*)" exec error * re-enable the "test_range_filter" test since the filter push down seems not necessary to be removed * fix: range query schema error * update sqlness results Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * resolve conflicts * update datafusion, again * fix pyo3 compile error, and update some sqlness results * update decimal sqlness cases Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * fix: promql literal * fix udaf tests * fix filter pushdown sqlness tests * fix?: test_cast * fix: rspy test fail due to datafusion `sin` signature change * rebase main to see if there are any failed tests * debug ci * debug ci * debug ci * enforce input partition Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * debug ci * fix ci * fix ci * debug ci * debug ci * debug ci * fix sqlness * feat: do not return error while creating a filter * chore: remove array from error * chore: replace todo with unimplemented * Update src/flow/clippy.toml Co-authored-by: Yingwen <realevenyag@gmail.com> --------- Signed-off-by: Ruihang Xia <waynestxia@gmail.com> Co-authored-by: Ruihang Xia <waynestxia@gmail.com> Co-authored-by: WUJingdi <taylor-lagrange@qq.com> Co-authored-by: 
discord9 <discord9@163.com> Co-authored-by: evenyag <realevenyag@gmail.com> Co-authored-by: tison <wander4096@gmail.com>
2026-01-06 13:22:57 +00:00 · 2024-04-18 20:07:18 +08:00
parent 510782261d
commit 314f2704d4
174 changed files with 2869 additions and 2263 deletions
--- a/tests/cases/distributed/optimizer/filter_push_down.result
+++ b/tests/cases/distributed/optimizer/filter_push_down.result
@@ -225,19 +225,47 @@ SELECT i FROM (SELECT * FROM integers i1 UNION SELECT * FROM integers i2) a WHER
 | 3 |
 +---+

-- TODO(LFC): Somehow the following SQL does not order by column 1 under new DataFusion occasionally. Should further investigate it. Comment it out temporarily.
-- expected:
--  +---+---+--------------+
--  | a | b | ROW_NUMBER() |
--  +---+---+--------------+
--  | 1 | 1 | 1            |
--  | 2 | 2 | 5            |
--  | 3 | 3 | 9            |
--  +---+---+--------------+
-- SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1;
-SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1;
+SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1;

-Error: 3001(EngineExecuteQuery), Invalid argument error: must either specify a row count or at least one column
+---+---+--------------------------------------------------------------------------------------------------------------------+
+| a | b | ROW_NUMBER() ORDER BY [i1.i ASC NULLS LAST, i2.i ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |
+---+---+--------------------------------------------------------------------------------------------------------------------+
+| 1 | 1 | 1                                                                                                                  |
+| 2 | 2 | 5                                                                                                                  |
+| 3 | 3 | 9                                                                                                                  |
+---+---+--------------------------------------------------------------------------------------------------------------------+
+
+-- The "0=1" will be evaluated as a constant expression that is always false, and will be optimized away in the query
+-- engine. In the final plan, there's no filter node. We explain it to ensure that.
+-- SQLNESS REPLACE (-+) -
+-- SQLNESS REPLACE (\s\s+) _
+-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
+-- SQLNESS REPLACE (peers.*) REDACTED
+EXPLAIN SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1;
+
+-+-+
+| plan_type_| plan_|
+-+-+
+| logical_plan_| Sort: a1.cond ASC NULLS LAST_|
+|_|_SubqueryAlias: a1_|
+|_|_Projection: Boolean(false) AS cond_|
+|_|_CrossJoin:_|
+|_|_SubqueryAlias: i1_|
+|_|_Projection:_|
+|_|_MergeScan [is_placeholder=false]_|
+|_|_SubqueryAlias: i2_|
+|_|_Projection:_|
+|_|_MergeScan [is_placeholder=false]_|
+| physical_plan | CoalescePartitionsExec_|
+|_|_ProjectionExec: expr=[false as cond]_|
+|_|_CrossJoinExec_|
+|_|_ProjectionExec: expr=[]_|
+|_|_MergeScanExec: REDACTED
+|_|_RepartitionExec: partitioning=REDACTED
+|_|_ProjectionExec: expr=[]_|
+|_|_MergeScanExec: REDACTED
+|_|_|
+-+-+

 SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2 GROUP BY 1) a1 WHERE cond ORDER BY 1;

--- a/tests/cases/distributed/optimizer/filter_push_down.sql
+++ b/tests/cases/distributed/optimizer/filter_push_down.sql
@@ -44,18 +44,15 @@ SELECT * FROM (SELECT DISTINCT i1.i AS a, i2.i AS b FROM integers i1, integers i

 SELECT i FROM (SELECT * FROM integers i1 UNION SELECT * FROM integers i2) a WHERE i=3;

-- TODO(LFC): Somehow the following SQL does not order by column 1 under new DataFusion occasionally. Should further investigate it. Comment it out temporarily.
-- expected:
--  +---+---+--------------+
--  | a | b | ROW_NUMBER() |
--  +---+---+--------------+
--  | 1 | 1 | 1            |
--  | 2 | 2 | 5            |
--  | 3 | 3 | 9            |
--  +---+---+--------------+
-- SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1;
+SELECT * FROM (SELECT i1.i AS a, i2.i AS b, row_number() OVER (ORDER BY i1.i, i2.i) FROM integers i1, integers i2 WHERE i1.i IS NOT NULL AND i2.i IS NOT NULL) a1 WHERE a=b ORDER BY 1;

-SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1;
+-- The "0=1" will be evaluated as a constant expression that is always false, and will be optimized away in the query
+-- engine. In the final plan, there's no filter node. We explain it to ensure that.
+-- SQLNESS REPLACE (-+) -
+-- SQLNESS REPLACE (\s\s+) _
+-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
+-- SQLNESS REPLACE (peers.*) REDACTED
+EXPLAIN SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2) a1 WHERE cond ORDER BY 1;

 SELECT * FROM (SELECT 0=1 AS cond FROM integers i1, integers i2 GROUP BY 1) a1 WHERE cond ORDER BY 1;