From c6e5552f05e0a54cc06aa5919adda9eff37763b7 Mon Sep 17 00:00:00 2001 From: dennis zhuang Date: Thu, 25 Sep 2025 16:00:17 +0800 Subject: [PATCH] test: migrate aggregation tests from duckdb, part4 (#6965) * test: migrate aggregation tests from duckdb, part4 Signed-off-by: Dennis Zhuang * fix: tests Signed-off-by: Dennis Zhuang * fix: rename tests Signed-off-by: Dennis Zhuang * fix: comments Signed-off-by: Dennis Zhuang * chore: ignore zero weights test Signed-off-by: Dennis Zhuang * chore: remove duplicated sql Signed-off-by: Dennis Zhuang --------- Signed-off-by: Dennis Zhuang --- .../common/aggregate/approx_distinct.result | 118 ++++++++ .../common/aggregate/approx_distinct.sql | 51 ++++ .../common/aggregate/approx_median.result | 186 +++++++++++++ .../common/aggregate/approx_median.sql | 80 ++++++ .../aggregate/approx_percentile_cont.result | 194 +++++++++++++ .../aggregate/approx_percentile_cont.sql | 76 ++++++ .../approx_percentile_cont_with_weight.result | 194 +++++++++++++ .../approx_percentile_cont_with_weight.sql | 90 +++++++ .../common/aggregate/array_agg.result | 146 ++++++++++ .../standalone/common/aggregate/array_agg.sql | 56 ++++ .../standalone/common/aggregate/avg.result | 102 +++++++ .../cases/standalone/common/aggregate/avg.sql | 46 ++++ .../common/aggregate/bit_operations.result | 103 +++++++ .../common/aggregate/bit_operations.sql | 46 ++++ .../common/aggregate/bool_agg.result | 178 ++++++++++++ .../standalone/common/aggregate/bool_agg.sql | 74 +++++ .../standalone/common/aggregate/corr.result | 100 +++++++ .../standalone/common/aggregate/corr.sql | 49 ++++ .../standalone/common/aggregate/covar.result | 84 ++++++ .../standalone/common/aggregate/covar.sql | 41 +++ .../common/aggregate/first_last.result | 255 ++++++++++++++++++ .../common/aggregate/first_last.sql | 75 ++++++ .../standalone/common/aggregate/median.result | 119 ++++++++ .../standalone/common/aggregate/median.sql | 45 ++++ .../common/aggregate/min_max.result | 151 +++++++++++ 
.../standalone/common/aggregate/min_max.sql | 56 ++++ .../common/aggregate/regression.result | 120 +++++++++ .../common/aggregate/regression.sql | 53 ++++ .../standalone/common/aggregate/stddev.result | 175 ++++++++++++ .../standalone/common/aggregate/stddev.sql | 49 ++++ .../common/aggregate/string_agg.result | 96 +++++++ .../common/aggregate/string_agg.sql | 48 ++++ .../standalone/common/aggregate/uddsketch.sql | 1 + 33 files changed, 3257 insertions(+) create mode 100644 tests/cases/standalone/common/aggregate/approx_distinct.result create mode 100644 tests/cases/standalone/common/aggregate/approx_distinct.sql create mode 100644 tests/cases/standalone/common/aggregate/approx_median.result create mode 100644 tests/cases/standalone/common/aggregate/approx_median.sql create mode 100644 tests/cases/standalone/common/aggregate/approx_percentile_cont.result create mode 100644 tests/cases/standalone/common/aggregate/approx_percentile_cont.sql create mode 100644 tests/cases/standalone/common/aggregate/approx_percentile_cont_with_weight.result create mode 100644 tests/cases/standalone/common/aggregate/approx_percentile_cont_with_weight.sql create mode 100644 tests/cases/standalone/common/aggregate/array_agg.result create mode 100644 tests/cases/standalone/common/aggregate/array_agg.sql create mode 100644 tests/cases/standalone/common/aggregate/avg.result create mode 100644 tests/cases/standalone/common/aggregate/avg.sql create mode 100644 tests/cases/standalone/common/aggregate/bit_operations.result create mode 100644 tests/cases/standalone/common/aggregate/bit_operations.sql create mode 100644 tests/cases/standalone/common/aggregate/bool_agg.result create mode 100644 tests/cases/standalone/common/aggregate/bool_agg.sql create mode 100644 tests/cases/standalone/common/aggregate/corr.result create mode 100644 tests/cases/standalone/common/aggregate/corr.sql create mode 100644 tests/cases/standalone/common/aggregate/covar.result create mode 100644 
tests/cases/standalone/common/aggregate/covar.sql create mode 100644 tests/cases/standalone/common/aggregate/first_last.result create mode 100644 tests/cases/standalone/common/aggregate/first_last.sql create mode 100644 tests/cases/standalone/common/aggregate/median.result create mode 100644 tests/cases/standalone/common/aggregate/median.sql create mode 100644 tests/cases/standalone/common/aggregate/min_max.result create mode 100644 tests/cases/standalone/common/aggregate/min_max.sql create mode 100644 tests/cases/standalone/common/aggregate/regression.result create mode 100644 tests/cases/standalone/common/aggregate/regression.sql create mode 100644 tests/cases/standalone/common/aggregate/stddev.result create mode 100644 tests/cases/standalone/common/aggregate/stddev.sql create mode 100644 tests/cases/standalone/common/aggregate/string_agg.result create mode 100644 tests/cases/standalone/common/aggregate/string_agg.sql diff --git a/tests/cases/standalone/common/aggregate/approx_distinct.result b/tests/cases/standalone/common/aggregate/approx_distinct.result new file mode 100644 index 0000000000..a3875fadf0 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/approx_distinct.result @@ -0,0 +1,118 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_approximate_distinct_count.test +-- Test approx_distinct function +-- Basic tests +SELECT APPROX_DISTINCT(1); + ++---------------------------+ +| approx_distinct(Int64(1)) | ++---------------------------+ +| 1 | ++---------------------------+ + +-- FIXME(dennis): This feature is not implemented: Support for 'approx_distinct' for data type Null is not implemented +-- SELECT APPROX_DISTINCT(NULL); +SELECT APPROX_DISTINCT('hello'); + ++--------------------------------+ +| approx_distinct(Utf8("hello")) | ++--------------------------------+ +| 1 | ++--------------------------------+ + +-- Test with range data +SELECT APPROX_DISTINCT(10), APPROX_DISTINCT('hello') FROM numbers LIMIT 100; + 
++----------------------------+--------------------------------+ +| approx_distinct(Int64(10)) | approx_distinct(Utf8("hello")) | ++----------------------------+--------------------------------+ +| 1 | 1 | ++----------------------------+--------------------------------+ + +SELECT APPROX_DISTINCT(number) FROM numbers WHERE 1 = 0 LIMIT 100 ; + ++---------------------------------+ +| approx_distinct(numbers.number) | ++---------------------------------+ +| 0 | ++---------------------------------+ + +-- Test with different data types +CREATE TABLE dates_test(t DATE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO dates_test VALUES + ('2008-01-01', 1000), (NULL, 2000), ('2007-01-01', 3000), + ('2008-02-01', 4000), ('2008-01-02', 5000), ('2008-01-01', 6000), + ('2008-01-01', 7000), ('2008-01-01', 8000); + +Affected Rows: 8 + +-- FIXME(dennis): This feature is not implemented: Support for 'approx_distinct' for data type Date32 is not implemented +-- SELECT APPROX_DISTINCT(t) FROM dates_test; +DROP TABLE dates_test; + +Affected Rows: 0 + +CREATE TABLE names_test(t VARCHAR, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO names_test VALUES + ('Pedro', 1000), (NULL, 2000), ('Pedro', 3000), ('Pedro', 4000), + ('Mark', 5000), ('Mark', 6000), ('Mark', 7000), + ('Hannes-Muehleisen', 8000), ('Hannes-Muehleisen', 9000); + +Affected Rows: 9 + +SELECT APPROX_DISTINCT(t) FROM names_test; + ++-------------------------------+ +| approx_distinct(names_test.t) | ++-------------------------------+ +| 3 | ++-------------------------------+ + +DROP TABLE names_test; + +Affected Rows: 0 + +-- Test with large dataset +CREATE TABLE large_test(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO large_test SELECT number, number % 10, number * 1000 FROM numbers LIMIT 2000; + +Affected Rows: 2000 + +SELECT APPROX_DISTINCT(a), APPROX_DISTINCT(b) FROM large_test; + ++-------------------------------+-------------------------------+ +| 
approx_distinct(large_test.a) | approx_distinct(large_test.b) | ++-------------------------------+-------------------------------+ +| 2000 | 10 | ++-------------------------------+-------------------------------+ + +-- Test with groups +SELECT b, APPROX_DISTINCT(a) FROM large_test GROUP BY b ORDER BY b; + ++---+-------------------------------+ +| b | approx_distinct(large_test.a) | ++---+-------------------------------+ +| 0 | 200 | +| 1 | 201 | +| 2 | 201 | +| 3 | 200 | +| 4 | 199 | +| 5 | 200 | +| 6 | 199 | +| 7 | 200 | +| 8 | 200 | +| 9 | 200 | ++---+-------------------------------+ + +DROP TABLE large_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/approx_distinct.sql b/tests/cases/standalone/common/aggregate/approx_distinct.sql new file mode 100644 index 0000000000..0a7dbb57cc --- /dev/null +++ b/tests/cases/standalone/common/aggregate/approx_distinct.sql @@ -0,0 +1,51 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_approximate_distinct_count.test +-- Test approx_distinct function + +-- Basic tests +SELECT APPROX_DISTINCT(1); + +-- FIXME(dennis): This feature is not implemented: Support for 'approx_distinct' for data type Null is not implemented +-- SELECT APPROX_DISTINCT(NULL); + +SELECT APPROX_DISTINCT('hello'); + +-- Test with range data +SELECT APPROX_DISTINCT(10), APPROX_DISTINCT('hello') FROM numbers LIMIT 100; + +SELECT APPROX_DISTINCT(number) FROM numbers WHERE 1 = 0 LIMIT 100 ; + +-- Test with different data types +CREATE TABLE dates_test(t DATE, ts TIMESTAMP TIME INDEX); + +INSERT INTO dates_test VALUES + ('2008-01-01', 1000), (NULL, 2000), ('2007-01-01', 3000), + ('2008-02-01', 4000), ('2008-01-02', 5000), ('2008-01-01', 6000), + ('2008-01-01', 7000), ('2008-01-01', 8000); + +-- FIXME(dennis): This feature is not implemented: Support for 'approx_distinct' for data type Date32 is not implemented +-- SELECT APPROX_DISTINCT(t) FROM dates_test; + +DROP TABLE dates_test; + +CREATE TABLE names_test(t 
VARCHAR, ts TIMESTAMP TIME INDEX); + +INSERT INTO names_test VALUES + ('Pedro', 1000), (NULL, 2000), ('Pedro', 3000), ('Pedro', 4000), + ('Mark', 5000), ('Mark', 6000), ('Mark', 7000), + ('Hannes-Muehleisen', 8000), ('Hannes-Muehleisen', 9000); + +SELECT APPROX_DISTINCT(t) FROM names_test; + +DROP TABLE names_test; + +-- Test with large dataset +CREATE TABLE large_test(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO large_test SELECT number, number % 10, number * 1000 FROM numbers LIMIT 2000; + +SELECT APPROX_DISTINCT(a), APPROX_DISTINCT(b) FROM large_test; + +-- Test with groups +SELECT b, APPROX_DISTINCT(a) FROM large_test GROUP BY b ORDER BY b; + +DROP TABLE large_test; diff --git a/tests/cases/standalone/common/aggregate/approx_median.result b/tests/cases/standalone/common/aggregate/approx_median.result new file mode 100644 index 0000000000..9d5bde78f6 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/approx_median.result @@ -0,0 +1,186 @@ +-- Migrated from DuckDB test style: test approximate median +-- Test APPROX_MEDIAN function +-- Test with odd number of values +CREATE TABLE odd_test(i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO odd_test VALUES (1, 1000), (2, 2000), (3, 3000), (4, 4000), (5, 5000); + +Affected Rows: 5 + +-- Should return 3 (middle value) +SELECT approx_median(i) FROM odd_test; + ++---------------------------+ +| approx_median(odd_test.i) | ++---------------------------+ +| 3 | ++---------------------------+ + +-- Test with even number of values +CREATE TABLE even_test(i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO even_test VALUES (1, 1000), (2, 2000), (4, 3000), (5, 4000); + +Affected Rows: 4 + +-- Should return approximately 3 (average of 2 and 4) +SELECT approx_median(i) FROM even_test; + ++----------------------------+ +| approx_median(even_test.i) | ++----------------------------+ +| 3 | ++----------------------------+ + +-- Test with larger dataset +CREATE 
TABLE large_test(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO large_test SELECT number, number % 3, number * 1000 FROM numbers LIMIT 1000; + +Affected Rows: 1000 + +SELECT approx_median(val) FROM large_test; + ++-------------------------------+ +| approx_median(large_test.val) | ++-------------------------------+ +| 499 | ++-------------------------------+ + +-- Test with groups +SELECT grp, approx_median(val) FROM large_test GROUP BY grp ORDER BY grp; + ++-----+-------------------------------+ +| grp | approx_median(large_test.val) | ++-----+-------------------------------+ +| 0 | 498 | +| 1 | 499 | +| 2 | 500 | ++-----+-------------------------------+ + +-- Test with doubles +CREATE TABLE double_test(d DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO double_test VALUES + (1.1, 1000), (2.2, 2000), (3.3, 3000), (4.4, 4000), (5.5, 5000); + +Affected Rows: 5 + +SELECT approx_median(d) FROM double_test; + ++------------------------------+ +| approx_median(double_test.d) | ++------------------------------+ +| 3.3 | ++------------------------------+ + +-- Test with NULL values +INSERT INTO double_test VALUES (NULL, 6000); + +Affected Rows: 1 + +SELECT approx_median(d) FROM double_test; + ++------------------------------+ +| approx_median(double_test.d) | ++------------------------------+ +| 3.3 | ++------------------------------+ + +-- Test with duplicate values +CREATE TABLE dup_test(val INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO dup_test VALUES + (1, 1000), (1, 2000), (2, 3000), (2, 4000), + (3, 5000), (3, 6000), (4, 7000), (4, 8000); + +Affected Rows: 8 + +SELECT approx_median(val) FROM dup_test; + ++-----------------------------+ +| approx_median(dup_test.val) | ++-----------------------------+ +| 2 | ++-----------------------------+ + +-- Compare with exact median +SELECT median(val), approx_median(val) FROM dup_test; + ++----------------------+-----------------------------+ +| 
median(dup_test.val) | approx_median(dup_test.val) | ++----------------------+-----------------------------+ +| 2 | 2 | ++----------------------+-----------------------------+ + +-- Test edge cases +-- empty result +SELECT approx_median(i) FROM odd_test WHERE i > 100; + ++---------------------------+ +| approx_median(odd_test.i) | ++---------------------------+ +| | ++---------------------------+ + +-- Test single value +SELECT approx_median(i) FROM odd_test WHERE i = 3; + ++---------------------------+ +| approx_median(odd_test.i) | ++---------------------------+ +| 3 | ++---------------------------+ + +-- Test with negative values +CREATE TABLE neg_test(val INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO neg_test VALUES (-5, 1000), (-2, 2000), (0, 3000), (3, 4000), (7, 5000); + +Affected Rows: 5 + +SELECT approx_median(val) FROM neg_test; + ++-----------------------------+ +| approx_median(neg_test.val) | ++-----------------------------+ +| 0 | ++-----------------------------+ + +-- cleanup +DROP TABLE odd_test; + +Affected Rows: 0 + +DROP TABLE even_test; + +Affected Rows: 0 + +DROP TABLE large_test; + +Affected Rows: 0 + +DROP TABLE double_test; + +Affected Rows: 0 + +DROP TABLE dup_test; + +Affected Rows: 0 + +DROP TABLE neg_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/approx_median.sql b/tests/cases/standalone/common/aggregate/approx_median.sql new file mode 100644 index 0000000000..41faac5b38 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/approx_median.sql @@ -0,0 +1,80 @@ +-- Migrated from DuckDB test style: test approximate median +-- Test APPROX_MEDIAN function + +-- Test with odd number of values +CREATE TABLE odd_test(i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO odd_test VALUES (1, 1000), (2, 2000), (3, 3000), (4, 4000), (5, 5000); + +-- Should return 3 (middle value) +SELECT approx_median(i) FROM odd_test; + +-- Test with even number of values +CREATE TABLE even_test(i 
INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO even_test VALUES (1, 1000), (2, 2000), (4, 3000), (5, 4000); + +-- Should return approximately 3 (average of 2 and 4) +SELECT approx_median(i) FROM even_test; + +-- Test with larger dataset +CREATE TABLE large_test(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO large_test SELECT number, number % 3, number * 1000 FROM numbers LIMIT 1000; + +SELECT approx_median(val) FROM large_test; + +-- Test with groups +SELECT grp, approx_median(val) FROM large_test GROUP BY grp ORDER BY grp; + +-- Test with doubles +CREATE TABLE double_test(d DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO double_test VALUES + (1.1, 1000), (2.2, 2000), (3.3, 3000), (4.4, 4000), (5.5, 5000); + +SELECT approx_median(d) FROM double_test; + +-- Test with NULL values +INSERT INTO double_test VALUES (NULL, 6000); + +SELECT approx_median(d) FROM double_test; + +-- Test with duplicate values +CREATE TABLE dup_test(val INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO dup_test VALUES + (1, 1000), (1, 2000), (2, 3000), (2, 4000), + (3, 5000), (3, 6000), (4, 7000), (4, 8000); + +SELECT approx_median(val) FROM dup_test; + +-- Compare with exact median +SELECT median(val), approx_median(val) FROM dup_test; + +-- Test edge cases +-- empty result +SELECT approx_median(i) FROM odd_test WHERE i > 100; + +-- Test single value +SELECT approx_median(i) FROM odd_test WHERE i = 3; + +-- Test with negative values +CREATE TABLE neg_test(val INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO neg_test VALUES (-5, 1000), (-2, 2000), (0, 3000), (3, 4000), (7, 5000); + +SELECT approx_median(val) FROM neg_test; + +-- cleanup +DROP TABLE odd_test; + +DROP TABLE even_test; + +DROP TABLE large_test; + +DROP TABLE double_test; + +DROP TABLE dup_test; + +DROP TABLE neg_test; diff --git a/tests/cases/standalone/common/aggregate/approx_percentile_cont.result b/tests/cases/standalone/common/aggregate/approx_percentile_cont.result new file mode 100644 index 
0000000000..ac9d60186e --- /dev/null +++ b/tests/cases/standalone/common/aggregate/approx_percentile_cont.result @@ -0,0 +1,194 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_approx_quantile.test +-- Test approx_percentile_cont function instead of approx_quantile +-- Test basic approximate quantile +CREATE TABLE approx_test(i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO approx_test SELECT number, number * 1000 FROM numbers LIMIT 1000; + +Affected Rows: 1000 + +-- Test approx_percentile_cont +-- median +SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i) FROM approx_test; + ++----------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.5)) WITHIN GROUP [approx_test.i ASC NULLS LAST] | ++----------------------------------------------------------------------------------+ +| 499 | ++----------------------------------------------------------------------------------+ + +-- first quartile +SELECT approx_percentile_cont(0.25) WITHIN GROUP (ORDER BY i) FROM approx_test; + ++-----------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.25)) WITHIN GROUP [approx_test.i ASC NULLS LAST] | ++-----------------------------------------------------------------------------------+ +| 249 | ++-----------------------------------------------------------------------------------+ + +-- third quartile +SELECT approx_percentile_cont(0.75) WITHIN GROUP (ORDER BY i) FROM approx_test; + ++-----------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.75)) WITHIN GROUP [approx_test.i ASC NULLS LAST] | ++-----------------------------------------------------------------------------------+ +| 749 | ++-----------------------------------------------------------------------------------+ + +-- 95th percentile +SELECT approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY i) 
FROM approx_test; + ++-----------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.95)) WITHIN GROUP [approx_test.i ASC NULLS LAST] | ++-----------------------------------------------------------------------------------+ +| 949 | ++-----------------------------------------------------------------------------------+ + +-- Test approx_percentile_cont DESC +-- median +SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + ++------------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.5)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] | ++------------------------------------------------------------------------------------+ +| 499 | ++------------------------------------------------------------------------------------+ + +-- first quartile +SELECT approx_percentile_cont(0.25) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + ++-------------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.25)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] | ++-------------------------------------------------------------------------------------+ +| 749 | ++-------------------------------------------------------------------------------------+ + +-- third quartile +SELECT approx_percentile_cont(0.75) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + ++-------------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.75)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] | ++-------------------------------------------------------------------------------------+ +| 249 | ++-------------------------------------------------------------------------------------+ + +-- 95th percentile +SELECT approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + 
++-------------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.95)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] | ++-------------------------------------------------------------------------------------+ +| 49 | ++-------------------------------------------------------------------------------------+ + +-- Test with different data types +CREATE TABLE approx_double(d DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO approx_double SELECT number * 1.5, number * 1000 FROM numbers LIMIT 1000; + +Affected Rows: 1000 + +SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY d) FROM approx_double; + ++------------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.5)) WITHIN GROUP [approx_double.d ASC NULLS LAST] | ++------------------------------------------------------------------------------------+ +| 748.875 | ++------------------------------------------------------------------------------------+ + +SELECT approx_percentile_cont(0.9) WITHIN GROUP (ORDER BY d) FROM approx_double; + ++------------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.9)) WITHIN GROUP [approx_double.d ASC NULLS LAST] | ++------------------------------------------------------------------------------------+ +| 1349.25 | ++------------------------------------------------------------------------------------+ + +-- Test with groups +CREATE TABLE approx_groups(grp INTEGER, val INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO approx_groups SELECT + number % 3 as grp, + number, + number * 1000 +FROM numbers LIMIT 300; + +Affected Rows: 300 + +SELECT grp, approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY val) +FROM approx_groups GROUP BY grp ORDER BY grp; + ++-----+--------------------------------------------------------------------------------------+ +| grp | 
approx_percentile_cont(Float64(0.5)) WITHIN GROUP [approx_groups.val ASC NULLS LAST] | ++-----+--------------------------------------------------------------------------------------+ +| 0 | 148 | +| 1 | 149 | +| 2 | 150 | ++-----+--------------------------------------------------------------------------------------+ + +-- Test with NULL values +INSERT INTO approx_test VALUES (NULL, 1001000); + +Affected Rows: 1 + +SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i) FROM approx_test; + ++----------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0.5)) WITHIN GROUP [approx_test.i ASC NULLS LAST] | ++----------------------------------------------------------------------------------+ +| 499 | ++----------------------------------------------------------------------------------+ + +-- Test edge cases +-- should be close to min +SELECT approx_percentile_cont(0.0) WITHIN GROUP (ORDER BY i) FROM approx_test; + ++--------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0)) WITHIN GROUP [approx_test.i ASC NULLS LAST] | ++--------------------------------------------------------------------------------+ +| 0 | ++--------------------------------------------------------------------------------+ + +SELECT approx_percentile_cont(1.0) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + ++----------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(1)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] | ++----------------------------------------------------------------------------------+ +| 0 | ++----------------------------------------------------------------------------------+ + +-- should be close to max +SELECT approx_percentile_cont(1.0) WITHIN GROUP (ORDER BY i) FROM approx_test; + ++--------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(1)) 
WITHIN GROUP [approx_test.i ASC NULLS LAST] | ++--------------------------------------------------------------------------------+ +| 999 | ++--------------------------------------------------------------------------------+ + +SELECT approx_percentile_cont(0.0) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + ++----------------------------------------------------------------------------------+ +| approx_percentile_cont(Float64(0)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] | ++----------------------------------------------------------------------------------+ +| 999 | ++----------------------------------------------------------------------------------+ + +DROP TABLE approx_test; + +Affected Rows: 0 + +DROP TABLE approx_double; + +Affected Rows: 0 + +DROP TABLE approx_groups; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/approx_percentile_cont.sql b/tests/cases/standalone/common/aggregate/approx_percentile_cont.sql new file mode 100644 index 0000000000..54a2dee654 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/approx_percentile_cont.sql @@ -0,0 +1,76 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_approx_quantile.test +-- Test approx_percentile_cont function instead of approx_quantile + +-- Test basic approximate quantile +CREATE TABLE approx_test(i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO approx_test SELECT number, number * 1000 FROM numbers LIMIT 1000; + +-- Test approx_percentile_cont +-- median +SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i) FROM approx_test; + +-- first quartile +SELECT approx_percentile_cont(0.25) WITHIN GROUP (ORDER BY i) FROM approx_test; + +-- third quartile +SELECT approx_percentile_cont(0.75) WITHIN GROUP (ORDER BY i) FROM approx_test; + +-- 95th percentile +SELECT approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY i) FROM approx_test; + +-- Test approx_percentile_cont DESC +-- median +SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i 
DESC) FROM approx_test; + +-- first quartile +SELECT approx_percentile_cont(0.25) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + +-- third quartile +SELECT approx_percentile_cont(0.75) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + +-- 95th percentile +SELECT approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + +-- Test with different data types +CREATE TABLE approx_double(d DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO approx_double SELECT number * 1.5, number * 1000 FROM numbers LIMIT 1000; + +SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY d) FROM approx_double; + +SELECT approx_percentile_cont(0.9) WITHIN GROUP (ORDER BY d) FROM approx_double; + +-- Test with groups +CREATE TABLE approx_groups(grp INTEGER, val INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO approx_groups SELECT + number % 3 as grp, + number, + number * 1000 +FROM numbers LIMIT 300; + +SELECT grp, approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY val) +FROM approx_groups GROUP BY grp ORDER BY grp; + +-- Test with NULL values +INSERT INTO approx_test VALUES (NULL, 1001000); + +SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i) FROM approx_test; + +-- Test edge cases +-- should be close to min +SELECT approx_percentile_cont(0.0) WITHIN GROUP (ORDER BY i) FROM approx_test; + +SELECT approx_percentile_cont(1.0) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + +-- should be close to max +SELECT approx_percentile_cont(1.0) WITHIN GROUP (ORDER BY i) FROM approx_test; + +SELECT approx_percentile_cont(0.0) WITHIN GROUP (ORDER BY i DESC) FROM approx_test; + +DROP TABLE approx_test; + +DROP TABLE approx_double; + +DROP TABLE approx_groups; diff --git a/tests/cases/standalone/common/aggregate/approx_percentile_cont_with_weight.result b/tests/cases/standalone/common/aggregate/approx_percentile_cont_with_weight.result new file mode 100644 index 0000000000..b843ba9357 --- /dev/null +++ 
b/tests/cases/standalone/common/aggregate/approx_percentile_cont_with_weight.result @@ -0,0 +1,194 @@ +-- Migrated from DuckDB test style: test weighted approximate percentile +-- Test APPROX_PERCENTILE_CONT_WITH_WEIGHT function +-- Test basic weighted percentile +CREATE TABLE weight_test("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO weight_test VALUES + (10, 1, 1000), (20, 2, 2000), (30, 3, 3000), (40, 4, 4000), (50, 1, 5000); + +Affected Rows: 5 + +-- Test 50th percentile (median) with weights +SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_test; + ++---------------------------------------------------------------------------------------------------------------------+ +| approx_percentile_cont_with_weight(weight_test.weight,Float64(0.5)) WITHIN GROUP [weight_test.value ASC NULLS LAST] | ++---------------------------------------------------------------------------------------------------------------------+ +| 22 | ++---------------------------------------------------------------------------------------------------------------------+ + +-- Test different percentiles +SELECT approx_percentile_cont_with_weight(weight, 0.25) WITHIN GROUP (ORDER BY "value") FROM weight_test; + ++----------------------------------------------------------------------------------------------------------------------+ +| approx_percentile_cont_with_weight(weight_test.weight,Float64(0.25)) WITHIN GROUP [weight_test.value ASC NULLS LAST] | ++----------------------------------------------------------------------------------------------------------------------+ +| 16 | ++----------------------------------------------------------------------------------------------------------------------+ + +SELECT approx_percentile_cont_with_weight(weight, 0.75) WITHIN GROUP (ORDER BY "value") FROM weight_test; + 
++----------------------------------------------------------------------------------------------------------------------+ +| approx_percentile_cont_with_weight(weight_test.weight,Float64(0.75)) WITHIN GROUP [weight_test.value ASC NULLS LAST] | ++----------------------------------------------------------------------------------------------------------------------+ +| 44 | ++----------------------------------------------------------------------------------------------------------------------+ + +-- Test with groups +CREATE TABLE weight_groups(grp INTEGER, "value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO weight_groups VALUES + (1, 10, 2, 1000), (1, 20, 3, 2000), (1, 30, 1, 3000), + (2, 100, 1, 4000), (2, 200, 4, 5000), (2, 300, 2, 6000); + +Affected Rows: 6 + +SELECT grp, approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") +FROM weight_groups GROUP BY grp ORDER BY grp; + ++-----+-------------------------------------------------------------------------------------------------------------------------+ +| grp | approx_percentile_cont_with_weight(weight_groups.weight,Float64(0.5)) WITHIN GROUP [weight_groups.value ASC NULLS LAST] | ++-----+-------------------------------------------------------------------------------------------------------------------------+ +| 1 | 12 | +| 2 | 162 | ++-----+-------------------------------------------------------------------------------------------------------------------------+ + +-- Test with double values and weights +CREATE TABLE weight_double("value" DOUBLE, "weight" DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO weight_double VALUES + (1.5, 0.5, 1000), (2.5, 1.0, 2000), (3.5, 1.5, 3000), (4.5, 2.0, 4000); + +Affected Rows: 4 + +SELECT approx_percentile_cont_with_weight("weight", 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_double; + 
++-------------------------------------------------------------------------------------------------------------------------+ +| approx_percentile_cont_with_weight(weight_double.weight,Float64(0.5)) WITHIN GROUP [weight_double.value ASC NULLS LAST] | ++-------------------------------------------------------------------------------------------------------------------------+ +| 3.3333333333333335 | ++-------------------------------------------------------------------------------------------------------------------------+ + +-- Test edge cases +-- min +SELECT approx_percentile_cont_with_weight("weight", 0.0) WITHIN GROUP (ORDER BY "value") FROM weight_test; + ++-------------------------------------------------------------------------------------------------------------------+ +| approx_percentile_cont_with_weight(weight_test.weight,Float64(0)) WITHIN GROUP [weight_test.value ASC NULLS LAST] | ++-------------------------------------------------------------------------------------------------------------------+ +| 10 | ++-------------------------------------------------------------------------------------------------------------------+ + +-- max +SELECT approx_percentile_cont_with_weight("weight", 1.0) WITHIN GROUP (ORDER BY "value") FROM weight_test; + ++-------------------------------------------------------------------------------------------------------------------+ +| approx_percentile_cont_with_weight(weight_test.weight,Float64(1)) WITHIN GROUP [weight_test.value ASC NULLS LAST] | ++-------------------------------------------------------------------------------------------------------------------+ +| 50 | ++-------------------------------------------------------------------------------------------------------------------+ + +-- Test with zero weights +CREATE TABLE zero_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO zero_weight VALUES + (10, 0, 1000), (20, 1, 2000), (30, 0, 3000), (40, 2, 4000); + +Affected Rows: 
4 + +--TODO: this result is unstable currently +--SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM zero_weight; +-- Test with NULL values +INSERT INTO weight_test VALUES (NULL, 1, 6000), (60, NULL, 7000); + +Affected Rows: 2 + +SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_test; + ++---------------------------------------------------------------------------------------------------------------------+ +| approx_percentile_cont_with_weight(weight_test.weight,Float64(0.5)) WITHIN GROUP [weight_test.value ASC NULLS LAST] | ++---------------------------------------------------------------------------------------------------------------------+ +| 22 | ++---------------------------------------------------------------------------------------------------------------------+ + +-- Test empty result +SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") +FROM weight_test WHERE "value" > 1000; + ++---------------------------------------------------------------------------------------------------------------------+ +| approx_percentile_cont_with_weight(weight_test.weight,Float64(0.5)) WITHIN GROUP [weight_test.value ASC NULLS LAST] | ++---------------------------------------------------------------------------------------------------------------------+ +| | ++---------------------------------------------------------------------------------------------------------------------+ + +-- Test single weighted value +CREATE TABLE single_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO single_weight VALUES (42, 5, 1000); + +Affected Rows: 1 + +SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM single_weight; + ++-------------------------------------------------------------------------------------------------------------------------+ +| 
approx_percentile_cont_with_weight(single_weight.weight,Float64(0.5)) WITHIN GROUP [single_weight.value ASC NULLS LAST] | ++-------------------------------------------------------------------------------------------------------------------------+ +| 42 | ++-------------------------------------------------------------------------------------------------------------------------+ + +-- Test equal weights (should behave like regular percentile) +CREATE TABLE equal_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO equal_weight VALUES + (10, 1, 1000), (20, 1, 2000), (30, 1, 3000), (40, 1, 4000); + +Affected Rows: 4 + +SELECT + approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value"), + approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY "value") +FROM equal_weight; + ++-----------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------+ +| approx_percentile_cont_with_weight(equal_weight.weight,Float64(0.5)) WITHIN GROUP [equal_weight.value ASC NULLS LAST] | approx_percentile_cont(Float64(0.5)) WITHIN GROUP [equal_weight.value ASC NULLS LAST] | ++-----------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------+ +| 25 | 25 | ++-----------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------+ + +-- cleanup +DROP TABLE weight_test; + +Affected Rows: 0 + +DROP TABLE weight_groups; + +Affected Rows: 0 + +DROP TABLE weight_double; + +Affected Rows: 0 + +DROP TABLE zero_weight; + +Affected Rows: 0 + +DROP TABLE single_weight; + +Affected Rows: 0 + +DROP TABLE equal_weight; + +Affected 
Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/approx_percentile_cont_with_weight.sql b/tests/cases/standalone/common/aggregate/approx_percentile_cont_with_weight.sql new file mode 100644 index 0000000000..b5cd9e9b18 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/approx_percentile_cont_with_weight.sql @@ -0,0 +1,90 @@ +-- Migrated from DuckDB test style: test weighted approximate percentile +-- Test APPROX_PERCENTILE_CONT_WITH_WEIGHT function + +-- Test basic weighted percentile +CREATE TABLE weight_test("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO weight_test VALUES + (10, 1, 1000), (20, 2, 2000), (30, 3, 3000), (40, 4, 4000), (50, 1, 5000); + +-- Test 50th percentile (median) with weights +SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_test; + +-- Test different percentiles +SELECT approx_percentile_cont_with_weight(weight, 0.25) WITHIN GROUP (ORDER BY "value") FROM weight_test; + +SELECT approx_percentile_cont_with_weight(weight, 0.75) WITHIN GROUP (ORDER BY "value") FROM weight_test; + +-- Test with groups +CREATE TABLE weight_groups(grp INTEGER, "value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO weight_groups VALUES + (1, 10, 2, 1000), (1, 20, 3, 2000), (1, 30, 1, 3000), + (2, 100, 1, 4000), (2, 200, 4, 5000), (2, 300, 2, 6000); + +SELECT grp, approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") +FROM weight_groups GROUP BY grp ORDER BY grp; + +-- Test with double values and weights +CREATE TABLE weight_double("value" DOUBLE, "weight" DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO weight_double VALUES + (1.5, 0.5, 1000), (2.5, 1.0, 2000), (3.5, 1.5, 3000), (4.5, 2.0, 4000); + +SELECT approx_percentile_cont_with_weight("weight", 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_double; + +-- Test edge cases +-- min +SELECT approx_percentile_cont_with_weight("weight", 0.0) WITHIN GROUP (ORDER BY "value") 
FROM weight_test; + +-- max +SELECT approx_percentile_cont_with_weight("weight", 1.0) WITHIN GROUP (ORDER BY "value") FROM weight_test; + +-- Test with zero weights +CREATE TABLE zero_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO zero_weight VALUES + (10, 0, 1000), (20, 1, 2000), (30, 0, 3000), (40, 2, 4000); + +--TODO: this result is unstable currently +--SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM zero_weight; + +-- Test with NULL values +INSERT INTO weight_test VALUES (NULL, 1, 6000), (60, NULL, 7000); + +SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_test; + +-- Test empty result +SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") +FROM weight_test WHERE "value" > 1000; + +-- Test single weighted value +CREATE TABLE single_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO single_weight VALUES (42, 5, 1000); + +SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM single_weight; + +-- Test equal weights (should behave like regular percentile) +CREATE TABLE equal_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO equal_weight VALUES + (10, 1, 1000), (20, 1, 2000), (30, 1, 3000), (40, 1, 4000); + +SELECT + approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value"), + approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY "value") +FROM equal_weight; + +-- cleanup +DROP TABLE weight_test; + +DROP TABLE weight_groups; + +DROP TABLE weight_double; + +DROP TABLE zero_weight; + +DROP TABLE single_weight; + +DROP TABLE equal_weight; diff --git a/tests/cases/standalone/common/aggregate/array_agg.result b/tests/cases/standalone/common/aggregate/array_agg.result new file mode 100644 index 0000000000..5a73c28989 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/array_agg.result @@ 
-0,0 +1,146 @@ +-- Migrated from DuckDB test style: test array aggregation +-- Test ARRAY_AGG function +-- Test with integers +CREATE TABLE integers(i INTEGER, g INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO integers VALUES (1, 1, 1000), (2, 1, 2000), (3, 1, 3000), (4, 2, 4000), (5, 2, 5000); + +Affected Rows: 5 + +-- Basic array aggregation +SELECT array_agg(i) FROM integers; + ++-----------------------+ +| array_agg(integers.i) | ++-----------------------+ +| [1, 2, 3, 4, 5] | ++-----------------------+ + +-- Array aggregation with GROUP BY +SELECT g, array_agg(i) FROM integers GROUP BY g ORDER BY g; + ++---+-----------------------+ +| g | array_agg(integers.i) | ++---+-----------------------+ +| 1 | [1, 2, 3] | +| 2 | [4, 5] | ++---+-----------------------+ + +-- Test with ORDER BY +SELECT array_agg(i ORDER BY i DESC) FROM integers; + ++--------------------------------------------------------------+ +| array_agg(integers.i) ORDER BY [integers.i DESC NULLS FIRST] | ++--------------------------------------------------------------+ +| [5, 4, 3, 2, 1] | ++--------------------------------------------------------------+ + +SELECT g, array_agg(i ORDER BY i DESC) FROM integers GROUP BY g ORDER BY g; + ++---+--------------------------------------------------------------+ +| g | array_agg(integers.i) ORDER BY [integers.i DESC NULLS FIRST] | ++---+--------------------------------------------------------------+ +| 1 | [3, 2, 1] | +| 2 | [5, 4] | ++---+--------------------------------------------------------------+ + +-- Test with strings +CREATE TABLE strings(s VARCHAR, g INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO strings VALUES + ('apple', 1, 1000), ('banana', 1, 2000), ('cherry', 2, 3000), + ('date', 2, 4000), ('elderberry', 1, 5000); + +Affected Rows: 5 + +SELECT array_agg(s) FROM strings; + ++-------------------------------------------+ +| array_agg(strings.s) | ++-------------------------------------------+ +| [apple, 
banana, cherry, date, elderberry] | ++-------------------------------------------+ + +SELECT g, array_agg(s ORDER BY s) FROM strings GROUP BY g ORDER BY g; + ++---+----------------------------------------------------------+ +| g | array_agg(strings.s) ORDER BY [strings.s ASC NULLS LAST] | ++---+----------------------------------------------------------+ +| 1 | [apple, banana, elderberry] | +| 2 | [cherry, date] | ++---+----------------------------------------------------------+ + +-- Test with NULL values +INSERT INTO strings VALUES (NULL, 1, 6000), ('fig', NULL, 7000); + +Affected Rows: 2 + +SELECT array_agg(s) FROM strings WHERE s IS NOT NULL; + ++------------------------------------------------+ +| array_agg(strings.s) | ++------------------------------------------------+ +| [apple, banana, cherry, date, elderberry, fig] | ++------------------------------------------------+ + +SELECT g, array_agg(s) FROM strings WHERE g IS NOT NULL GROUP BY g ORDER BY g; + ++---+-------------------------------+ +| g | array_agg(strings.s) | ++---+-------------------------------+ +| 1 | [apple, banana, elderberry, ] | +| 2 | [cherry, date] | ++---+-------------------------------+ + +-- Test with DISTINCT +SELECT array_agg(DISTINCT s ORDER BY s) FROM strings WHERE s IS NOT NULL; + ++-------------------------------------------------------------------+ +| array_agg(DISTINCT strings.s) ORDER BY [strings.s ASC NULLS LAST] | ++-------------------------------------------------------------------+ +| [apple, banana, cherry, date, elderberry, fig] | ++-------------------------------------------------------------------+ + +-- Test empty result +SELECT array_agg(i) FROM integers WHERE i > 100; + ++-----------------------+ +| array_agg(integers.i) | ++-----------------------+ +| | ++-----------------------+ + +-- Test with doubles +CREATE TABLE doubles(d DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO doubles VALUES (1.1, 1000), (2.2, 2000), (3.3, 3000), (4.4, 4000); + 
+Affected Rows: 4 + +SELECT array_agg(d ORDER BY d) FROM doubles; + ++----------------------------------------------------------+ +| array_agg(doubles.d) ORDER BY [doubles.d ASC NULLS LAST] | ++----------------------------------------------------------+ +| [1.1, 2.2, 3.3, 4.4] | ++----------------------------------------------------------+ + +-- cleanup +DROP TABLE integers; + +Affected Rows: 0 + +DROP TABLE strings; + +Affected Rows: 0 + +DROP TABLE doubles; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/array_agg.sql b/tests/cases/standalone/common/aggregate/array_agg.sql new file mode 100644 index 0000000000..dedabf1a18 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/array_agg.sql @@ -0,0 +1,56 @@ +-- Migrated from DuckDB test style: test array aggregation +-- Test ARRAY_AGG function + +-- Test with integers +CREATE TABLE integers(i INTEGER, g INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO integers VALUES (1, 1, 1000), (2, 1, 2000), (3, 1, 3000), (4, 2, 4000), (5, 2, 5000); + +-- Basic array aggregation +SELECT array_agg(i) FROM integers; + +-- Array aggregation with GROUP BY +SELECT g, array_agg(i) FROM integers GROUP BY g ORDER BY g; + +-- Test with ORDER BY +SELECT array_agg(i ORDER BY i DESC) FROM integers; + +SELECT g, array_agg(i ORDER BY i DESC) FROM integers GROUP BY g ORDER BY g; + +-- Test with strings +CREATE TABLE strings(s VARCHAR, g INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO strings VALUES + ('apple', 1, 1000), ('banana', 1, 2000), ('cherry', 2, 3000), + ('date', 2, 4000), ('elderberry', 1, 5000); + +SELECT array_agg(s) FROM strings; + +SELECT g, array_agg(s ORDER BY s) FROM strings GROUP BY g ORDER BY g; + +-- Test with NULL values +INSERT INTO strings VALUES (NULL, 1, 6000), ('fig', NULL, 7000); + +SELECT array_agg(s) FROM strings WHERE s IS NOT NULL; + +SELECT g, array_agg(s) FROM strings WHERE g IS NOT NULL GROUP BY g ORDER BY g; + +-- Test with DISTINCT +SELECT array_agg(DISTINCT s ORDER BY s) 
FROM strings WHERE s IS NOT NULL; + +-- Test empty result +SELECT array_agg(i) FROM integers WHERE i > 100; + +-- Test with doubles +CREATE TABLE doubles(d DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO doubles VALUES (1.1, 1000), (2.2, 2000), (3.3, 3000), (4.4, 4000); + +SELECT array_agg(d ORDER BY d) FROM doubles; + +-- cleanup +DROP TABLE integers; + +DROP TABLE strings; + +DROP TABLE doubles; diff --git a/tests/cases/standalone/common/aggregate/avg.result b/tests/cases/standalone/common/aggregate/avg.result new file mode 100644 index 0000000000..e9d06efae2 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/avg.result @@ -0,0 +1,102 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_avg.test +-- Test AVG aggregate function +-- scalar average +SELECT AVG(3); + ++---------------+ +| avg(Int64(3)) | ++---------------+ +| 3.0 | ++---------------+ + +-- FIXME(dennis): unsupported type +-- SELECT AVG(NULL); +SELECT AVG(3::SMALLINT), AVG(NULL::SMALLINT); + ++---------------+-----------+ +| avg(Int64(3)) | avg(NULL) | ++---------------+-----------+ +| 3.0 | | ++---------------+-----------+ + +SELECT AVG(3::DOUBLE), AVG(NULL::DOUBLE); + ++---------------+-----------+ +| avg(Int64(3)) | avg(NULL) | ++---------------+-----------+ +| 3.0 | | ++---------------+-----------+ + +-- test average with table +CREATE TABLE integers(i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO integers VALUES (1, 1000), (2, 2000), (3, 3000); + +Affected Rows: 3 + +SELECT AVG(i), AVG(1), AVG(DISTINCT i), AVG(NULL) FROM integers; + +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Execution error: Function 'avg' user-defined coercion failed with "Error during planning: The function \"avg\" does not support inputs of type Null." No function matches the given name and argument types 'avg(Null)'. You might need to add explicit type casts. 
+ Candidate functions: + avg(UserDefined) + +SELECT AVG(i) FROM integers WHERE i > 100; + ++-----------------+ +| avg(integers.i) | ++-----------------+ +| | ++-----------------+ + +-- empty average +CREATE TABLE vals(i INTEGER, j DOUBLE, k BIGINT, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO vals VALUES (NULL, NULL, NULL, 1000); + +Affected Rows: 1 + +SELECT AVG(i), AVG(j), AVG(k) FROM vals; + ++-------------+-------------+-------------+ +| avg(vals.i) | avg(vals.j) | avg(vals.k) | ++-------------+-------------+-------------+ +| | | | ++-------------+-------------+-------------+ + +-- test with mixed values +DROP TABLE vals; + +Affected Rows: 0 + +CREATE TABLE vals(i INTEGER, j DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO vals VALUES (1, 1.5, 1000), (2, 2.5, 2000), (3, 3.5, 3000), (NULL, NULL, 4000); + +Affected Rows: 4 + +SELECT AVG(i), AVG(j) FROM vals; + ++-------------+-------------+ +| avg(vals.i) | avg(vals.j) | ++-------------+-------------+ +| 2.0 | 2.5 | ++-------------+-------------+ + +-- FIXME(dennis): AVG(DISTINCT) not supported +-- https://github.com/apache/datafusion/issues/2408 +-- SELECT AVG(DISTINCT i), AVG(DISTINCT j) FROM vals; +-- cleanup +DROP TABLE integers; + +Affected Rows: 0 + +DROP TABLE vals; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/avg.sql b/tests/cases/standalone/common/aggregate/avg.sql new file mode 100644 index 0000000000..cbb12edcbe --- /dev/null +++ b/tests/cases/standalone/common/aggregate/avg.sql @@ -0,0 +1,46 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_avg.test +-- Test AVG aggregate function + +-- scalar average +SELECT AVG(3); + +-- FIXME(dennis): unsupported type +-- SELECT AVG(NULL); + +SELECT AVG(3::SMALLINT), AVG(NULL::SMALLINT); + +SELECT AVG(3::DOUBLE), AVG(NULL::DOUBLE); + +-- test average with table +CREATE TABLE integers(i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO integers VALUES (1, 1000), (2, 2000), (3, 
3000); + +SELECT AVG(i), AVG(1), AVG(DISTINCT i), AVG(NULL) FROM integers; + +SELECT AVG(i) FROM integers WHERE i > 100; + +-- empty average +CREATE TABLE vals(i INTEGER, j DOUBLE, k BIGINT, ts TIMESTAMP TIME INDEX); + +INSERT INTO vals VALUES (NULL, NULL, NULL, 1000); + +SELECT AVG(i), AVG(j), AVG(k) FROM vals; + +-- test with mixed values +DROP TABLE vals; + +CREATE TABLE vals(i INTEGER, j DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO vals VALUES (1, 1.5, 1000), (2, 2.5, 2000), (3, 3.5, 3000), (NULL, NULL, 4000); + +SELECT AVG(i), AVG(j) FROM vals; + +-- FIXME(dennis): AVG(DISTINCT) not supported +-- https://github.com/apache/datafusion/issues/2408 +-- SELECT AVG(DISTINCT i), AVG(DISTINCT j) FROM vals; + +-- cleanup +DROP TABLE integers; + +DROP TABLE vals; diff --git a/tests/cases/standalone/common/aggregate/bit_operations.result b/tests/cases/standalone/common/aggregate/bit_operations.result new file mode 100644 index 0000000000..657bd9edff --- /dev/null +++ b/tests/cases/standalone/common/aggregate/bit_operations.result @@ -0,0 +1,103 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_bit_*.test +-- Test bitwise aggregate operations +-- Test BIT_AND +CREATE TABLE bit_test(i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO bit_test VALUES + (7, 1000), -- 111 + (3, 2000), -- 011 + (5, 3000), -- 101 + (NULL, 4000); + +Affected Rows: 4 + +-- Should be 1 (001) +SELECT BIT_AND(i) FROM bit_test; + ++---------------------+ +| bit_and(bit_test.i) | ++---------------------+ +| 1 | ++---------------------+ + +-- Test BIT_OR +-- Should be 7 (111) +SELECT BIT_OR(i) FROM bit_test; + ++--------------------+ +| bit_or(bit_test.i) | ++--------------------+ +| 7 | ++--------------------+ + +-- Test BIT_XOR +-- Should be 1 (111 XOR 011 XOR 101) +SELECT BIT_XOR(i) FROM bit_test; + ++---------------------+ +| bit_xor(bit_test.i) | ++---------------------+ +| 1 | ++---------------------+ + +-- Test with groups +INSERT INTO bit_test VALUES 
(8, 5000), (12, 6000), (4, 7000); + +Affected Rows: 3 + +-- Create separate table for group testing +CREATE TABLE bit_groups(grp INTEGER, i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO bit_groups VALUES + (1, 7, 1000), (1, 3, 2000), (1, 5, 3000), + (2, 8, 4000), (2, 12, 5000), (2, 4, 6000); + +Affected Rows: 6 + +SELECT grp, BIT_AND(i), BIT_OR(i), BIT_XOR(i) FROM bit_groups GROUP BY grp ORDER BY grp; + ++-----+-----------------------+----------------------+-----------------------+ +| grp | bit_and(bit_groups.i) | bit_or(bit_groups.i) | bit_xor(bit_groups.i) | ++-----+-----------------------+----------------------+-----------------------+ +| 1 | 1 | 7 | 1 | +| 2 | 0 | 12 | 0 | ++-----+-----------------------+----------------------+-----------------------+ + +-- Test edge cases +-- NULL +SELECT BIT_AND(i) FROM bit_test WHERE i > 100; + ++---------------------+ +| bit_and(bit_test.i) | ++---------------------+ +| | ++---------------------+ + +SELECT BIT_OR(i) FROM bit_test WHERE i > 100; + ++--------------------+ +| bit_or(bit_test.i) | ++--------------------+ +| | ++--------------------+ + +SELECT BIT_XOR(i) FROM bit_test WHERE i > 100; + ++---------------------+ +| bit_xor(bit_test.i) | ++---------------------+ +| | ++---------------------+ + +DROP TABLE bit_test; + +Affected Rows: 0 + +DROP TABLE bit_groups; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/bit_operations.sql b/tests/cases/standalone/common/aggregate/bit_operations.sql new file mode 100644 index 0000000000..7c21cdbd6d --- /dev/null +++ b/tests/cases/standalone/common/aggregate/bit_operations.sql @@ -0,0 +1,46 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_bit_*.test +-- Test bitwise aggregate operations + +-- Test BIT_AND +CREATE TABLE bit_test(i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO bit_test VALUES + (7, 1000), -- 111 + (3, 2000), -- 011 + (5, 3000), -- 101 + (NULL, 4000); + +-- Should be 1 (001) +SELECT BIT_AND(i) 
FROM bit_test; + +-- Test BIT_OR +-- Should be 7 (111) +SELECT BIT_OR(i) FROM bit_test; + +-- Test BIT_XOR +-- Should be 1 (111 XOR 011 XOR 101) +SELECT BIT_XOR(i) FROM bit_test; + +-- Test with groups +INSERT INTO bit_test VALUES (8, 5000), (12, 6000), (4, 7000); + +-- Create separate table for group testing +CREATE TABLE bit_groups(grp INTEGER, i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO bit_groups VALUES + (1, 7, 1000), (1, 3, 2000), (1, 5, 3000), + (2, 8, 4000), (2, 12, 5000), (2, 4, 6000); + +SELECT grp, BIT_AND(i), BIT_OR(i), BIT_XOR(i) FROM bit_groups GROUP BY grp ORDER BY grp; + +-- Test edge cases +-- NULL +SELECT BIT_AND(i) FROM bit_test WHERE i > 100; + +SELECT BIT_OR(i) FROM bit_test WHERE i > 100; + +SELECT BIT_XOR(i) FROM bit_test WHERE i > 100; + +DROP TABLE bit_test; + +DROP TABLE bit_groups; diff --git a/tests/cases/standalone/common/aggregate/bool_agg.result b/tests/cases/standalone/common/aggregate/bool_agg.result new file mode 100644 index 0000000000..8979a23e86 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/bool_agg.result @@ -0,0 +1,178 @@ +-- Migrated from DuckDB test style: test boolean aggregation +-- Test BOOL_AND and BOOL_OR functions +-- Test with boolean values +CREATE TABLE bool_test(b BOOLEAN, g INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO bool_test VALUES + (true, 1, 1000), (true, 1, 2000), (true, 1, 3000), + (false, 2, 4000), (true, 2, 5000), (false, 2, 6000), + (NULL, 3, 7000), (true, 3, 8000); + +Affected Rows: 8 + +-- Test BOOL_AND (all values must be true) and BOOL_OR (any value can be true) + -- Should be true +SELECT bool_and(b) FROM bool_test WHERE g = 1; + ++-----------------------+ +| bool_and(bool_test.b) | ++-----------------------+ +| true | ++-----------------------+ + +-- Should be false +SELECT bool_and(b) FROM bool_test WHERE g = 2; + ++-----------------------+ +| bool_and(bool_test.b) | ++-----------------------+ +| false | ++-----------------------+ + + -- Should be 
true (NULL ignored) +SELECT bool_and(b) FROM bool_test WHERE g = 3; + ++-----------------------+ +| bool_and(bool_test.b) | ++-----------------------+ +| true | ++-----------------------+ + + -- Should be true +SELECT bool_or(b) FROM bool_test WHERE g = 1; + ++----------------------+ +| bool_or(bool_test.b) | ++----------------------+ +| true | ++----------------------+ + +-- Should be true +SELECT bool_or(b) FROM bool_test WHERE g = 2; + ++----------------------+ +| bool_or(bool_test.b) | ++----------------------+ +| true | ++----------------------+ + +-- Should be true +SELECT bool_or(b) FROM bool_test WHERE g = 3; + ++----------------------+ +| bool_or(bool_test.b) | ++----------------------+ +| true | ++----------------------+ + +-- Test with GROUP BY +SELECT g, bool_and(b), bool_or(b) FROM bool_test GROUP BY g ORDER BY g; + ++---+-----------------------+----------------------+ +| g | bool_and(bool_test.b) | bool_or(bool_test.b) | ++---+-----------------------+----------------------+ +| 1 | true | true | +| 2 | false | true | +| 3 | true | true | ++---+-----------------------+----------------------+ + +-- Test all true values +CREATE TABLE all_true(b BOOLEAN, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO all_true VALUES (true, 1000), (true, 2000), (true, 3000); + +Affected Rows: 3 + +SELECT bool_and(b), bool_or(b) FROM all_true; + ++----------------------+---------------------+ +| bool_and(all_true.b) | bool_or(all_true.b) | ++----------------------+---------------------+ +| true | true | ++----------------------+---------------------+ + +-- Test all false values +CREATE TABLE all_false(b BOOLEAN, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO all_false VALUES (false, 1000), (false, 2000), (false, 3000); + +Affected Rows: 3 + +SELECT bool_and(b), bool_or(b) FROM all_false; + ++-----------------------+----------------------+ +| bool_and(all_false.b) | bool_or(all_false.b) | ++-----------------------+----------------------+ +| false | 
false | ++-----------------------+----------------------+ + +-- Test all NULL values +CREATE TABLE all_null(b BOOLEAN, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO all_null VALUES (NULL, 1000), (NULL, 2000), (NULL, 3000); + +Affected Rows: 3 + +SELECT bool_and(b), bool_or(b) FROM all_null; + ++----------------------+---------------------+ +| bool_and(all_null.b) | bool_or(all_null.b) | ++----------------------+---------------------+ +| | | ++----------------------+---------------------+ + +-- Test empty result +SELECT bool_and(b), bool_or(b) FROM bool_test WHERE g > 100; + ++-----------------------+----------------------+ +| bool_and(bool_test.b) | bool_or(bool_test.b) | ++-----------------------+----------------------+ +| | | ++-----------------------+----------------------+ + +-- Test with integer expressions (converted to boolean) +CREATE TABLE int_test(i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO int_test VALUES (0, 1000), (1, 2000), (2, 3000), (NULL, 4000); + +Affected Rows: 4 + +SELECT bool_and(i > 0), bool_or(i > 1) FROM int_test; + ++---------------------------------+--------------------------------+ +| bool_and(int_test.i > Int64(0)) | bool_or(int_test.i > Int64(1)) | ++---------------------------------+--------------------------------+ +| false | true | ++---------------------------------+--------------------------------+ + +-- cleanup +DROP TABLE bool_test; + +Affected Rows: 0 + +DROP TABLE all_true; + +Affected Rows: 0 + +DROP TABLE all_false; + +Affected Rows: 0 + +DROP TABLE all_null; + +Affected Rows: 0 + +DROP TABLE int_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/bool_agg.sql b/tests/cases/standalone/common/aggregate/bool_agg.sql new file mode 100644 index 0000000000..5f916ae608 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/bool_agg.sql @@ -0,0 +1,74 @@ +-- Migrated from DuckDB test style: test boolean aggregation +-- Test BOOL_AND and BOOL_OR functions + 
+-- Test with boolean values +CREATE TABLE bool_test(b BOOLEAN, g INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO bool_test VALUES + (true, 1, 1000), (true, 1, 2000), (true, 1, 3000), + (false, 2, 4000), (true, 2, 5000), (false, 2, 6000), + (NULL, 3, 7000), (true, 3, 8000); + +-- Test BOOL_AND (all values must be true) and BOOL_OR (any value can be true) + -- Should be true +SELECT bool_and(b) FROM bool_test WHERE g = 1; + +-- Should be false +SELECT bool_and(b) FROM bool_test WHERE g = 2; + + -- Should be true (NULL ignored) +SELECT bool_and(b) FROM bool_test WHERE g = 3; + + -- Should be true +SELECT bool_or(b) FROM bool_test WHERE g = 1; + +-- Should be true +SELECT bool_or(b) FROM bool_test WHERE g = 2; + +-- Should be true +SELECT bool_or(b) FROM bool_test WHERE g = 3; + +-- Test with GROUP BY +SELECT g, bool_and(b), bool_or(b) FROM bool_test GROUP BY g ORDER BY g; + +-- Test all true values +CREATE TABLE all_true(b BOOLEAN, ts TIMESTAMP TIME INDEX); + +INSERT INTO all_true VALUES (true, 1000), (true, 2000), (true, 3000); + +SELECT bool_and(b), bool_or(b) FROM all_true; + +-- Test all false values +CREATE TABLE all_false(b BOOLEAN, ts TIMESTAMP TIME INDEX); + +INSERT INTO all_false VALUES (false, 1000), (false, 2000), (false, 3000); + +SELECT bool_and(b), bool_or(b) FROM all_false; + +-- Test all NULL values +CREATE TABLE all_null(b BOOLEAN, ts TIMESTAMP TIME INDEX); + +INSERT INTO all_null VALUES (NULL, 1000), (NULL, 2000), (NULL, 3000); + +SELECT bool_and(b), bool_or(b) FROM all_null; + +-- Test empty result +SELECT bool_and(b), bool_or(b) FROM bool_test WHERE g > 100; + +-- Test with integer expressions (converted to boolean) +CREATE TABLE int_test(i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO int_test VALUES (0, 1000), (1, 2000), (2, 3000), (NULL, 4000); + +SELECT bool_and(i > 0), bool_or(i > 1) FROM int_test; + +-- cleanup +DROP TABLE bool_test; + +DROP TABLE all_true; + +DROP TABLE all_false; + +DROP TABLE all_null; + +DROP TABLE int_test; diff 
--git a/tests/cases/standalone/common/aggregate/corr.result b/tests/cases/standalone/common/aggregate/corr.result new file mode 100644 index 0000000000..7099c7a85e --- /dev/null +++ b/tests/cases/standalone/common/aggregate/corr.result @@ -0,0 +1,100 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_corr.test +-- Test CORR operator (correlation coefficient) +-- Corner cases +SELECT corr(NULL,NULL); + ++-----------------+ +| corr(NULL,NULL) | ++-----------------+ +| | ++-----------------+ + +-- Single value returns NULL +-- FIXME(dennis): datafusion returns 0.0 here, should be NULL +SELECT corr(1,1); + ++-------------------------+ +| corr(Int64(1),Int64(1)) | ++-------------------------+ +| 0.0 | ++-------------------------+ + +-- Test with table +CREATE TABLE aggr(k INT, v DECIMAL(10,2), v2 DECIMAL(10, 2), ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO aggr VALUES + (1, 10, null, 1000), + (2, 10, 11, 2000), + (2, 20, 22, 3000), + (2, 25, null, 4000), + (2, 30, 35, 5000); + +Affected Rows: 5 + +SELECT k, corr(v, v2) FROM aggr GROUP BY k ORDER BY k; + ++---+----------------------+ +| k | corr(aggr.v,aggr.v2) | ++---+----------------------+ +| 1 | | +| 2 | 0.9988445981121536 | ++---+----------------------+ + +SELECT corr(v, v2) FROM aggr; + ++----------------------+ +| corr(aggr.v,aggr.v2) | ++----------------------+ +| 0.9988445981121532 | ++----------------------+ + +-- Test with integer values +CREATE TABLE corr_test(x INTEGER, y INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO corr_test VALUES + (1, 2, 1000), + (2, 4, 2000), + (3, 6, 3000), + (4, 8, 4000), + (5, 10, 5000); + +Affected Rows: 5 + +-- Perfect positive correlation +SELECT corr(x, y) FROM corr_test; + ++-------------------------------+ +| corr(corr_test.x,corr_test.y) | ++-------------------------------+ +| 0.9999999999999999 | ++-------------------------------+ + +-- Test with negative correlation +INSERT INTO corr_test VALUES + (6, 5, 6000), + (7, 
3, 7000), + (8, 1, 8000); + +Affected Rows: 3 + +SELECT corr(x, y) FROM corr_test; + ++-------------------------------+ +| corr(corr_test.x,corr_test.y) | ++-------------------------------+ +| -0.12452312927991684 | ++-------------------------------+ + +-- cleanup +DROP TABLE aggr; + +Affected Rows: 0 + +DROP TABLE corr_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/corr.sql b/tests/cases/standalone/common/aggregate/corr.sql new file mode 100644 index 0000000000..d22715337a --- /dev/null +++ b/tests/cases/standalone/common/aggregate/corr.sql @@ -0,0 +1,49 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_corr.test +-- Test CORR operator (correlation coefficient) + +-- Corner cases +SELECT corr(NULL,NULL); + +-- Single value returns NULL +-- FIXME(dennis): datafusion returns 0.0 here, should be NULL +SELECT corr(1,1); + +-- Test with table +CREATE TABLE aggr(k INT, v DECIMAL(10,2), v2 DECIMAL(10, 2), ts TIMESTAMP TIME INDEX); + +INSERT INTO aggr VALUES + (1, 10, null, 1000), + (2, 10, 11, 2000), + (2, 20, 22, 3000), + (2, 25, null, 4000), + (2, 30, 35, 5000); + +SELECT k, corr(v, v2) FROM aggr GROUP BY k ORDER BY k; + +SELECT corr(v, v2) FROM aggr; + +-- Test with integer values +CREATE TABLE corr_test(x INTEGER, y INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO corr_test VALUES + (1, 2, 1000), + (2, 4, 2000), + (3, 6, 3000), + (4, 8, 4000), + (5, 10, 5000); + +-- Perfect positive correlation +SELECT corr(x, y) FROM corr_test; + +-- Test with negative correlation +INSERT INTO corr_test VALUES + (6, 5, 6000), + (7, 3, 7000), + (8, 1, 8000); + +SELECT corr(x, y) FROM corr_test; + +-- cleanup +DROP TABLE aggr; + +DROP TABLE corr_test; diff --git a/tests/cases/standalone/common/aggregate/covar.result b/tests/cases/standalone/common/aggregate/covar.result new file mode 100644 index 0000000000..07760ae2e2 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/covar.result @@ -0,0 +1,84 @@ +-- Migrated from DuckDB 
test: test/sql/aggregate/aggregates/test_covar.test +-- Test COVAR operators (covariance) +-- Test population covariance on scalar values +SELECT COVAR_POP(3,3), COVAR_POP(NULL,3), COVAR_POP(3,NULL), COVAR_POP(NULL,NULL); + ++------------------------------+--------------------------+--------------------------+----------------------+ +| covar_pop(Int64(3),Int64(3)) | covar_pop(NULL,Int64(3)) | covar_pop(Int64(3),NULL) | covar_pop(NULL,NULL) | ++------------------------------+--------------------------+--------------------------+----------------------+ +| 0.0 | | | | ++------------------------------+--------------------------+--------------------------+----------------------+ + +-- Test sample covariance on scalar values +SELECT COVAR_SAMP(3,3), COVAR_SAMP(NULL,3), COVAR_SAMP(3,NULL), COVAR_SAMP(NULL,NULL); + ++-------------------------------+---------------------------+---------------------------+-----------------------+ +| covar_samp(Int64(3),Int64(3)) | covar_samp(NULL,Int64(3)) | covar_samp(Int64(3),NULL) | covar_samp(NULL,NULL) | ++-------------------------------+---------------------------+---------------------------+-----------------------+ +| | | | | ++-------------------------------+---------------------------+---------------------------+-----------------------+ + +-- Test population covariance on a set of values +CREATE TABLE integers(x INTEGER, y INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO integers VALUES + (10, NULL, 1000), + (10, 11, 2000), + (20, 22, 3000), + (25, NULL, 4000), + (30, 35, 5000); + +Affected Rows: 5 + +SELECT COVAR_POP(x,y), COVAR_POP(x,1), COVAR_POP(1,y), COVAR_POP(x,NULL), COVAR_POP(NULL,y) FROM integers; + ++----------------------------------+--------------------------------+--------------------------------+----------------------------+----------------------------+ +| covar_pop(integers.x,integers.y) | covar_pop(integers.x,Int64(1)) | covar_pop(Int64(1),integers.y) | covar_pop(integers.x,NULL) | 
covar_pop(NULL,integers.y) | ++----------------------------------+--------------------------------+--------------------------------+----------------------------+----------------------------+ +| 79.99999999999999 | 0.0 | 0.0 | | | ++----------------------------------+--------------------------------+--------------------------------+----------------------------+----------------------------+ + +-- Test sample covariance +SELECT COVAR_SAMP(x,y), COVAR_SAMP(x,1), COVAR_SAMP(1,y) FROM integers; + ++-----------------------------------+---------------------------------+---------------------------------+ +| covar_samp(integers.x,integers.y) | covar_samp(integers.x,Int64(1)) | covar_samp(Int64(1),integers.y) | ++-----------------------------------+---------------------------------+---------------------------------+ +| 119.99999999999999 | 0.0 | 0.0 | ++-----------------------------------+---------------------------------+---------------------------------+ + +-- Test grouped covariance +CREATE TABLE covar_data(grp INTEGER, x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO covar_data VALUES + (1, 1.0, 2.0, 1000), + (1, 2.0, 4.0, 2000), + (1, 3.0, 6.0, 3000), + (2, 10.0, 5.0, 4000), + (2, 20.0, 10.0, 5000), + (2, 30.0, 15.0, 6000); + +Affected Rows: 6 + +SELECT grp, COVAR_POP(x, y), COVAR_SAMP(x, y) FROM covar_data GROUP BY grp ORDER BY grp; + ++-----+--------------------------------------+---------------------------------------+ +| grp | covar_pop(covar_data.x,covar_data.y) | covar_samp(covar_data.x,covar_data.y) | ++-----+--------------------------------------+---------------------------------------+ +| 1 | 1.3333333333333333 | 2.0 | +| 2 | 33.333333333333336 | 50.0 | ++-----+--------------------------------------+---------------------------------------+ + +-- cleanup +DROP TABLE integers; + +Affected Rows: 0 + +DROP TABLE covar_data; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/covar.sql 
b/tests/cases/standalone/common/aggregate/covar.sql new file mode 100644 index 0000000000..335a81173a --- /dev/null +++ b/tests/cases/standalone/common/aggregate/covar.sql @@ -0,0 +1,41 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_covar.test +-- Test COVAR operators (covariance) + +-- Test population covariance on scalar values +SELECT COVAR_POP(3,3), COVAR_POP(NULL,3), COVAR_POP(3,NULL), COVAR_POP(NULL,NULL); + +-- Test sample covariance on scalar values +SELECT COVAR_SAMP(3,3), COVAR_SAMP(NULL,3), COVAR_SAMP(3,NULL), COVAR_SAMP(NULL,NULL); + +-- Test population covariance on a set of values +CREATE TABLE integers(x INTEGER, y INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO integers VALUES + (10, NULL, 1000), + (10, 11, 2000), + (20, 22, 3000), + (25, NULL, 4000), + (30, 35, 5000); + +SELECT COVAR_POP(x,y), COVAR_POP(x,1), COVAR_POP(1,y), COVAR_POP(x,NULL), COVAR_POP(NULL,y) FROM integers; + +-- Test sample covariance +SELECT COVAR_SAMP(x,y), COVAR_SAMP(x,1), COVAR_SAMP(1,y) FROM integers; + +-- Test grouped covariance +CREATE TABLE covar_data(grp INTEGER, x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO covar_data VALUES + (1, 1.0, 2.0, 1000), + (1, 2.0, 4.0, 2000), + (1, 3.0, 6.0, 3000), + (2, 10.0, 5.0, 4000), + (2, 20.0, 10.0, 5000), + (2, 30.0, 15.0, 6000); + +SELECT grp, COVAR_POP(x, y), COVAR_SAMP(x, y) FROM covar_data GROUP BY grp ORDER BY grp; + +-- cleanup +DROP TABLE integers; + +DROP TABLE covar_data; diff --git a/tests/cases/standalone/common/aggregate/first_last.result b/tests/cases/standalone/common/aggregate/first_last.result new file mode 100644 index 0000000000..4a0029eb93 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/first_last.result @@ -0,0 +1,255 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_last.test +-- Test FIRST and LAST aggregate functions +-- Test with integers +CREATE TABLE five(i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO five VALUES 
(1, 1000), (2, 2000), (3, 3000), (4, 4000), (5, 5000); + +Affected Rows: 5 + +SELECT last_value(i) FROM five; + ++--------------------+ +| last_value(five.i) | ++--------------------+ +| 5 | ++--------------------+ + +SELECT first_value(i) FROM five; + ++---------------------+ +| first_value(five.i) | ++---------------------+ +| 1 | ++---------------------+ + +SELECT i % 3 AS g, last_value(i) FROM five GROUP BY g ORDER BY g; + ++---+--------------------+ +| g | last_value(five.i) | ++---+--------------------+ +| 0 | 3 | +| 1 | 4 | +| 2 | 5 | ++---+--------------------+ + +SELECT i % 3 AS g, first_value(i) FROM five GROUP BY g ORDER BY g; + ++---+---------------------+ +| g | first_value(five.i) | ++---+---------------------+ +| 0 | 3 | +| 1 | 1 | +| 2 | 2 | ++---+---------------------+ + +-- Test with ORDER BY +SELECT last_value(i ORDER BY i DESC) FROM five; + ++-------------------------------------------------------+ +| last_value(five.i) ORDER BY [five.i DESC NULLS FIRST] | ++-------------------------------------------------------+ +| 1 | ++-------------------------------------------------------+ + +SELECT first_value(i ORDER BY i DESC) FROM five; + ++--------------------------------------------------------+ +| first_value(five.i) ORDER BY [five.i DESC NULLS FIRST] | ++--------------------------------------------------------+ +| 5 | ++--------------------------------------------------------+ + +SELECT i % 3 AS g, last_value(i ORDER BY i DESC) FROM five GROUP BY g ORDER BY g; + ++---+-------------------------------------------------------+ +| g | last_value(five.i) ORDER BY [five.i DESC NULLS FIRST] | ++---+-------------------------------------------------------+ +| 0 | 3 | +| 1 | 1 | +| 2 | 2 | ++---+-------------------------------------------------------+ + +SELECT i % 3 AS g, first_value(i ORDER BY i DESC) FROM five GROUP BY g ORDER BY g; + ++---+--------------------------------------------------------+ +| g | first_value(five.i) ORDER BY [five.i DESC NULLS 
FIRST] | ++---+--------------------------------------------------------+ +| 0 | 3 | +| 1 | 4 | +| 2 | 5 | ++---+--------------------------------------------------------+ + +-- Test with strings +CREATE TABLE strings(s VARCHAR, g INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO strings VALUES ('apple', 1, 1000), ('banana', 1, 2000), ('cherry', 2, 3000), ('date', 2, 4000), ('elderberry', 3, 5000); + +Affected Rows: 5 + +SELECT last_value(s) FROM strings; + ++-----------------------+ +| last_value(strings.s) | ++-----------------------+ +| elderberry | ++-----------------------+ + +SELECT first_value(s) FROM strings; + ++------------------------+ +| first_value(strings.s) | ++------------------------+ +| apple | ++------------------------+ + +SELECT g, last_value(s) FROM strings GROUP BY g ORDER BY g; + ++---+-----------------------+ +| g | last_value(strings.s) | ++---+-----------------------+ +| 1 | banana | +| 2 | date | +| 3 | elderberry | ++---+-----------------------+ + +SELECT g, first_value(s) FROM strings GROUP BY g ORDER BY g; + ++---+------------------------+ +| g | first_value(strings.s) | ++---+------------------------+ +| 1 | apple | +| 2 | cherry | +| 3 | elderberry | ++---+------------------------+ + +-- Test with ORDER BY on strings +SELECT last_value(s ORDER BY s) FROM strings; + ++-----------------------------------------------------------+ +| last_value(strings.s) ORDER BY [strings.s ASC NULLS LAST] | ++-----------------------------------------------------------+ +| elderberry | ++-----------------------------------------------------------+ + +SELECT first_value(s ORDER BY s) FROM strings; + ++------------------------------------------------------------+ +| first_value(strings.s) ORDER BY [strings.s ASC NULLS LAST] | ++------------------------------------------------------------+ +| apple | ++------------------------------------------------------------+ + +SELECT g, last_value(s ORDER BY s) FROM strings GROUP BY g ORDER BY g; + 
++---+-----------------------------------------------------------+ +| g | last_value(strings.s) ORDER BY [strings.s ASC NULLS LAST] | ++---+-----------------------------------------------------------+ +| 1 | banana | +| 2 | date | +| 3 | elderberry | ++---+-----------------------------------------------------------+ + +SELECT g, first_value(s ORDER BY s) FROM strings GROUP BY g ORDER BY g; + ++---+------------------------------------------------------------+ +| g | first_value(strings.s) ORDER BY [strings.s ASC NULLS LAST] | ++---+------------------------------------------------------------+ +| 1 | apple | +| 2 | cherry | +| 3 | elderberry | ++---+------------------------------------------------------------+ + +-- Test with NULL values +INSERT INTO strings VALUES (NULL, 1, 6000), ('fig', NULL, 7000); + +Affected Rows: 2 + +SELECT last_value(s) FROM strings; + ++-----------------------+ +| last_value(strings.s) | ++-----------------------+ +| fig | ++-----------------------+ + +SELECT first_value(s) FROM strings; + ++------------------------+ +| first_value(strings.s) | ++------------------------+ +| apple | ++------------------------+ + +SELECT g, last_value(s) FROM strings WHERE g IS NOT NULL GROUP BY g ORDER BY g; + ++---+-----------------------+ +| g | last_value(strings.s) | ++---+-----------------------+ +| 1 | | +| 2 | date | +| 3 | elderberry | ++---+-----------------------+ + +-- Test with dates +CREATE TABLE dates(d DATE, i INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO dates VALUES ('2021-08-20', 1, 1000), ('2021-08-21', 2, 2000), ('2021-08-22', 3, 3000), ('2021-08-23', 4, 4000), ('2021-08-24', 5, 5000); + +Affected Rows: 5 + +SELECT last_value(d) FROM dates; + ++---------------------+ +| last_value(dates.d) | ++---------------------+ +| 2021-08-24 | ++---------------------+ + +SELECT first_value(d) FROM dates; + ++----------------------+ +| first_value(dates.d) | ++----------------------+ +| 2021-08-20 | ++----------------------+ + 
+SELECT i % 3 AS g, last_value(d) FROM dates GROUP BY g ORDER BY g; + ++---+---------------------+ +| g | last_value(dates.d) | ++---+---------------------+ +| 0 | 2021-08-22 | +| 1 | 2021-08-23 | +| 2 | 2021-08-24 | ++---+---------------------+ + +SELECT i % 3 AS g, first_value(d) FROM dates GROUP BY g ORDER BY g; + ++---+----------------------+ +| g | first_value(dates.d) | ++---+----------------------+ +| 0 | 2021-08-22 | +| 1 | 2021-08-20 | +| 2 | 2021-08-21 | ++---+----------------------+ + +-- cleanup +DROP TABLE five; + +Affected Rows: 0 + +DROP TABLE strings; + +Affected Rows: 0 + +DROP TABLE dates; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/first_last.sql b/tests/cases/standalone/common/aggregate/first_last.sql new file mode 100644 index 0000000000..55f07ad603 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/first_last.sql @@ -0,0 +1,75 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_last.test +-- Test FIRST and LAST aggregate functions + +-- Test with integers +CREATE TABLE five(i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO five VALUES (1, 1000), (2, 2000), (3, 3000), (4, 4000), (5, 5000); + +SELECT last_value(i) FROM five; + +SELECT first_value(i) FROM five; + +SELECT i % 3 AS g, last_value(i) FROM five GROUP BY g ORDER BY g; + +SELECT i % 3 AS g, first_value(i) FROM five GROUP BY g ORDER BY g; + +-- Test with ORDER BY +SELECT last_value(i ORDER BY i DESC) FROM five; + +SELECT first_value(i ORDER BY i DESC) FROM five; + +SELECT i % 3 AS g, last_value(i ORDER BY i DESC) FROM five GROUP BY g ORDER BY g; + +SELECT i % 3 AS g, first_value(i ORDER BY i DESC) FROM five GROUP BY g ORDER BY g; + +-- Test with strings +CREATE TABLE strings(s VARCHAR, g INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO strings VALUES ('apple', 1, 1000), ('banana', 1, 2000), ('cherry', 2, 3000), ('date', 2, 4000), ('elderberry', 3, 5000); + +SELECT last_value(s) FROM strings; + +SELECT first_value(s) FROM 
strings; + +SELECT g, last_value(s) FROM strings GROUP BY g ORDER BY g; + +SELECT g, first_value(s) FROM strings GROUP BY g ORDER BY g; + +-- Test with ORDER BY on strings +SELECT last_value(s ORDER BY s) FROM strings; + +SELECT first_value(s ORDER BY s) FROM strings; + +SELECT g, last_value(s ORDER BY s) FROM strings GROUP BY g ORDER BY g; + +SELECT g, first_value(s ORDER BY s) FROM strings GROUP BY g ORDER BY g; + +-- Test with NULL values +INSERT INTO strings VALUES (NULL, 1, 6000), ('fig', NULL, 7000); + +SELECT last_value(s) FROM strings; + +SELECT first_value(s) FROM strings; + +SELECT g, last_value(s) FROM strings WHERE g IS NOT NULL GROUP BY g ORDER BY g; + +-- Test with dates +CREATE TABLE dates(d DATE, i INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO dates VALUES ('2021-08-20', 1, 1000), ('2021-08-21', 2, 2000), ('2021-08-22', 3, 3000), ('2021-08-23', 4, 4000), ('2021-08-24', 5, 5000); + +SELECT last_value(d) FROM dates; + +SELECT first_value(d) FROM dates; + +SELECT i % 3 AS g, last_value(d) FROM dates GROUP BY g ORDER BY g; + +SELECT i % 3 AS g, first_value(d) FROM dates GROUP BY g ORDER BY g; + +-- cleanup +DROP TABLE five; + +DROP TABLE strings; + +DROP TABLE dates; diff --git a/tests/cases/standalone/common/aggregate/median.result b/tests/cases/standalone/common/aggregate/median.result new file mode 100644 index 0000000000..98af8787cc --- /dev/null +++ b/tests/cases/standalone/common/aggregate/median.result @@ -0,0 +1,119 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_median.test +-- Test MEDIAN aggregate +-- scalar median +SELECT median(NULL), median(1); + ++--------------+------------------+ +| median(NULL) | median(Int64(1)) | ++--------------+------------------+ +| | 1 | ++--------------+------------------+ + +-- test with simple table +CREATE TABLE quantile(r INTEGER, v DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO quantile VALUES + (0, 0.1, 1000), (1, 0.2, 2000), (2, 0.3, 3000), (3, 0.4, 4000), 
(4, 0.5, 5000), + (5, 0.6, 6000), (6, 0.7, 7000), (7, 0.8, 8000), (8, 0.9, 9000), (9, 1.0, 10000), + (NULL, 0.1, 11000), (NULL, 0.5, 12000), (NULL, 0.9, 13000); + +Affected Rows: 13 + +SELECT median(r)::VARCHAR FROM quantile; + ++--------------------+ +| median(quantile.r) | ++--------------------+ +| 4 | ++--------------------+ + +SELECT median(r::FLOAT)::VARCHAR FROM quantile; + ++--------------------+ +| median(quantile.r) | ++--------------------+ +| 4.5 | ++--------------------+ + +SELECT median(r::DOUBLE)::VARCHAR FROM quantile; + ++--------------------+ +| median(quantile.r) | ++--------------------+ +| 4.5 | ++--------------------+ + +SELECT median(r::SMALLINT)::VARCHAR FROM quantile WHERE r < 100; + ++--------------------+ +| median(quantile.r) | ++--------------------+ +| 4 | ++--------------------+ + +SELECT median(r::INTEGER)::VARCHAR FROM quantile; + ++--------------------+ +| median(quantile.r) | ++--------------------+ +| 4 | ++--------------------+ + +SELECT median(r::BIGINT)::VARCHAR FROM quantile; + ++--------------------+ +| median(quantile.r) | ++--------------------+ +| 4 | ++--------------------+ + +-- test with NULL values +SELECT median(NULL) FROM quantile; + ++--------------+ +| median(NULL) | ++--------------+ +| | ++--------------+ + +SELECT median(42) FROM quantile; + ++-------------------+ +| median(Int64(42)) | ++-------------------+ +| 42 | ++-------------------+ + +-- test with grouped data +CREATE TABLE median_groups(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO median_groups VALUES + (1, 1, 1000), (2, 1, 2000), (3, 1, 3000), (4, 1, 4000), (5, 1, 5000), + (10, 2, 6000), (20, 2, 7000), (30, 2, 8000), (40, 2, 9000), (50, 2, 10000), + (NULL, 3, 11000); + +Affected Rows: 11 + +SELECT grp, median(val) FROM median_groups GROUP BY grp ORDER BY grp; + ++-----+---------------------------+ +| grp | median(median_groups.val) | ++-----+---------------------------+ +| 1 | 3 | +| 2 | 30 | +| 3 | | 
++-----+---------------------------+ + +-- cleanup +DROP TABLE quantile; + +Affected Rows: 0 + +DROP TABLE median_groups; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/median.sql b/tests/cases/standalone/common/aggregate/median.sql new file mode 100644 index 0000000000..97f88b00e8 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/median.sql @@ -0,0 +1,45 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_median.test +-- Test MEDIAN aggregate + +-- scalar median +SELECT median(NULL), median(1); + +-- test with simple table +CREATE TABLE quantile(r INTEGER, v DOUBLE, ts TIMESTAMP TIME INDEX); + +INSERT INTO quantile VALUES + (0, 0.1, 1000), (1, 0.2, 2000), (2, 0.3, 3000), (3, 0.4, 4000), (4, 0.5, 5000), + (5, 0.6, 6000), (6, 0.7, 7000), (7, 0.8, 8000), (8, 0.9, 9000), (9, 1.0, 10000), + (NULL, 0.1, 11000), (NULL, 0.5, 12000), (NULL, 0.9, 13000); + +SELECT median(r)::VARCHAR FROM quantile; + +SELECT median(r::FLOAT)::VARCHAR FROM quantile; + +SELECT median(r::DOUBLE)::VARCHAR FROM quantile; + +SELECT median(r::SMALLINT)::VARCHAR FROM quantile WHERE r < 100; + +SELECT median(r::INTEGER)::VARCHAR FROM quantile; + +SELECT median(r::BIGINT)::VARCHAR FROM quantile; + +-- test with NULL values +SELECT median(NULL) FROM quantile; + +SELECT median(42) FROM quantile; + +-- test with grouped data +CREATE TABLE median_groups(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO median_groups VALUES + (1, 1, 1000), (2, 1, 2000), (3, 1, 3000), (4, 1, 4000), (5, 1, 5000), + (10, 2, 6000), (20, 2, 7000), (30, 2, 8000), (40, 2, 9000), (50, 2, 10000), + (NULL, 3, 11000); + +SELECT grp, median(val) FROM median_groups GROUP BY grp ORDER BY grp; + +-- cleanup +DROP TABLE quantile; + +DROP TABLE median_groups; diff --git a/tests/cases/standalone/common/aggregate/min_max.result b/tests/cases/standalone/common/aggregate/min_max.result new file mode 100644 index 0000000000..b04fc54182 --- /dev/null +++ 
b/tests/cases/standalone/common/aggregate/min_max.result @@ -0,0 +1,151 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_aggregate_types.test +-- Test MIN/MAX aggregate functions +-- Test with strings +CREATE TABLE strings(s STRING, g INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO strings VALUES ('hello', 0, 1000), ('world', 1, 2000), (NULL, 0, 3000), ('r', 1, 4000); + +Affected Rows: 4 + +-- simple aggregates only +SELECT COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings; + ++----------+------------------+----------------+----------------+ +| count(*) | count(strings.s) | min(strings.s) | max(strings.s) | ++----------+------------------+----------------+----------------+ +| 4 | 3 | hello | world | ++----------+------------------+----------------+----------------+ + +SELECT COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings WHERE s IS NULL; + ++----------+------------------+----------------+----------------+ +| count(*) | count(strings.s) | min(strings.s) | max(strings.s) | ++----------+------------------+----------------+----------------+ +| 1 | 0 | | | ++----------+------------------+----------------+----------------+ + +-- grouped aggregates +SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings GROUP BY g ORDER BY g; + ++---+----------+------------------+----------------+----------------+ +| g | count(*) | count(strings.s) | min(strings.s) | max(strings.s) | ++---+----------+------------------+----------------+----------------+ +| 0 | 2 | 1 | hello | hello | +| 1 | 2 | 2 | r | world | ++---+----------+------------------+----------------+----------------+ + +-- empty group +SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings WHERE s IS NULL OR s <> 'hello' GROUP BY g ORDER BY g; + ++---+----------+------------------+----------------+----------------+ +| g | count(*) | count(strings.s) | min(strings.s) | max(strings.s) | ++---+----------+------------------+----------------+----------------+ +| 0 | 1 | 0 | | | +| 1 | 
2 | 2 | r | world | ++---+----------+------------------+----------------+----------------+ + +-- Test with integers +CREATE TABLE integers(i INTEGER, g INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO integers VALUES (1, 0, 1000), (5, 1, 2000), (NULL, 0, 3000), (3, 1, 4000), (2, 0, 5000); + +Affected Rows: 5 + +SELECT MIN(i), MAX(i) FROM integers; + ++-----------------+-----------------+ +| min(integers.i) | max(integers.i) | ++-----------------+-----------------+ +| 1 | 5 | ++-----------------+-----------------+ + +SELECT g, MIN(i), MAX(i) FROM integers GROUP BY g ORDER BY g; + ++---+-----------------+-----------------+ +| g | min(integers.i) | max(integers.i) | ++---+-----------------+-----------------+ +| 0 | 1 | 2 | +| 1 | 3 | 5 | ++---+-----------------+-----------------+ + +-- Test with doubles +CREATE TABLE doubles(d DOUBLE, g INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO doubles VALUES (1.5, 0, 1000), (5.5, 1, 2000), (NULL, 0, 3000), (3.5, 1, 4000), (2.5, 0, 5000); + +Affected Rows: 5 + +SELECT MIN(d), MAX(d) FROM doubles; + ++----------------+----------------+ +| min(doubles.d) | max(doubles.d) | ++----------------+----------------+ +| 1.5 | 5.5 | ++----------------+----------------+ + +SELECT g, MIN(d), MAX(d) FROM doubles GROUP BY g ORDER BY g; + ++---+----------------+----------------+ +| g | min(doubles.d) | max(doubles.d) | ++---+----------------+----------------+ +| 0 | 1.5 | 2.5 | +| 1 | 3.5 | 5.5 | ++---+----------------+----------------+ + +-- Test with booleans +CREATE TABLE booleans(b BOOLEAN, g INTEGER, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO booleans VALUES (false, 0, 1000), (true, 1, 2000), (NULL, 0, 3000), (false, 1, 4000); + +Affected Rows: 4 + +SELECT COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans; + ++----------+-------------------+-----------------+-----------------+ +| count(*) | count(booleans.b) | min(booleans.b) | max(booleans.b) | 
++----------+-------------------+-----------------+-----------------+ +| 4 | 3 | false | true | ++----------+-------------------+-----------------+-----------------+ + +SELECT COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans WHERE b IS NULL; + ++----------+-------------------+-----------------+-----------------+ +| count(*) | count(booleans.b) | min(booleans.b) | max(booleans.b) | ++----------+-------------------+-----------------+-----------------+ +| 1 | 0 | | | ++----------+-------------------+-----------------+-----------------+ + +SELECT g, COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans GROUP BY g ORDER BY g; + ++---+----------+-------------------+-----------------+-----------------+ +| g | count(*) | count(booleans.b) | min(booleans.b) | max(booleans.b) | ++---+----------+-------------------+-----------------+-----------------+ +| 0 | 2 | 1 | false | false | +| 1 | 2 | 2 | false | true | ++---+----------+-------------------+-----------------+-----------------+ + +-- cleanup +DROP TABLE strings; + +Affected Rows: 0 + +DROP TABLE integers; + +Affected Rows: 0 + +DROP TABLE doubles; + +Affected Rows: 0 + +DROP TABLE booleans; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/min_max.sql b/tests/cases/standalone/common/aggregate/min_max.sql new file mode 100644 index 0000000000..dc037037af --- /dev/null +++ b/tests/cases/standalone/common/aggregate/min_max.sql @@ -0,0 +1,56 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_aggregate_types.test +-- Test MIN/MAX aggregate functions + +-- Test with strings +CREATE TABLE strings(s STRING, g INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO strings VALUES ('hello', 0, 1000), ('world', 1, 2000), (NULL, 0, 3000), ('r', 1, 4000); + +-- simple aggregates only +SELECT COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings; + +SELECT COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings WHERE s IS NULL; + +-- grouped aggregates +SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings GROUP 
BY g ORDER BY g; + +-- empty group +SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings WHERE s IS NULL OR s <> 'hello' GROUP BY g ORDER BY g; + +-- Test with integers +CREATE TABLE integers(i INTEGER, g INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO integers VALUES (1, 0, 1000), (5, 1, 2000), (NULL, 0, 3000), (3, 1, 4000), (2, 0, 5000); + +SELECT MIN(i), MAX(i) FROM integers; + +SELECT g, MIN(i), MAX(i) FROM integers GROUP BY g ORDER BY g; + +-- Test with doubles +CREATE TABLE doubles(d DOUBLE, g INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO doubles VALUES (1.5, 0, 1000), (5.5, 1, 2000), (NULL, 0, 3000), (3.5, 1, 4000), (2.5, 0, 5000); + +SELECT MIN(d), MAX(d) FROM doubles; + +SELECT g, MIN(d), MAX(d) FROM doubles GROUP BY g ORDER BY g; + +-- Test with booleans +CREATE TABLE booleans(b BOOLEAN, g INTEGER, ts TIMESTAMP TIME INDEX); + +INSERT INTO booleans VALUES (false, 0, 1000), (true, 1, 2000), (NULL, 0, 3000), (false, 1, 4000); + +SELECT COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans; + +SELECT COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans WHERE b IS NULL; + +SELECT g, COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans GROUP BY g ORDER BY g; + +-- cleanup +DROP TABLE strings; + +DROP TABLE integers; + +DROP TABLE doubles; + +DROP TABLE booleans; diff --git a/tests/cases/standalone/common/aggregate/regression.result b/tests/cases/standalone/common/aggregate/regression.result new file mode 100644 index 0000000000..2e744ae184 --- /dev/null +++ b/tests/cases/standalone/common/aggregate/regression.result @@ -0,0 +1,120 @@ +-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_regression.test +-- Test REGRESSION functions +-- Test REGR_SLOPE, REGR_INTERCEPT, REGR_R2 +CREATE TABLE regr_test(x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +-- Linear relationship: y = 2x + 1 +INSERT INTO regr_test VALUES + (1.0, 3.0, 1000), (2.0, 5.0, 2000), (3.0, 7.0, 3000), + (4.0, 9.0, 4000), (5.0, 11.0, 5000); + +Affected Rows: 5 + +-- 
Test regression slope (should be close to 2) +SELECT REGR_SLOPE(y, x) FROM regr_test; + ++-------------------------------------+ +| regr_slope(regr_test.y,regr_test.x) | ++-------------------------------------+ +| 2.0 | ++-------------------------------------+ + +-- Test regression intercept (should be close to 1) +SELECT REGR_INTERCEPT(y, x) FROM regr_test; + ++-----------------------------------------+ +| regr_intercept(regr_test.y,regr_test.x) | ++-----------------------------------------+ +| 1.0 | ++-----------------------------------------+ + +-- Test R-squared (should be close to 1 for perfect fit) +SELECT REGR_R2(y, x) FROM regr_test; + ++----------------------------------+ +| regr_r2(regr_test.y,regr_test.x) | ++----------------------------------+ +| 1.0 | ++----------------------------------+ + +-- Test REGR_COUNT (number of non-null pairs) +SELECT REGR_COUNT(y, x) FROM regr_test; + ++-------------------------------------+ +| regr_count(regr_test.y,regr_test.x) | ++-------------------------------------+ +| 5 | ++-------------------------------------+ + +-- Test REGR_SXX, REGR_SYY, REGR_SXY +SELECT REGR_SXX(y, x), REGR_SYY(y, x), REGR_SXY(y, x) FROM regr_test; + ++-----------------------------------+-----------------------------------+-----------------------------------+ +| regr_sxx(regr_test.y,regr_test.x) | regr_syy(regr_test.y,regr_test.x) | regr_sxy(regr_test.y,regr_test.x) | ++-----------------------------------+-----------------------------------+-----------------------------------+ +| 10.0 | 40.0 | 20.0 | ++-----------------------------------+-----------------------------------+-----------------------------------+ + +-- Test REGR_AVGX, REGR_AVGY +SELECT REGR_AVGX(y, x), REGR_AVGY(y, x) FROM regr_test; + ++------------------------------------+------------------------------------+ +| regr_avgx(regr_test.y,regr_test.x) | regr_avgy(regr_test.y,regr_test.x) | ++------------------------------------+------------------------------------+ +| 3.0 | 7.0 | 
++------------------------------------+------------------------------------+ + +-- Test with noisy data +CREATE TABLE regr_noisy(x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO regr_noisy VALUES + (1.0, 3.1, 1000), (2.0, 4.9, 2000), (3.0, 7.2, 3000), + (4.0, 8.8, 4000), (5.0, 11.1, 5000); + +Affected Rows: 5 + +SELECT REGR_SLOPE(y, x), REGR_INTERCEPT(y, x), REGR_R2(y, x) FROM regr_noisy; + ++---------------------------------------+-------------------------------------------+------------------------------------+ +| regr_slope(regr_noisy.y,regr_noisy.x) | regr_intercept(regr_noisy.y,regr_noisy.x) | regr_r2(regr_noisy.y,regr_noisy.x) | ++---------------------------------------+-------------------------------------------+------------------------------------+ +| 1.9900000000000002 | 1.049999999999998 | 0.9973053289009772 | ++---------------------------------------+-------------------------------------------+------------------------------------+ + +-- Test with groups +CREATE TABLE regr_groups(grp INTEGER, x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO regr_groups VALUES + (1, 1.0, 3.0, 1000), (1, 2.0, 5.0, 2000), (1, 3.0, 7.0, 3000), + (2, 1.0, 2.0, 4000), (2, 2.0, 4.0, 5000), (2, 3.0, 6.0, 6000); + +Affected Rows: 6 + +SELECT grp, REGR_SLOPE(y, x), REGR_INTERCEPT(y, x), REGR_R2(y, x) +FROM regr_groups GROUP BY grp ORDER BY grp; + ++-----+-----------------------------------------+---------------------------------------------+--------------------------------------+ +| grp | regr_slope(regr_groups.y,regr_groups.x) | regr_intercept(regr_groups.y,regr_groups.x) | regr_r2(regr_groups.y,regr_groups.x) | ++-----+-----------------------------------------+---------------------------------------------+--------------------------------------+ +| 1 | 2.0 | 1.0 | 1.0 | +| 2 | 2.0 | 0.0 | 1.0 | 
++-----+-----------------------------------------+---------------------------------------------+--------------------------------------+
+
+DROP TABLE regr_test;
+
+Affected Rows: 0
+
+DROP TABLE regr_noisy;
+
+Affected Rows: 0
+
+DROP TABLE regr_groups;
+
+Affected Rows: 0
+
diff --git a/tests/cases/standalone/common/aggregate/regression.sql b/tests/cases/standalone/common/aggregate/regression.sql
new file mode 100644
index 0000000000..3b935a2c40
--- /dev/null
+++ b/tests/cases/standalone/common/aggregate/regression.sql
@@ -0,0 +1,53 @@
+-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_regression.test
+-- Test REGRESSION functions
+
+-- Test REGR_SLOPE, REGR_INTERCEPT, REGR_R2
+CREATE TABLE regr_test(x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
+
+-- Linear relationship: y = 2x + 1
+INSERT INTO regr_test VALUES
+  (1.0, 3.0, 1000), (2.0, 5.0, 2000), (3.0, 7.0, 3000),
+  (4.0, 9.0, 4000), (5.0, 11.0, 5000);
+
+-- Test regression slope (should be close to 2)
+SELECT REGR_SLOPE(y, x) FROM regr_test;
+
+-- Test regression intercept (should be close to 1)
+SELECT REGR_INTERCEPT(y, x) FROM regr_test;
+
+-- Test R-squared (should be close to 1 for perfect fit)
+SELECT REGR_R2(y, x) FROM regr_test;
+
+-- Test REGR_COUNT (number of non-null pairs)
+SELECT REGR_COUNT(y, x) FROM regr_test;
+
+-- Test REGR_SXX, REGR_SYY, REGR_SXY
+SELECT REGR_SXX(y, x), REGR_SYY(y, x), REGR_SXY(y, x) FROM regr_test;
+
+-- Test REGR_AVGX, REGR_AVGY
+SELECT REGR_AVGX(y, x), REGR_AVGY(y, x) FROM regr_test;
+
+-- Test with noisy data
+CREATE TABLE regr_noisy(x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
+
+INSERT INTO regr_noisy VALUES
+  (1.0, 3.1, 1000), (2.0, 4.9, 2000), (3.0, 7.2, 3000),
+  (4.0, 8.8, 4000), (5.0, 11.1, 5000);
+
+SELECT REGR_SLOPE(y, x), REGR_INTERCEPT(y, x), REGR_R2(y, x) FROM regr_noisy;
+
+-- Test with groups
+CREATE TABLE regr_groups(grp INTEGER, x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
+
+INSERT INTO regr_groups VALUES
+  (1, 1.0, 3.0, 1000), (1, 2.0, 5.0, 2000), (1, 3.0, 7.0, 3000),
+  (2, 1.0, 2.0, 4000), (2, 2.0, 4.0, 5000), (2, 3.0, 6.0, 6000);
+
+SELECT grp, REGR_SLOPE(y, x), REGR_INTERCEPT(y, x), REGR_R2(y, x)
+FROM regr_groups GROUP BY grp ORDER BY grp;
+
+DROP TABLE regr_test;
+
+DROP TABLE regr_noisy;
+
+DROP TABLE regr_groups;
diff --git a/tests/cases/standalone/common/aggregate/stddev.result b/tests/cases/standalone/common/aggregate/stddev.result
new file mode 100644
index 0000000000..4cabcd313a
--- /dev/null
+++ b/tests/cases/standalone/common/aggregate/stddev.result
@@ -0,0 +1,175 @@
+-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_stddev.test
+-- Test STDDEV aggregations
+CREATE TABLE stddev_test(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX);
+
+Affected Rows: 0
+
+INSERT INTO stddev_test VALUES (42, 1, 1000), (43, 1, 2000), (42, 2, 3000), (1000, 2, 4000), (NULL, 1, 5000), (NULL, 3, 6000);
+
+Affected Rows: 6
+
+SELECT stddev_samp(1);
+
++------------------+
+| stddev(Int64(1)) |
++------------------+
+|                  |
++------------------+
+
+SELECT var_samp(1);
+
++---------------+
+| var(Int64(1)) |
++---------------+
+|               |
++---------------+
+
+-- stddev_samp
+SELECT round(stddev_samp(val), 1) FROM stddev_test;
+
++-----------------------------------------+
+| round(stddev(stddev_test.val),Int64(1)) |
++-----------------------------------------+
+| 478.8                                   |
++-----------------------------------------+
+
+SELECT round(stddev_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+
++-----------------------------------------+
+| round(stddev(stddev_test.val),Int64(1)) |
++-----------------------------------------+
+| 478.8                                   |
++-----------------------------------------+
+
+SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+
++-----+----------------------+-----------------------------------------+----------------------+
+| grp | sum(stddev_test.val) | round(stddev(stddev_test.val),Int64(1)) | min(stddev_test.val) |
++-----+----------------------+-----------------------------------------+----------------------+
+| 1   | 85                   | 0.7                                     | 42                   |
+| 2   | 1042                 | 677.4                                   | 42                   |
+| 3   |                      |                                         |                      |
++-----+----------------------+-----------------------------------------+----------------------+
+
+SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+
++-----+----------------------+-----------------------------------------+----------------------+
+| grp | sum(stddev_test.val) | round(stddev(stddev_test.val),Int64(1)) | min(stddev_test.val) |
++-----+----------------------+-----------------------------------------+----------------------+
+| 1   | 85                   | 0.7                                     | 42                   |
+| 2   | 1042                 | 677.4                                   | 42                   |
++-----+----------------------+-----------------------------------------+----------------------+
+
+-- stddev_pop
+SELECT round(stddev_pop(val), 1) FROM stddev_test;
+
++---------------------------------------------+
+| round(stddev_pop(stddev_test.val),Int64(1)) |
++---------------------------------------------+
+| 414.7                                       |
++---------------------------------------------+
+
+SELECT round(stddev_pop(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+
++---------------------------------------------+
+| round(stddev_pop(stddev_test.val),Int64(1)) |
++---------------------------------------------+
+| 414.7                                       |
++---------------------------------------------+
+
+SELECT grp, sum(val), round(stddev_pop(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+
++-----+----------------------+---------------------------------------------+----------------------+
+| grp | sum(stddev_test.val) | round(stddev_pop(stddev_test.val),Int64(1)) | min(stddev_test.val) |
++-----+----------------------+---------------------------------------------+----------------------+
+| 1   | 85                   | 0.5                                         | 42                   |
+| 2   | 1042                 | 479.0                                       | 42                   |
+| 3   |                      |                                             |                      |
++-----+----------------------+---------------------------------------------+----------------------+
+
+SELECT grp, sum(val), round(stddev_pop(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+
++-----+----------------------+---------------------------------------------+----------------------+
+| grp | sum(stddev_test.val) | round(stddev_pop(stddev_test.val),Int64(1)) | min(stddev_test.val) |
++-----+----------------------+---------------------------------------------+----------------------+
+| 1   | 85                   | 0.5                                         | 42                   |
+| 2   | 1042                 | 479.0                                       | 42                   |
++-----+----------------------+---------------------------------------------+----------------------+
+
+-- var_samp
+SELECT round(var_samp(val), 1) FROM stddev_test;
+
++--------------------------------------+
+| round(var(stddev_test.val),Int64(1)) |
++--------------------------------------+
+| 229281.6                             |
++--------------------------------------+
+
+SELECT round(var_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+
++--------------------------------------+
+| round(var(stddev_test.val),Int64(1)) |
++--------------------------------------+
+| 229281.6                             |
++--------------------------------------+
+
+SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+
++-----+----------------------+--------------------------------------+----------------------+
+| grp | sum(stddev_test.val) | round(var(stddev_test.val),Int64(1)) | min(stddev_test.val) |
++-----+----------------------+--------------------------------------+----------------------+
+| 1   | 85                   | 0.5                                  | 42                   |
+| 2   | 1042                 | 458882.0                             | 42                   |
+| 3   |                      |                                      |                      |
++-----+----------------------+--------------------------------------+----------------------+
+
+SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+
++-----+----------------------+--------------------------------------+----------------------+
+| grp | sum(stddev_test.val) | round(var(stddev_test.val),Int64(1)) | min(stddev_test.val) |
++-----+----------------------+--------------------------------------+----------------------+
+| 1   | 85                   | 0.5                                  | 42                   |
+| 2   | 1042                 | 458882.0                             | 42                   |
++-----+----------------------+--------------------------------------+----------------------+
+
+-- var_pop
+SELECT round(var_pop(val), 1) FROM stddev_test;
+
++------------------------------------------+
+| round(var_pop(stddev_test.val),Int64(1)) |
++------------------------------------------+
+| 171961.2                                 |
++------------------------------------------+
+
+SELECT round(var_pop(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+
++------------------------------------------+
+| round(var_pop(stddev_test.val),Int64(1)) |
++------------------------------------------+
+| 171961.2                                 |
++------------------------------------------+
+
+SELECT grp, sum(val), round(var_pop(val), 2), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+
++-----+----------------------+------------------------------------------+----------------------+
+| grp | sum(stddev_test.val) | round(var_pop(stddev_test.val),Int64(2)) | min(stddev_test.val) |
++-----+----------------------+------------------------------------------+----------------------+
+| 1   | 85                   | 0.25                                     | 42                   |
+| 2   | 1042                 | 229441.0                                 | 42                   |
+| 3   |                      |                                          |                      |
++-----+----------------------+------------------------------------------+----------------------+
+
+SELECT grp, sum(val), round(var_pop(val), 2), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+
++-----+----------------------+------------------------------------------+----------------------+
+| grp | sum(stddev_test.val) | round(var_pop(stddev_test.val),Int64(2)) | min(stddev_test.val) |
++-----+----------------------+------------------------------------------+----------------------+
+| 1   | 85                   | 0.25                                     | 42                   |
+| 2   | 1042                 | 229441.0                                 | 42                   |
++-----+----------------------+------------------------------------------+----------------------+
+
+-- cleanup
+DROP TABLE stddev_test;
+
+Affected Rows: 0
+
diff --git a/tests/cases/standalone/common/aggregate/stddev.sql b/tests/cases/standalone/common/aggregate/stddev.sql
new file mode 100644
index 0000000000..487ff01909
--- /dev/null
+++ b/tests/cases/standalone/common/aggregate/stddev.sql
@@ -0,0 +1,49 @@
+-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_stddev.test
+-- Test STDDEV aggregations
+
+CREATE TABLE stddev_test(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX);
+
+INSERT INTO stddev_test VALUES (42, 1, 1000), (43, 1, 2000), (42, 2, 3000), (1000, 2, 4000), (NULL, 1, 5000), (NULL, 3, 6000);
+
+SELECT stddev_samp(1);
+
+SELECT var_samp(1);
+
+-- stddev_samp
+SELECT round(stddev_samp(val), 1) FROM stddev_test;
+
+SELECT round(stddev_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+
+SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+
+SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+
+-- stddev_pop
+SELECT round(stddev_pop(val), 1) FROM stddev_test;
+
+SELECT round(stddev_pop(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+
+SELECT grp, sum(val), round(stddev_pop(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+
+SELECT grp, sum(val), round(stddev_pop(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+
+-- var_samp
+SELECT round(var_samp(val), 1) FROM stddev_test;
+
+SELECT round(var_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+
+SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+
+SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+
+-- var_pop
+SELECT round(var_pop(val), 1) FROM stddev_test;
+
+SELECT round(var_pop(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+
+SELECT grp, sum(val), round(var_pop(val), 2), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+
+SELECT grp, sum(val), 
round(var_pop(val), 2), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+
+-- cleanup
+DROP TABLE stddev_test;
diff --git a/tests/cases/standalone/common/aggregate/string_agg.result b/tests/cases/standalone/common/aggregate/string_agg.result
new file mode 100644
index 0000000000..851d0d7744
--- /dev/null
+++ b/tests/cases/standalone/common/aggregate/string_agg.result
@@ -0,0 +1,88 @@
+-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_string_agg.test
+-- Test STRING_AGG operator
+-- test string aggregation on scalar values
+SELECT STRING_AGG('a',',');
+
++---------------------------------+
+| string_agg(Utf8("a"),Utf8(",")) |
++---------------------------------+
+| a                               |
++---------------------------------+
+
+-- test string aggregation on scalar values with NULL
+SELECT STRING_AGG('a',','), STRING_AGG(NULL,','), STRING_AGG('a', NULL), STRING_AGG(NULL,NULL);
+
++---------------------------------+----------------------------+----------------------------+-----------------------+
+| string_agg(Utf8("a"),Utf8(",")) | string_agg(NULL,Utf8(",")) | string_agg(Utf8("a"),NULL) | string_agg(NULL,NULL) |
++---------------------------------+----------------------------+----------------------------+-----------------------+
+| a                               |                            | a                          |                       |
++---------------------------------+----------------------------+----------------------------+-----------------------+
+
+-- test string aggregation on a set of values
+CREATE TABLE strings(g INTEGER, x VARCHAR, y VARCHAR, ts TIMESTAMP TIME INDEX);
+
+Affected Rows: 0
+
+INSERT INTO strings VALUES
+  (1,'a','/', 1000), (1,'b','-', 2000),
+  (2,'i','/', 3000), (2,NULL,'-', 4000), (2,'j','+', 5000),
+  (3,'p','/', 6000),
+  (4,'x','/', 7000), (4,'y','-', 8000), (4,'z','+', 9000);
+
+Affected Rows: 9
+
+SELECT g, STRING_AGG(x,'|') FROM strings GROUP BY g ORDER BY g;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
+
+-- test agg on empty set
+SELECT STRING_AGG(x,',') FROM strings WHERE g > 100;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
+
+-- string_agg can be used instead of group_concat
+SELECT string_agg('a', ',');
+
++---------------------------------+
+| string_agg(Utf8("a"),Utf8(",")) |
++---------------------------------+
+| a                               |
++---------------------------------+
+
+SELECT g, string_agg(x, ',') FROM strings GROUP BY g ORDER BY g;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
+
+-- Test ORDER BY
+-- Single group
+SELECT STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
+
+SELECT STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
+
+-- Grouped with ORDER BY
+SELECT g, STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings GROUP BY g ORDER BY g;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
+
+SELECT g, STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings GROUP BY g ORDER BY g;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
+
+-- Test with DISTINCT
+SELECT STRING_AGG(DISTINCT x, '' ORDER BY x), STRING_AGG(DISTINCT x, '|' ORDER BY x) FROM strings;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
+
+SELECT g, STRING_AGG(DISTINCT x, '' ORDER BY x) FROM strings GROUP BY g ORDER BY g;
+
+Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
+
+-- cleanup
+DROP TABLE strings;
+
+Affected Rows: 0
+
diff --git a/tests/cases/standalone/common/aggregate/string_agg.sql b/tests/cases/standalone/common/aggregate/string_agg.sql
new file mode 100644
index 0000000000..40b167f09c
--- /dev/null
+++ b/tests/cases/standalone/common/aggregate/string_agg.sql
@@ -0,0 +1,46 @@
+-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_string_agg.test
+-- Test STRING_AGG operator
+
+-- test string aggregation on scalar values
+SELECT STRING_AGG('a',',');
+
+-- test string aggregation on scalar values with NULL
+SELECT STRING_AGG('a',','), STRING_AGG(NULL,','), STRING_AGG('a', NULL), STRING_AGG(NULL,NULL);
+
+-- test string aggregation on a set of values
+CREATE TABLE strings(g INTEGER, x VARCHAR, y VARCHAR, ts TIMESTAMP TIME INDEX);
+
+INSERT INTO strings VALUES
+  (1,'a','/', 1000), (1,'b','-', 2000),
+  (2,'i','/', 3000), (2,NULL,'-', 4000), (2,'j','+', 5000),
+  (3,'p','/', 6000),
+  (4,'x','/', 7000), (4,'y','-', 8000), (4,'z','+', 9000);
+
+SELECT g, STRING_AGG(x,'|') FROM strings GROUP BY g ORDER BY g;
+
+-- test agg on empty set
+SELECT STRING_AGG(x,',') FROM strings WHERE g > 100;
+
+-- string_agg can be used instead of group_concat
+SELECT string_agg('a', ',');
+
+SELECT g, string_agg(x, ',') FROM strings GROUP BY g ORDER BY g;
+
+-- Test ORDER BY
+-- Single group
+SELECT STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings;
+
+SELECT STRING_AGG(x, '' ORDER BY x DESC), 
STRING_AGG(x,'|' ORDER BY x DESC) FROM strings;
+
+-- Grouped with ORDER BY
+SELECT g, STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings GROUP BY g ORDER BY g;
+
+SELECT g, STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings GROUP BY g ORDER BY g;
+
+-- Test with DISTINCT
+SELECT STRING_AGG(DISTINCT x, '' ORDER BY x), STRING_AGG(DISTINCT x, '|' ORDER BY x) FROM strings;
+
+SELECT g, STRING_AGG(DISTINCT x, '' ORDER BY x) FROM strings GROUP BY g ORDER BY g;
+
+-- cleanup
+DROP TABLE strings;
diff --git a/tests/cases/standalone/common/aggregate/uddsketch.sql b/tests/cases/standalone/common/aggregate/uddsketch.sql
index 56ce2ccf97..c959f33ee9 100644
--- a/tests/cases/standalone/common/aggregate/uddsketch.sql
+++ b/tests/cases/standalone/common/aggregate/uddsketch.sql
@@ -41,4 +41,5 @@ SELECT uddsketch_calc(0.1, uddsketch_merge(128, 0.1, `state`)) FROM grouped_udds
 SELECT uddsketch_calc(0.1, uddsketch_merge(64, 0.01, `state`)) FROM grouped_uddsketch;
 
 drop table test_uddsketch;
+
 drop table grouped_uddsketch;