test: migrate aggregation tests from duckdb, part4 (#6965)

* test: migrate aggregation tests from duckdb, part4

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: tests

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: rename tests

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: comments

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: ignore zero weights test

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: remove duplicated sql

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

---------

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
This commit is contained in:
dennis zhuang
2025-09-25 16:00:17 +08:00
committed by GitHub
parent 9c8ff1d8a0
commit c6e5552f05
33 changed files with 3257 additions and 0 deletions

View File

@@ -0,0 +1,118 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_approximate_distinct_count.test
-- Test approx_distinct function
-- Basic tests
SELECT APPROX_DISTINCT(1);
+---------------------------+
| approx_distinct(Int64(1)) |
+---------------------------+
| 1 |
+---------------------------+
-- FIXME(dennis): This feature is not implemented: Support for 'approx_distinct' for data type Null is not implemented
-- SELECT APPROX_DISTINCT(NULL);
SELECT APPROX_DISTINCT('hello');
+--------------------------------+
| approx_distinct(Utf8("hello")) |
+--------------------------------+
| 1 |
+--------------------------------+
-- Test with range data
-- The 100-row bound sits in a subquery: a LIMIT written after an aggregate without
-- GROUP BY only trims the single aggregated row, it does not bound the aggregate's input
SELECT APPROX_DISTINCT(10), APPROX_DISTINCT('hello') FROM (SELECT number FROM numbers LIMIT 100) AS t;
+----------------------------+--------------------------------+
| approx_distinct(Int64(10)) | approx_distinct(Utf8("hello")) |
+----------------------------+--------------------------------+
| 1 | 1 |
+----------------------------+--------------------------------+
-- Empty input: the always-false filter removes every row, so no LIMIT is needed
SELECT APPROX_DISTINCT(number) FROM numbers WHERE 1 = 0;
+---------------------------------+
| approx_distinct(numbers.number) |
+---------------------------------+
| 0 |
+---------------------------------+
-- Test with different data types
CREATE TABLE dates_test(t DATE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO dates_test VALUES
('2008-01-01', 1000), (NULL, 2000), ('2007-01-01', 3000),
('2008-02-01', 4000), ('2008-01-02', 5000), ('2008-01-01', 6000),
('2008-01-01', 7000), ('2008-01-01', 8000);
Affected Rows: 8
-- FIXME(dennis): This feature is not implemented: Support for 'approx_distinct' for data type Date32 is not implemented
-- SELECT APPROX_DISTINCT(t) FROM dates_test;
DROP TABLE dates_test;
Affected Rows: 0
CREATE TABLE names_test(t VARCHAR, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO names_test VALUES
('Pedro', 1000), (NULL, 2000), ('Pedro', 3000), ('Pedro', 4000),
('Mark', 5000), ('Mark', 6000), ('Mark', 7000),
('Hannes-Muehleisen', 8000), ('Hannes-Muehleisen', 9000);
Affected Rows: 9
-- Three distinct non-NULL names are inserted, and the NULL row is expected not to count
SELECT APPROX_DISTINCT(t) FROM names_test;
+-------------------------------+
| approx_distinct(names_test.t) |
+-------------------------------+
| 3 |
+-------------------------------+
DROP TABLE names_test;
Affected Rows: 0
-- Test with large dataset
-- a takes 2000 rows from numbers, b is number % 10
CREATE TABLE large_test(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO large_test SELECT number, number % 10, number * 1000 FROM numbers LIMIT 2000;
Affected Rows: 2000
SELECT APPROX_DISTINCT(a), APPROX_DISTINCT(b) FROM large_test;
+-------------------------------+-------------------------------+
| approx_distinct(large_test.a) | approx_distinct(large_test.b) |
+-------------------------------+-------------------------------+
| 2000 | 10 |
+-------------------------------+-------------------------------+
-- Test with groups
-- Per-group values are estimates (199 to 201 in the expected output), ORDER BY b fixes the row order
SELECT b, APPROX_DISTINCT(a) FROM large_test GROUP BY b ORDER BY b;
+---+-------------------------------+
| b | approx_distinct(large_test.a) |
+---+-------------------------------+
| 0 | 200 |
| 1 | 201 |
| 2 | 201 |
| 3 | 200 |
| 4 | 199 |
| 5 | 200 |
| 6 | 199 |
| 7 | 200 |
| 8 | 200 |
| 9 | 200 |
+---+-------------------------------+
DROP TABLE large_test;
Affected Rows: 0

View File

@@ -0,0 +1,51 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_approximate_distinct_count.test
-- Test approx_distinct function
-- Basic tests
SELECT APPROX_DISTINCT(1);
-- FIXME(dennis): This feature is not implemented: Support for 'approx_distinct' for data type Null is not implemented
-- SELECT APPROX_DISTINCT(NULL);
SELECT APPROX_DISTINCT('hello');
-- Test with range data
-- The 100-row bound sits in a subquery: a LIMIT written after an aggregate without
-- GROUP BY only trims the single aggregated row, it does not bound the aggregate's input
SELECT APPROX_DISTINCT(10), APPROX_DISTINCT('hello') FROM (SELECT number FROM numbers LIMIT 100) AS t;
-- Empty input: the always-false filter removes every row, so no LIMIT is needed
SELECT APPROX_DISTINCT(number) FROM numbers WHERE 1 = 0;
-- Test with different data types
CREATE TABLE dates_test(t DATE, ts TIMESTAMP TIME INDEX);
INSERT INTO dates_test VALUES
('2008-01-01', 1000), (NULL, 2000), ('2007-01-01', 3000),
('2008-02-01', 4000), ('2008-01-02', 5000), ('2008-01-01', 6000),
('2008-01-01', 7000), ('2008-01-01', 8000);
-- FIXME(dennis): This feature is not implemented: Support for 'approx_distinct' for data type Date32 is not implemented
-- SELECT APPROX_DISTINCT(t) FROM dates_test;
DROP TABLE dates_test;
CREATE TABLE names_test(t VARCHAR, ts TIMESTAMP TIME INDEX);
INSERT INTO names_test VALUES
('Pedro', 1000), (NULL, 2000), ('Pedro', 3000), ('Pedro', 4000),
('Mark', 5000), ('Mark', 6000), ('Mark', 7000),
('Hannes-Muehleisen', 8000), ('Hannes-Muehleisen', 9000);
-- Three distinct non-NULL names are inserted, and the NULL row is expected not to count
SELECT APPROX_DISTINCT(t) FROM names_test;
DROP TABLE names_test;
-- Test with large dataset
-- a takes 2000 rows from numbers, b is number % 10
CREATE TABLE large_test(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO large_test SELECT number, number % 10, number * 1000 FROM numbers LIMIT 2000;
SELECT APPROX_DISTINCT(a), APPROX_DISTINCT(b) FROM large_test;
-- Test with groups
-- Per-group values are estimates (199 to 201 in the expected output), ORDER BY b fixes the row order
SELECT b, APPROX_DISTINCT(a) FROM large_test GROUP BY b ORDER BY b;
DROP TABLE large_test;

View File

@@ -0,0 +1,186 @@
-- Migrated from DuckDB test style: test approximate median
-- Test APPROX_MEDIAN function
-- Test with odd number of values
CREATE TABLE odd_test(i INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO odd_test VALUES (1, 1000), (2, 2000), (3, 3000), (4, 4000), (5, 5000);
Affected Rows: 5
-- Should return 3 (middle value)
SELECT approx_median(i) FROM odd_test;
+---------------------------+
| approx_median(odd_test.i) |
+---------------------------+
| 3 |
+---------------------------+
-- Test with even number of values
CREATE TABLE even_test(i INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO even_test VALUES (1, 1000), (2, 2000), (4, 3000), (5, 4000);
Affected Rows: 4
-- Should return approximately 3 (average of 2 and 4)
SELECT approx_median(i) FROM even_test;
+----------------------------+
| approx_median(even_test.i) |
+----------------------------+
| 3 |
+----------------------------+
-- Test with larger dataset
-- val takes 1000 rows from numbers, grp is number % 3
CREATE TABLE large_test(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO large_test SELECT number, number % 3, number * 1000 FROM numbers LIMIT 1000;
Affected Rows: 1000
SELECT approx_median(val) FROM large_test;
+-------------------------------+
| approx_median(large_test.val) |
+-------------------------------+
| 499 |
+-------------------------------+
-- Test with groups
-- ORDER BY grp fixes the row order of the expected output
SELECT grp, approx_median(val) FROM large_test GROUP BY grp ORDER BY grp;
+-----+-------------------------------+
| grp | approx_median(large_test.val) |
+-----+-------------------------------+
| 0 | 498 |
| 1 | 499 |
| 2 | 500 |
+-----+-------------------------------+
-- Test with doubles
CREATE TABLE double_test(d DOUBLE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO double_test VALUES
(1.1, 1000), (2.2, 2000), (3.3, 3000), (4.4, 4000), (5.5, 5000);
Affected Rows: 5
SELECT approx_median(d) FROM double_test;
+------------------------------+
| approx_median(double_test.d) |
+------------------------------+
| 3.3 |
+------------------------------+
-- Test with NULL values
-- The expected median stays 3.3 after the NULL row is inserted
INSERT INTO double_test VALUES (NULL, 6000);
Affected Rows: 1
SELECT approx_median(d) FROM double_test;
+------------------------------+
| approx_median(double_test.d) |
+------------------------------+
| 3.3 |
+------------------------------+
-- Test with duplicate values
CREATE TABLE dup_test(val INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO dup_test VALUES
(1, 1000), (1, 2000), (2, 3000), (2, 4000),
(3, 5000), (3, 6000), (4, 7000), (4, 8000);
Affected Rows: 8
SELECT approx_median(val) FROM dup_test;
+-----------------------------+
| approx_median(dup_test.val) |
+-----------------------------+
| 2 |
+-----------------------------+
-- Compare with exact median
-- Both columns are expected to show 2 for this INTEGER input
SELECT median(val), approx_median(val) FROM dup_test;
+----------------------+-----------------------------+
| median(dup_test.val) | approx_median(dup_test.val) |
+----------------------+-----------------------------+
| 2 | 2 |
+----------------------+-----------------------------+
-- Test edge cases
-- empty result
-- No row satisfies i > 100, so the expected cell is empty
SELECT approx_median(i) FROM odd_test WHERE i > 100;
+---------------------------+
| approx_median(odd_test.i) |
+---------------------------+
| |
+---------------------------+
-- Test single value
SELECT approx_median(i) FROM odd_test WHERE i = 3;
+---------------------------+
| approx_median(odd_test.i) |
+---------------------------+
| 3 |
+---------------------------+
-- Test with negative values
CREATE TABLE neg_test(val INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO neg_test VALUES (-5, 1000), (-2, 2000), (0, 3000), (3, 4000), (7, 5000);
Affected Rows: 5
SELECT approx_median(val) FROM neg_test;
+-----------------------------+
| approx_median(neg_test.val) |
+-----------------------------+
| 0 |
+-----------------------------+
-- cleanup
DROP TABLE odd_test;
Affected Rows: 0
DROP TABLE even_test;
Affected Rows: 0
DROP TABLE large_test;
Affected Rows: 0
DROP TABLE double_test;
Affected Rows: 0
DROP TABLE dup_test;
Affected Rows: 0
DROP TABLE neg_test;
Affected Rows: 0

View File

@@ -0,0 +1,80 @@
-- Migrated from DuckDB test style: test approximate median
-- Test APPROX_MEDIAN function
-- Test with odd number of values
CREATE TABLE odd_test(i INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO odd_test VALUES (1, 1000), (2, 2000), (3, 3000), (4, 4000), (5, 5000);
-- Should return 3 (middle value)
SELECT approx_median(i) FROM odd_test;
-- Test with even number of values
CREATE TABLE even_test(i INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO even_test VALUES (1, 1000), (2, 2000), (4, 3000), (5, 4000);
-- Should return approximately 3 (average of 2 and 4)
SELECT approx_median(i) FROM even_test;
-- Test with larger dataset
-- val takes 1000 rows from numbers, grp is number % 3
CREATE TABLE large_test(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO large_test SELECT number, number % 3, number * 1000 FROM numbers LIMIT 1000;
SELECT approx_median(val) FROM large_test;
-- Test with groups
-- ORDER BY grp fixes the row order of the expected output
SELECT grp, approx_median(val) FROM large_test GROUP BY grp ORDER BY grp;
-- Test with doubles
CREATE TABLE double_test(d DOUBLE, ts TIMESTAMP TIME INDEX);
INSERT INTO double_test VALUES
(1.1, 1000), (2.2, 2000), (3.3, 3000), (4.4, 4000), (5.5, 5000);
SELECT approx_median(d) FROM double_test;
-- Test with NULL values
-- The expected median stays 3.3 after the NULL row is inserted
INSERT INTO double_test VALUES (NULL, 6000);
SELECT approx_median(d) FROM double_test;
-- Test with duplicate values
CREATE TABLE dup_test(val INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO dup_test VALUES
(1, 1000), (1, 2000), (2, 3000), (2, 4000),
(3, 5000), (3, 6000), (4, 7000), (4, 8000);
SELECT approx_median(val) FROM dup_test;
-- Compare with exact median
-- Both columns are expected to show 2 for this INTEGER input
SELECT median(val), approx_median(val) FROM dup_test;
-- Test edge cases
-- empty result
-- No row satisfies i > 100, so the expected cell is empty
SELECT approx_median(i) FROM odd_test WHERE i > 100;
-- Test single value
SELECT approx_median(i) FROM odd_test WHERE i = 3;
-- Test with negative values
CREATE TABLE neg_test(val INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO neg_test VALUES (-5, 1000), (-2, 2000), (0, 3000), (3, 4000), (7, 5000);
SELECT approx_median(val) FROM neg_test;
-- cleanup
DROP TABLE odd_test;
DROP TABLE even_test;
DROP TABLE large_test;
DROP TABLE double_test;
DROP TABLE dup_test;
DROP TABLE neg_test;

View File

@@ -0,0 +1,194 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_approx_quantile.test
-- Test approx_percentile_cont function instead of approx_quantile
-- Test basic approximate quantile
-- i takes 1000 rows from numbers
CREATE TABLE approx_test(i INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO approx_test SELECT number, number * 1000 FROM numbers LIMIT 1000;
Affected Rows: 1000
-- Test approx_percentile_cont
-- median
SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i) FROM approx_test;
+----------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.5)) WITHIN GROUP [approx_test.i ASC NULLS LAST] |
+----------------------------------------------------------------------------------+
| 499 |
+----------------------------------------------------------------------------------+
-- first quartile
SELECT approx_percentile_cont(0.25) WITHIN GROUP (ORDER BY i) FROM approx_test;
+-----------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.25)) WITHIN GROUP [approx_test.i ASC NULLS LAST] |
+-----------------------------------------------------------------------------------+
| 249 |
+-----------------------------------------------------------------------------------+
-- third quartile
SELECT approx_percentile_cont(0.75) WITHIN GROUP (ORDER BY i) FROM approx_test;
+-----------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.75)) WITHIN GROUP [approx_test.i ASC NULLS LAST] |
+-----------------------------------------------------------------------------------+
| 749 |
+-----------------------------------------------------------------------------------+
-- 95th percentile
SELECT approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY i) FROM approx_test;
+-----------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.95)) WITHIN GROUP [approx_test.i ASC NULLS LAST] |
+-----------------------------------------------------------------------------------+
| 949 |
+-----------------------------------------------------------------------------------+
-- Test approx_percentile_cont DESC
-- In the expected output a descending sort mirrors the ascending one,
-- e.g. 0.25 DESC and 0.75 ASC both give 749
-- median
SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
+------------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.5)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] |
+------------------------------------------------------------------------------------+
| 499 |
+------------------------------------------------------------------------------------+
-- first quartile
SELECT approx_percentile_cont(0.25) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
+-------------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.25)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] |
+-------------------------------------------------------------------------------------+
| 749 |
+-------------------------------------------------------------------------------------+
-- third quartile
SELECT approx_percentile_cont(0.75) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
+-------------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.75)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] |
+-------------------------------------------------------------------------------------+
| 249 |
+-------------------------------------------------------------------------------------+
-- 95th percentile
SELECT approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
+-------------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.95)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] |
+-------------------------------------------------------------------------------------+
| 49 |
+-------------------------------------------------------------------------------------+
-- Test with different data types
-- d is number * 1.5 over 1000 rows taken from numbers
CREATE TABLE approx_double(d DOUBLE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO approx_double SELECT number * 1.5, number * 1000 FROM numbers LIMIT 1000;
Affected Rows: 1000
SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY d) FROM approx_double;
+------------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.5)) WITHIN GROUP [approx_double.d ASC NULLS LAST] |
+------------------------------------------------------------------------------------+
| 748.875 |
+------------------------------------------------------------------------------------+
SELECT approx_percentile_cont(0.9) WITHIN GROUP (ORDER BY d) FROM approx_double;
+------------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.9)) WITHIN GROUP [approx_double.d ASC NULLS LAST] |
+------------------------------------------------------------------------------------+
| 1349.25 |
+------------------------------------------------------------------------------------+
-- Test with groups
-- grp is number % 3 over 300 rows taken from numbers
CREATE TABLE approx_groups(grp INTEGER, val INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO approx_groups SELECT
number % 3 as grp,
number,
number * 1000
FROM numbers LIMIT 300;
Affected Rows: 300
SELECT grp, approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY val)
FROM approx_groups GROUP BY grp ORDER BY grp;
+-----+--------------------------------------------------------------------------------------+
| grp | approx_percentile_cont(Float64(0.5)) WITHIN GROUP [approx_groups.val ASC NULLS LAST] |
+-----+--------------------------------------------------------------------------------------+
| 0 | 148 |
| 1 | 149 |
| 2 | 150 |
+-----+--------------------------------------------------------------------------------------+
-- Test with NULL values
-- The expected median stays 499 after the NULL row is inserted
INSERT INTO approx_test VALUES (NULL, 1001000);
Affected Rows: 1
SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i) FROM approx_test;
+----------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0.5)) WITHIN GROUP [approx_test.i ASC NULLS LAST] |
+----------------------------------------------------------------------------------+
| 499 |
+----------------------------------------------------------------------------------+
-- Test edge cases
-- should be close to min
-- (percentile 0.0 ascending, then percentile 1.0 descending)
SELECT approx_percentile_cont(0.0) WITHIN GROUP (ORDER BY i) FROM approx_test;
+--------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0)) WITHIN GROUP [approx_test.i ASC NULLS LAST] |
+--------------------------------------------------------------------------------+
| 0 |
+--------------------------------------------------------------------------------+
SELECT approx_percentile_cont(1.0) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
+----------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(1)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] |
+----------------------------------------------------------------------------------+
| 0 |
+----------------------------------------------------------------------------------+
-- should be close to max
-- (percentile 1.0 ascending, then percentile 0.0 descending)
SELECT approx_percentile_cont(1.0) WITHIN GROUP (ORDER BY i) FROM approx_test;
+--------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(1)) WITHIN GROUP [approx_test.i ASC NULLS LAST] |
+--------------------------------------------------------------------------------+
| 999 |
+--------------------------------------------------------------------------------+
SELECT approx_percentile_cont(0.0) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
+----------------------------------------------------------------------------------+
| approx_percentile_cont(Float64(0)) WITHIN GROUP [approx_test.i DESC NULLS FIRST] |
+----------------------------------------------------------------------------------+
| 999 |
+----------------------------------------------------------------------------------+
DROP TABLE approx_test;
Affected Rows: 0
DROP TABLE approx_double;
Affected Rows: 0
DROP TABLE approx_groups;
Affected Rows: 0

View File

@@ -0,0 +1,76 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_approx_quantile.test
-- Test approx_percentile_cont function instead of approx_quantile
-- Test basic approximate quantile
-- i takes 1000 rows from numbers
CREATE TABLE approx_test(i INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO approx_test SELECT number, number * 1000 FROM numbers LIMIT 1000;
-- Test approx_percentile_cont
-- median
SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i) FROM approx_test;
-- first quartile
SELECT approx_percentile_cont(0.25) WITHIN GROUP (ORDER BY i) FROM approx_test;
-- third quartile
SELECT approx_percentile_cont(0.75) WITHIN GROUP (ORDER BY i) FROM approx_test;
-- 95th percentile
SELECT approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY i) FROM approx_test;
-- Test approx_percentile_cont DESC
-- In the expected output a descending sort mirrors the ascending one,
-- e.g. 0.25 DESC and 0.75 ASC both give 749
-- median
SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
-- first quartile
SELECT approx_percentile_cont(0.25) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
-- third quartile
SELECT approx_percentile_cont(0.75) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
-- 95th percentile
SELECT approx_percentile_cont(0.95) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
-- Test with different data types
-- d is number * 1.5 over 1000 rows taken from numbers
CREATE TABLE approx_double(d DOUBLE, ts TIMESTAMP TIME INDEX);
INSERT INTO approx_double SELECT number * 1.5, number * 1000 FROM numbers LIMIT 1000;
SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY d) FROM approx_double;
SELECT approx_percentile_cont(0.9) WITHIN GROUP (ORDER BY d) FROM approx_double;
-- Test with groups
-- grp is number % 3 over 300 rows taken from numbers
CREATE TABLE approx_groups(grp INTEGER, val INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO approx_groups SELECT
number % 3 as grp,
number,
number * 1000
FROM numbers LIMIT 300;
SELECT grp, approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY val)
FROM approx_groups GROUP BY grp ORDER BY grp;
-- Test with NULL values
-- The expected median stays 499 after the NULL row is inserted
INSERT INTO approx_test VALUES (NULL, 1001000);
SELECT approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY i) FROM approx_test;
-- Test edge cases
-- should be close to min
-- (percentile 0.0 ascending, then percentile 1.0 descending)
SELECT approx_percentile_cont(0.0) WITHIN GROUP (ORDER BY i) FROM approx_test;
SELECT approx_percentile_cont(1.0) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
-- should be close to max
-- (percentile 1.0 ascending, then percentile 0.0 descending)
SELECT approx_percentile_cont(1.0) WITHIN GROUP (ORDER BY i) FROM approx_test;
SELECT approx_percentile_cont(0.0) WITHIN GROUP (ORDER BY i DESC) FROM approx_test;
DROP TABLE approx_test;
DROP TABLE approx_double;
DROP TABLE approx_groups;

View File

@@ -0,0 +1,194 @@
-- Migrated from DuckDB test style: test weighted approximate percentile
-- Test APPROX_PERCENTILE_CONT_WITH_WEIGHT function
-- Test basic weighted percentile
CREATE TABLE weight_test("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO weight_test VALUES
(10, 1, 1000), (20, 2, 2000), (30, 3, 3000), (40, 4, 4000), (50, 1, 5000);
Affected Rows: 5
-- Test 50th percentile (median) with weights
SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_test;
+---------------------------------------------------------------------------------------------------------------------+
| approx_percentile_cont_with_weight(weight_test.weight,Float64(0.5)) WITHIN GROUP [weight_test.value ASC NULLS LAST] |
+---------------------------------------------------------------------------------------------------------------------+
| 22 |
+---------------------------------------------------------------------------------------------------------------------+
-- Test different percentiles
SELECT approx_percentile_cont_with_weight(weight, 0.25) WITHIN GROUP (ORDER BY "value") FROM weight_test;
+----------------------------------------------------------------------------------------------------------------------+
| approx_percentile_cont_with_weight(weight_test.weight,Float64(0.25)) WITHIN GROUP [weight_test.value ASC NULLS LAST] |
+----------------------------------------------------------------------------------------------------------------------+
| 16 |
+----------------------------------------------------------------------------------------------------------------------+
SELECT approx_percentile_cont_with_weight(weight, 0.75) WITHIN GROUP (ORDER BY "value") FROM weight_test;
+----------------------------------------------------------------------------------------------------------------------+
| approx_percentile_cont_with_weight(weight_test.weight,Float64(0.75)) WITHIN GROUP [weight_test.value ASC NULLS LAST] |
+----------------------------------------------------------------------------------------------------------------------+
| 44 |
+----------------------------------------------------------------------------------------------------------------------+
-- Test with groups
CREATE TABLE weight_groups(grp INTEGER, "value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO weight_groups VALUES
(1, 10, 2, 1000), (1, 20, 3, 2000), (1, 30, 1, 3000),
(2, 100, 1, 4000), (2, 200, 4, 5000), (2, 300, 2, 6000);
Affected Rows: 6
SELECT grp, approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value")
FROM weight_groups GROUP BY grp ORDER BY grp;
+-----+-------------------------------------------------------------------------------------------------------------------------+
| grp | approx_percentile_cont_with_weight(weight_groups.weight,Float64(0.5)) WITHIN GROUP [weight_groups.value ASC NULLS LAST] |
+-----+-------------------------------------------------------------------------------------------------------------------------+
| 1 | 12 |
| 2 | 162 |
+-----+-------------------------------------------------------------------------------------------------------------------------+
-- Test with double values and weights
CREATE TABLE weight_double("value" DOUBLE, "weight" DOUBLE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO weight_double VALUES
(1.5, 0.5, 1000), (2.5, 1.0, 2000), (3.5, 1.5, 3000), (4.5, 2.0, 4000);
Affected Rows: 4
SELECT approx_percentile_cont_with_weight("weight", 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_double;
+-------------------------------------------------------------------------------------------------------------------------+
| approx_percentile_cont_with_weight(weight_double.weight,Float64(0.5)) WITHIN GROUP [weight_double.value ASC NULLS LAST] |
+-------------------------------------------------------------------------------------------------------------------------+
| 3.3333333333333335 |
+-------------------------------------------------------------------------------------------------------------------------+
-- Test edge cases
-- min
SELECT approx_percentile_cont_with_weight("weight", 0.0) WITHIN GROUP (ORDER BY "value") FROM weight_test;
+-------------------------------------------------------------------------------------------------------------------+
| approx_percentile_cont_with_weight(weight_test.weight,Float64(0)) WITHIN GROUP [weight_test.value ASC NULLS LAST] |
+-------------------------------------------------------------------------------------------------------------------+
| 10 |
+-------------------------------------------------------------------------------------------------------------------+
-- max
SELECT approx_percentile_cont_with_weight("weight", 1.0) WITHIN GROUP (ORDER BY "value") FROM weight_test;
+-------------------------------------------------------------------------------------------------------------------+
| approx_percentile_cont_with_weight(weight_test.weight,Float64(1)) WITHIN GROUP [weight_test.value ASC NULLS LAST] |
+-------------------------------------------------------------------------------------------------------------------+
| 50 |
+-------------------------------------------------------------------------------------------------------------------+
-- Test with zero weights
CREATE TABLE zero_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO zero_weight VALUES
(10, 0, 1000), (20, 1, 2000), (30, 0, 3000), (40, 2, 4000);
Affected Rows: 4
--TODO: this result is unstable currently
--SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM zero_weight;
-- Test with NULL values
INSERT INTO weight_test VALUES (NULL, 1, 6000), (60, NULL, 7000);
Affected Rows: 2
SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_test;
+---------------------------------------------------------------------------------------------------------------------+
| approx_percentile_cont_with_weight(weight_test.weight,Float64(0.5)) WITHIN GROUP [weight_test.value ASC NULLS LAST] |
+---------------------------------------------------------------------------------------------------------------------+
| 22 |
+---------------------------------------------------------------------------------------------------------------------+
-- Test empty result
SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value")
FROM weight_test WHERE "value" > 1000;
+---------------------------------------------------------------------------------------------------------------------+
| approx_percentile_cont_with_weight(weight_test.weight,Float64(0.5)) WITHIN GROUP [weight_test.value ASC NULLS LAST] |
+---------------------------------------------------------------------------------------------------------------------+
| |
+---------------------------------------------------------------------------------------------------------------------+
-- Test single weighted value
CREATE TABLE single_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO single_weight VALUES (42, 5, 1000);
Affected Rows: 1
SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM single_weight;
+-------------------------------------------------------------------------------------------------------------------------+
| approx_percentile_cont_with_weight(single_weight.weight,Float64(0.5)) WITHIN GROUP [single_weight.value ASC NULLS LAST] |
+-------------------------------------------------------------------------------------------------------------------------+
| 42 |
+-------------------------------------------------------------------------------------------------------------------------+
-- Test equal weights (should behave like regular percentile)
CREATE TABLE equal_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO equal_weight VALUES
(10, 1, 1000), (20, 1, 2000), (30, 1, 3000), (40, 1, 4000);
Affected Rows: 4
SELECT
approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value"),
approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY "value")
FROM equal_weight;
+-----------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------+
| approx_percentile_cont_with_weight(equal_weight.weight,Float64(0.5)) WITHIN GROUP [equal_weight.value ASC NULLS LAST] | approx_percentile_cont(Float64(0.5)) WITHIN GROUP [equal_weight.value ASC NULLS LAST] |
+-----------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------+
| 25 | 25 |
+-----------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------+
-- cleanup
DROP TABLE weight_test;
Affected Rows: 0
DROP TABLE weight_groups;
Affected Rows: 0
DROP TABLE weight_double;
Affected Rows: 0
DROP TABLE zero_weight;
Affected Rows: 0
DROP TABLE single_weight;
Affected Rows: 0
DROP TABLE equal_weight;
Affected Rows: 0

View File

@@ -0,0 +1,90 @@
-- Migrated from DuckDB test style: test weighted approximate percentile
-- Test APPROX_PERCENTILE_CONT_WITH_WEIGHT function
-- Every table below declares ts as its TIME INDEX column and gives each row a distinct ts
-- Test basic weighted percentile
CREATE TABLE weight_test("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO weight_test VALUES
(10, 1, 1000), (20, 2, 2000), (30, 3, 3000), (40, 4, 4000), (50, 1, 5000);
-- Test 50th percentile (median) with weights
SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_test;
-- Test different percentiles
-- 25th percentile
SELECT approx_percentile_cont_with_weight(weight, 0.25) WITHIN GROUP (ORDER BY "value") FROM weight_test;
-- 75th percentile
SELECT approx_percentile_cont_with_weight(weight, 0.75) WITHIN GROUP (ORDER BY "value") FROM weight_test;
-- Test with groups
-- Two groups of three rows each, with their own weights
CREATE TABLE weight_groups(grp INTEGER, "value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO weight_groups VALUES
(1, 10, 2, 1000), (1, 20, 3, 2000), (1, 30, 1, 3000),
(2, 100, 1, 4000), (2, 200, 4, 5000), (2, 300, 2, 6000);
-- One weighted median per grp; ORDER BY grp fixes the order of the output rows
SELECT grp, approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value")
FROM weight_groups GROUP BY grp ORDER BY grp;
-- Test with double values and weights
-- Both "value" and "weight" are DOUBLE here, and the weights are fractional
CREATE TABLE weight_double("value" DOUBLE, "weight" DOUBLE, ts TIMESTAMP TIME INDEX);
INSERT INTO weight_double VALUES
(1.5, 0.5, 1000), (2.5, 1.0, 2000), (3.5, 1.5, 3000), (4.5, 2.0, 4000);
SELECT approx_percentile_cont_with_weight("weight", 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_double;
-- Test edge cases
-- Both queries read weight_test; "weight" is written quoted here and unquoted in the queries above
-- min (percentile 0.0)
SELECT approx_percentile_cont_with_weight("weight", 0.0) WITHIN GROUP (ORDER BY "value") FROM weight_test;
-- max (percentile 1.0)
SELECT approx_percentile_cont_with_weight("weight", 1.0) WITHIN GROUP (ORDER BY "value") FROM weight_test;
-- Test with zero weights
-- Two of the four rows carry weight 0
-- NOTE(review): the table is still created, populated and dropped although its only query is disabled below
CREATE TABLE zero_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO zero_weight VALUES
(10, 0, 1000), (20, 1, 2000), (30, 0, 3000), (40, 2, 4000);
--TODO: this result is unstable currently
--SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM zero_weight;
-- Test with NULL values
-- Adds one row with a NULL "value" and one row with a NULL weight to weight_test
INSERT INTO weight_test VALUES (NULL, 1, 6000), (60, NULL, 7000);
SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM weight_test;
-- Test empty result
-- No row in weight_test has "value" > 1000, so the aggregate runs over zero rows
SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value")
FROM weight_test WHERE "value" > 1000;
-- Test single weighted value
-- The table holds exactly one row
CREATE TABLE single_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO single_weight VALUES (42, 5, 1000);
SELECT approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value") FROM single_weight;
-- Test equal weights (should behave like regular percentile)
-- All four weights are 1; the weighted and unweighted calls are compared side by side
CREATE TABLE equal_weight("value" INTEGER, weight INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO equal_weight VALUES
(10, 1, 1000), (20, 1, 2000), (30, 1, 3000), (40, 1, 4000);
SELECT
approx_percentile_cont_with_weight(weight, 0.5) WITHIN GROUP (ORDER BY "value"),
approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY "value")
FROM equal_weight;
-- cleanup: drop all six tables created above
DROP TABLE weight_test;
DROP TABLE weight_groups;
DROP TABLE weight_double;
DROP TABLE zero_weight;
DROP TABLE single_weight;
DROP TABLE equal_weight;

View File

@@ -0,0 +1,146 @@
-- Migrated from DuckDB test style: test array aggregation
-- Test ARRAY_AGG function
-- Test with integers
-- Five rows: i = 1..3 in group g = 1, i = 4..5 in group g = 2
CREATE TABLE integers(i INTEGER, g INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO integers VALUES (1, 1, 1000), (2, 1, 2000), (3, 1, 3000), (4, 2, 4000), (5, 2, 5000);
Affected Rows: 5
-- Basic array aggregation
-- NOTE(review): no ORDER BY inside array_agg, so element order follows the scan order; confirm that is stable
SELECT array_agg(i) FROM integers;
+-----------------------+
| array_agg(integers.i) |
+-----------------------+
| [1, 2, 3, 4, 5] |
+-----------------------+
-- Array aggregation with GROUP BY
SELECT g, array_agg(i) FROM integers GROUP BY g ORDER BY g;
+---+-----------------------+
| g | array_agg(integers.i) |
+---+-----------------------+
| 1 | [1, 2, 3] |
| 2 | [4, 5] |
+---+-----------------------+
-- Test with ORDER BY
-- The ORDER BY inside array_agg fixes the element order within each array
SELECT array_agg(i ORDER BY i DESC) FROM integers;
+--------------------------------------------------------------+
| array_agg(integers.i) ORDER BY [integers.i DESC NULLS FIRST] |
+--------------------------------------------------------------+
| [5, 4, 3, 2, 1] |
+--------------------------------------------------------------+
SELECT g, array_agg(i ORDER BY i DESC) FROM integers GROUP BY g ORDER BY g;
+---+--------------------------------------------------------------+
| g | array_agg(integers.i) ORDER BY [integers.i DESC NULLS FIRST] |
+---+--------------------------------------------------------------+
| 1 | [3, 2, 1] |
| 2 | [5, 4] |
+---+--------------------------------------------------------------+
-- Test with strings
-- Five fruit names split across groups g = 1 and g = 2
CREATE TABLE strings(s VARCHAR, g INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO strings VALUES
('apple', 1, 1000), ('banana', 1, 2000), ('cherry', 2, 3000),
('date', 2, 4000), ('elderberry', 1, 5000);
Affected Rows: 5
SELECT array_agg(s) FROM strings;
+-------------------------------------------+
| array_agg(strings.s) |
+-------------------------------------------+
| [apple, banana, cherry, date, elderberry] |
+-------------------------------------------+
SELECT g, array_agg(s ORDER BY s) FROM strings GROUP BY g ORDER BY g;
+---+----------------------------------------------------------+
| g | array_agg(strings.s) ORDER BY [strings.s ASC NULLS LAST] |
+---+----------------------------------------------------------+
| 1 | [apple, banana, elderberry] |
| 2 | [cherry, date] |
+---+----------------------------------------------------------+
-- Test with NULL values
-- Adds one row with a NULL string (in group 1) and one row with a NULL group ('fig')
INSERT INTO strings VALUES (NULL, 1, 6000), ('fig', NULL, 7000);
Affected Rows: 2
-- Drops the NULL string; 'fig' stays because only s is filtered
SELECT array_agg(s) FROM strings WHERE s IS NOT NULL;
+------------------------------------------------+
| array_agg(strings.s) |
+------------------------------------------------+
| [apple, banana, cherry, date, elderberry, fig] |
+------------------------------------------------+
-- Drops the NULL-group row; the NULL string in group 1 is still aggregated
SELECT g, array_agg(s) FROM strings WHERE g IS NOT NULL GROUP BY g ORDER BY g;
+---+-------------------------------+
| g | array_agg(strings.s) |
+---+-------------------------------+
| 1 | [apple, banana, elderberry, ] |
| 2 | [cherry, date] |
+---+-------------------------------+
-- Test with DISTINCT
-- DISTINCT and ORDER BY are combined inside the same array_agg call
SELECT array_agg(DISTINCT s ORDER BY s) FROM strings WHERE s IS NOT NULL;
+-------------------------------------------------------------------+
| array_agg(DISTINCT strings.s) ORDER BY [strings.s ASC NULLS LAST] |
+-------------------------------------------------------------------+
| [apple, banana, cherry, date, elderberry, fig] |
+-------------------------------------------------------------------+
-- Test empty result
-- No i in integers exceeds 100, so array_agg runs over zero rows
SELECT array_agg(i) FROM integers WHERE i > 100;
+-----------------------+
| array_agg(integers.i) |
+-----------------------+
| |
+-----------------------+
-- Test with doubles
-- Four DOUBLE values inserted in ascending order
CREATE TABLE doubles(d DOUBLE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO doubles VALUES (1.1, 1000), (2.2, 2000), (3.3, 3000), (4.4, 4000);
Affected Rows: 4
SELECT array_agg(d ORDER BY d) FROM doubles;
+----------------------------------------------------------+
| array_agg(doubles.d) ORDER BY [doubles.d ASC NULLS LAST] |
+----------------------------------------------------------+
| [1.1, 2.2, 3.3, 4.4] |
+----------------------------------------------------------+
-- cleanup: drop the three tables created by this test
DROP TABLE integers;
Affected Rows: 0
DROP TABLE strings;
Affected Rows: 0
DROP TABLE doubles;
Affected Rows: 0

View File

@@ -0,0 +1,56 @@
-- Migrated from DuckDB test style: test array aggregation
-- Test ARRAY_AGG function
-- Test with integers
-- Five rows: i = 1..3 in group g = 1, i = 4..5 in group g = 2
CREATE TABLE integers(i INTEGER, g INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO integers VALUES (1, 1, 1000), (2, 1, 2000), (3, 1, 3000), (4, 2, 4000), (5, 2, 5000);
-- Basic array aggregation
-- NOTE(review): no ORDER BY inside array_agg, so element order follows the scan order; confirm that is stable
SELECT array_agg(i) FROM integers;
-- Array aggregation with GROUP BY
SELECT g, array_agg(i) FROM integers GROUP BY g ORDER BY g;
-- Test with ORDER BY
-- The ORDER BY inside array_agg fixes the element order within each array
SELECT array_agg(i ORDER BY i DESC) FROM integers;
SELECT g, array_agg(i ORDER BY i DESC) FROM integers GROUP BY g ORDER BY g;
-- Test with strings
-- Five fruit names split across groups g = 1 and g = 2
CREATE TABLE strings(s VARCHAR, g INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO strings VALUES
('apple', 1, 1000), ('banana', 1, 2000), ('cherry', 2, 3000),
('date', 2, 4000), ('elderberry', 1, 5000);
SELECT array_agg(s) FROM strings;
SELECT g, array_agg(s ORDER BY s) FROM strings GROUP BY g ORDER BY g;
-- Test with NULL values
-- Adds one row with a NULL string (in group 1) and one row with a NULL group ('fig')
INSERT INTO strings VALUES (NULL, 1, 6000), ('fig', NULL, 7000);
-- Drops the NULL string; 'fig' stays because only s is filtered
SELECT array_agg(s) FROM strings WHERE s IS NOT NULL;
-- Drops the NULL-group row; the NULL string in group 1 is still aggregated
SELECT g, array_agg(s) FROM strings WHERE g IS NOT NULL GROUP BY g ORDER BY g;
-- Test with DISTINCT
-- DISTINCT and ORDER BY are combined inside the same array_agg call
SELECT array_agg(DISTINCT s ORDER BY s) FROM strings WHERE s IS NOT NULL;
-- Test empty result
-- No i in integers exceeds 100, so array_agg runs over zero rows
SELECT array_agg(i) FROM integers WHERE i > 100;
-- Test with doubles
-- Four DOUBLE values inserted in ascending order
CREATE TABLE doubles(d DOUBLE, ts TIMESTAMP TIME INDEX);
INSERT INTO doubles VALUES (1.1, 1000), (2.2, 2000), (3.3, 3000), (4.4, 4000);
SELECT array_agg(d ORDER BY d) FROM doubles;
-- cleanup: drop the three tables created by this test
DROP TABLE integers;
DROP TABLE strings;
DROP TABLE doubles;

View File

@@ -0,0 +1,102 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_avg.test
-- Test AVG aggregate function
-- scalar average
SELECT AVG(3);
+---------------+
| avg(Int64(3)) |
+---------------+
| 3.0 |
+---------------+
-- FIXME(dennis): unsupported type
-- SELECT AVG(NULL);
-- NULL cast to a concrete type is used in place of the bare NULL above
SELECT AVG(3::SMALLINT), AVG(NULL::SMALLINT);
+---------------+-----------+
| avg(Int64(3)) | avg(NULL) |
+---------------+-----------+
| 3.0 | |
+---------------+-----------+
SELECT AVG(3::DOUBLE), AVG(NULL::DOUBLE);
+---------------+-----------+
| avg(Int64(3)) | avg(NULL) |
+---------------+-----------+
| 3.0 | |
+---------------+-----------+
-- test average with table
CREATE TABLE integers(i INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO integers VALUES (1, 1000), (2, 2000), (3, 3000);
Affected Rows: 3
-- NOTE(review): this statement is recorded as a planning error because of the bare AVG(NULL) argument,
-- so AVG(i), AVG(1) and AVG(DISTINCT i) are not actually exercised by it
SELECT AVG(i), AVG(1), AVG(DISTINCT i), AVG(NULL) FROM integers;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Execution error: Function 'avg' user-defined coercion failed with "Error during planning: The function \"avg\" does not support inputs of type Null." No function matches the given name and argument types 'avg(Null)'. You might need to add explicit type casts.
Candidate functions:
avg(UserDefined)
-- No i in integers exceeds 100: average over zero rows
SELECT AVG(i) FROM integers WHERE i > 100;
+-----------------+
| avg(integers.i) |
+-----------------+
| |
+-----------------+
-- empty average: a single row whose i, j and k are all NULL
CREATE TABLE vals(i INTEGER, j DOUBLE, k BIGINT, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO vals VALUES (NULL, NULL, NULL, 1000);
Affected Rows: 1
SELECT AVG(i), AVG(j), AVG(k) FROM vals;
+-------------+-------------+-------------+
| avg(vals.i) | avg(vals.j) | avg(vals.k) |
+-------------+-------------+-------------+
| | | |
+-------------+-------------+-------------+
-- test with mixed values
-- vals is recreated without the k column
DROP TABLE vals;
Affected Rows: 0
CREATE TABLE vals(i INTEGER, j DOUBLE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
-- Three rows with values plus one row whose i and j are NULL
INSERT INTO vals VALUES (1, 1.5, 1000), (2, 2.5, 2000), (3, 3.5, 3000), (NULL, NULL, 4000);
Affected Rows: 4
SELECT AVG(i), AVG(j) FROM vals;
+-------------+-------------+
| avg(vals.i) | avg(vals.j) |
+-------------+-------------+
| 2.0 | 2.5 |
+-------------+-------------+
-- FIXME(dennis): AVG(DISTINCT) not supported
-- https://github.com/apache/datafusion/issues/2408
-- SELECT AVG(DISTINCT i), AVG(DISTINCT j) FROM vals;
-- cleanup: drop both tables used by this test
DROP TABLE integers;
Affected Rows: 0
DROP TABLE vals;
Affected Rows: 0

View File

@@ -0,0 +1,46 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_avg.test
-- Test AVG aggregate function
-- scalar average
SELECT AVG(3);
-- FIXME(dennis): unsupported type
-- SELECT AVG(NULL);
-- NULL cast to a concrete type is used in place of the bare NULL above
SELECT AVG(3::SMALLINT), AVG(NULL::SMALLINT);
SELECT AVG(3::DOUBLE), AVG(NULL::DOUBLE);
-- test average with table
CREATE TABLE integers(i INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO integers VALUES (1, 1000), (2, 2000), (3, 3000);
-- NOTE(review): this statement is recorded as a planning error because of the bare AVG(NULL) argument,
-- so AVG(i), AVG(1) and AVG(DISTINCT i) are not actually exercised by it
SELECT AVG(i), AVG(1), AVG(DISTINCT i), AVG(NULL) FROM integers;
-- No i in integers exceeds 100: average over zero rows
SELECT AVG(i) FROM integers WHERE i > 100;
-- empty average: a single row whose i, j and k are all NULL
CREATE TABLE vals(i INTEGER, j DOUBLE, k BIGINT, ts TIMESTAMP TIME INDEX);
INSERT INTO vals VALUES (NULL, NULL, NULL, 1000);
SELECT AVG(i), AVG(j), AVG(k) FROM vals;
-- test with mixed values
-- vals is recreated without the k column
DROP TABLE vals;
CREATE TABLE vals(i INTEGER, j DOUBLE, ts TIMESTAMP TIME INDEX);
-- Three rows with values plus one row whose i and j are NULL
INSERT INTO vals VALUES (1, 1.5, 1000), (2, 2.5, 2000), (3, 3.5, 3000), (NULL, NULL, 4000);
SELECT AVG(i), AVG(j) FROM vals;
-- FIXME(dennis): AVG(DISTINCT) not supported
-- https://github.com/apache/datafusion/issues/2408
-- SELECT AVG(DISTINCT i), AVG(DISTINCT j) FROM vals;
-- cleanup: drop both tables used by this test
DROP TABLE integers;
DROP TABLE vals;

View File

@@ -0,0 +1,103 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_bit_*.test
-- Test bitwise aggregate operations
-- Test BIT_AND
-- Three small integers (bit patterns noted inline) plus one NULL row
CREATE TABLE bit_test(i INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO bit_test VALUES
(7, 1000), -- 111
(3, 2000), -- 011
(5, 3000), -- 101
(NULL, 4000);
Affected Rows: 4
-- Should be 1 (001)
SELECT BIT_AND(i) FROM bit_test;
+---------------------+
| bit_and(bit_test.i) |
+---------------------+
| 1 |
+---------------------+
-- Test BIT_OR
-- Should be 7 (111)
SELECT BIT_OR(i) FROM bit_test;
+--------------------+
| bit_or(bit_test.i) |
+--------------------+
| 7 |
+--------------------+
-- Test BIT_XOR
-- Should be 1 (111 XOR 011 XOR 101)
SELECT BIT_XOR(i) FROM bit_test;
+---------------------+
| bit_xor(bit_test.i) |
+---------------------+
| 1 |
+---------------------+
-- Test with groups
-- NOTE(review): these extra bit_test rows are not read by any later query (the remaining
-- bit_test queries filter on i > 100); the grouped test below uses bit_groups instead
INSERT INTO bit_test VALUES (8, 5000), (12, 6000), (4, 7000);
Affected Rows: 3
-- Create separate table for group testing
-- grp 1 holds 7, 3, 5; grp 2 holds 8 (1000), 12 (1100), 4 (0100)
CREATE TABLE bit_groups(grp INTEGER, i INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO bit_groups VALUES
(1, 7, 1000), (1, 3, 2000), (1, 5, 3000),
(2, 8, 4000), (2, 12, 5000), (2, 4, 6000);
Affected Rows: 6
SELECT grp, BIT_AND(i), BIT_OR(i), BIT_XOR(i) FROM bit_groups GROUP BY grp ORDER BY grp;
+-----+-----------------------+----------------------+-----------------------+
| grp | bit_and(bit_groups.i) | bit_or(bit_groups.i) | bit_xor(bit_groups.i) |
+-----+-----------------------+----------------------+-----------------------+
| 1 | 1 | 7 | 1 |
| 2 | 0 | 12 | 0 |
+-----+-----------------------+----------------------+-----------------------+
-- Test edge cases
-- NULL: no i in bit_test exceeds 100, so each aggregate runs over zero rows
SELECT BIT_AND(i) FROM bit_test WHERE i > 100;
+---------------------+
| bit_and(bit_test.i) |
+---------------------+
| |
+---------------------+
SELECT BIT_OR(i) FROM bit_test WHERE i > 100;
+--------------------+
| bit_or(bit_test.i) |
+--------------------+
| |
+--------------------+
SELECT BIT_XOR(i) FROM bit_test WHERE i > 100;
+---------------------+
| bit_xor(bit_test.i) |
+---------------------+
| |
+---------------------+
-- cleanup: drop both tables used by this test
DROP TABLE bit_test;
Affected Rows: 0
DROP TABLE bit_groups;
Affected Rows: 0

View File

@@ -0,0 +1,46 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_bit_*.test
-- Test bitwise aggregate operations
-- Test BIT_AND
-- Three small integers (bit patterns noted inline) plus one NULL row
CREATE TABLE bit_test(i INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO bit_test VALUES
(7, 1000), -- 111
(3, 2000), -- 011
(5, 3000), -- 101
(NULL, 4000);
-- Should be 1 (001)
SELECT BIT_AND(i) FROM bit_test;
-- Test BIT_OR
-- Should be 7 (111)
SELECT BIT_OR(i) FROM bit_test;
-- Test BIT_XOR
-- Should be 1 (111 XOR 011 XOR 101)
SELECT BIT_XOR(i) FROM bit_test;
-- Test with groups
-- NOTE(review): these extra bit_test rows are not read by any later query (the remaining
-- bit_test queries filter on i > 100); the grouped test below uses bit_groups instead
INSERT INTO bit_test VALUES (8, 5000), (12, 6000), (4, 7000);
-- Create separate table for group testing
-- grp 1 holds 7, 3, 5; grp 2 holds 8 (1000), 12 (1100), 4 (0100)
CREATE TABLE bit_groups(grp INTEGER, i INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO bit_groups VALUES
(1, 7, 1000), (1, 3, 2000), (1, 5, 3000),
(2, 8, 4000), (2, 12, 5000), (2, 4, 6000);
SELECT grp, BIT_AND(i), BIT_OR(i), BIT_XOR(i) FROM bit_groups GROUP BY grp ORDER BY grp;
-- Test edge cases
-- NULL: no i in bit_test exceeds 100, so each aggregate runs over zero rows
SELECT BIT_AND(i) FROM bit_test WHERE i > 100;
SELECT BIT_OR(i) FROM bit_test WHERE i > 100;
SELECT BIT_XOR(i) FROM bit_test WHERE i > 100;
-- cleanup: drop both tables used by this test
DROP TABLE bit_test;
DROP TABLE bit_groups;

View File

@@ -0,0 +1,178 @@
-- Migrated from DuckDB test style: test boolean aggregation
-- Test BOOL_AND and BOOL_OR functions
-- Test with boolean values
-- g = 1: all true; g = 2: mixed true/false; g = 3: one NULL and one true
CREATE TABLE bool_test(b BOOLEAN, g INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO bool_test VALUES
(true, 1, 1000), (true, 1, 2000), (true, 1, 3000),
(false, 2, 4000), (true, 2, 5000), (false, 2, 6000),
(NULL, 3, 7000), (true, 3, 8000);
Affected Rows: 8
-- Test BOOL_AND (all values must be true) and BOOL_OR (any value can be true)
-- Should be true
SELECT bool_and(b) FROM bool_test WHERE g = 1;
+-----------------------+
| bool_and(bool_test.b) |
+-----------------------+
| true |
+-----------------------+
-- Should be false
SELECT bool_and(b) FROM bool_test WHERE g = 2;
+-----------------------+
| bool_and(bool_test.b) |
+-----------------------+
| false |
+-----------------------+
-- Should be true (NULL ignored)
SELECT bool_and(b) FROM bool_test WHERE g = 3;
+-----------------------+
| bool_and(bool_test.b) |
+-----------------------+
| true |
+-----------------------+
-- Should be true
SELECT bool_or(b) FROM bool_test WHERE g = 1;
+----------------------+
| bool_or(bool_test.b) |
+----------------------+
| true |
+----------------------+
-- Should be true
SELECT bool_or(b) FROM bool_test WHERE g = 2;
+----------------------+
| bool_or(bool_test.b) |
+----------------------+
| true |
+----------------------+
-- Should be true
SELECT bool_or(b) FROM bool_test WHERE g = 3;
+----------------------+
| bool_or(bool_test.b) |
+----------------------+
| true |
+----------------------+
-- Test with GROUP BY
-- Same three groups as the per-group queries above, in one statement
SELECT g, bool_and(b), bool_or(b) FROM bool_test GROUP BY g ORDER BY g;
+---+-----------------------+----------------------+
| g | bool_and(bool_test.b) | bool_or(bool_test.b) |
+---+-----------------------+----------------------+
| 1 | true | true |
| 2 | false | true |
| 3 | true | true |
+---+-----------------------+----------------------+
-- Test all true values
CREATE TABLE all_true(b BOOLEAN, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO all_true VALUES (true, 1000), (true, 2000), (true, 3000);
Affected Rows: 3
SELECT bool_and(b), bool_or(b) FROM all_true;
+----------------------+---------------------+
| bool_and(all_true.b) | bool_or(all_true.b) |
+----------------------+---------------------+
| true | true |
+----------------------+---------------------+
-- Test all false values
CREATE TABLE all_false(b BOOLEAN, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO all_false VALUES (false, 1000), (false, 2000), (false, 3000);
Affected Rows: 3
SELECT bool_and(b), bool_or(b) FROM all_false;
+-----------------------+----------------------+
| bool_and(all_false.b) | bool_or(all_false.b) |
+-----------------------+----------------------+
| false | false |
+-----------------------+----------------------+
-- Test all NULL values
-- Every b in this table is NULL
CREATE TABLE all_null(b BOOLEAN, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO all_null VALUES (NULL, 1000), (NULL, 2000), (NULL, 3000);
Affected Rows: 3
SELECT bool_and(b), bool_or(b) FROM all_null;
+----------------------+---------------------+
| bool_and(all_null.b) | bool_or(all_null.b) |
+----------------------+---------------------+
| | |
+----------------------+---------------------+
-- Test empty result
-- No g in bool_test exceeds 100, so both aggregates run over zero rows
SELECT bool_and(b), bool_or(b) FROM bool_test WHERE g > 100;
+-----------------------+----------------------+
| bool_and(bool_test.b) | bool_or(bool_test.b) |
+-----------------------+----------------------+
| | |
+-----------------------+----------------------+
-- Test with integer expressions (converted to boolean)
-- The boolean inputs come from the comparisons i > 0 and i > 1 over the values 0, 1, 2 and NULL
CREATE TABLE int_test(i INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO int_test VALUES (0, 1000), (1, 2000), (2, 3000), (NULL, 4000);
Affected Rows: 4
SELECT bool_and(i > 0), bool_or(i > 1) FROM int_test;
+---------------------------------+--------------------------------+
| bool_and(int_test.i > Int64(0)) | bool_or(int_test.i > Int64(1)) |
+---------------------------------+--------------------------------+
| false | true |
+---------------------------------+--------------------------------+
-- cleanup: drop the five tables created by this test
DROP TABLE bool_test;
Affected Rows: 0
DROP TABLE all_true;
Affected Rows: 0
DROP TABLE all_false;
Affected Rows: 0
DROP TABLE all_null;
Affected Rows: 0
DROP TABLE int_test;
Affected Rows: 0

View File

@@ -0,0 +1,74 @@
-- Migrated from DuckDB test style: test boolean aggregation
-- Test BOOL_AND and BOOL_OR functions
-- Test with boolean values
-- g = 1: all true; g = 2: mixed true/false; g = 3: one NULL and one true
CREATE TABLE bool_test(b BOOLEAN, g INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO bool_test VALUES
(true, 1, 1000), (true, 1, 2000), (true, 1, 3000),
(false, 2, 4000), (true, 2, 5000), (false, 2, 6000),
(NULL, 3, 7000), (true, 3, 8000);
-- Test BOOL_AND (all values must be true) and BOOL_OR (any value can be true)
-- Should be true
SELECT bool_and(b) FROM bool_test WHERE g = 1;
-- Should be false
SELECT bool_and(b) FROM bool_test WHERE g = 2;
-- Should be true (NULL ignored)
SELECT bool_and(b) FROM bool_test WHERE g = 3;
-- Should be true
SELECT bool_or(b) FROM bool_test WHERE g = 1;
-- Should be true
SELECT bool_or(b) FROM bool_test WHERE g = 2;
-- Should be true
SELECT bool_or(b) FROM bool_test WHERE g = 3;
-- Test with GROUP BY
-- Same three groups as the per-group queries above, in one statement
SELECT g, bool_and(b), bool_or(b) FROM bool_test GROUP BY g ORDER BY g;
-- Test all true values
CREATE TABLE all_true(b BOOLEAN, ts TIMESTAMP TIME INDEX);
INSERT INTO all_true VALUES (true, 1000), (true, 2000), (true, 3000);
SELECT bool_and(b), bool_or(b) FROM all_true;
-- Test all false values
CREATE TABLE all_false(b BOOLEAN, ts TIMESTAMP TIME INDEX);
INSERT INTO all_false VALUES (false, 1000), (false, 2000), (false, 3000);
SELECT bool_and(b), bool_or(b) FROM all_false;
-- Test all NULL values
-- Every b in this table is NULL
CREATE TABLE all_null(b BOOLEAN, ts TIMESTAMP TIME INDEX);
INSERT INTO all_null VALUES (NULL, 1000), (NULL, 2000), (NULL, 3000);
SELECT bool_and(b), bool_or(b) FROM all_null;
-- Test empty result
-- No g in bool_test exceeds 100, so both aggregates run over zero rows
SELECT bool_and(b), bool_or(b) FROM bool_test WHERE g > 100;
-- Test with integer expressions (converted to boolean)
-- The boolean inputs come from the comparisons i > 0 and i > 1 over the values 0, 1, 2 and NULL
CREATE TABLE int_test(i INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO int_test VALUES (0, 1000), (1, 2000), (2, 3000), (NULL, 4000);
SELECT bool_and(i > 0), bool_or(i > 1) FROM int_test;
-- cleanup: drop the five tables created by this test
DROP TABLE bool_test;
DROP TABLE all_true;
DROP TABLE all_false;
DROP TABLE all_null;
DROP TABLE int_test;

View File

@@ -0,0 +1,100 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_corr.test
-- Test CORR operator (correlation coefficient)
-- Corner cases
-- Both arguments are bare NULL literals
SELECT corr(NULL,NULL);
+-----------------+
| corr(NULL,NULL) |
+-----------------+
| |
+-----------------+
-- Single value returns NULL
-- FIXME(dennis): datafusion returns 0.0 here, should be NULL
SELECT corr(1,1);
+-------------------------+
| corr(Int64(1),Int64(1)) |
+-------------------------+
| 0.0 |
+-------------------------+
-- Test with table
-- k = 1 has one row with NULL v2; k = 2 has four rows, one of them with NULL v2
CREATE TABLE aggr(k INT, v DECIMAL(10,2), v2 DECIMAL(10, 2), ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO aggr VALUES
(1, 10, null, 1000),
(2, 10, 11, 2000),
(2, 20, 22, 3000),
(2, 25, null, 4000),
(2, 30, 35, 5000);
Affected Rows: 5
SELECT k, corr(v, v2) FROM aggr GROUP BY k ORDER BY k;
+---+----------------------+
| k | corr(aggr.v,aggr.v2) |
+---+----------------------+
| 1 | |
| 2 | 0.9988445981121536 |
+---+----------------------+
-- NOTE(review): the only rows with both v and v2 set belong to k = 2, yet the value recorded for this
-- query differs from the k = 2 group value in its last digits; confirm the output is stable across runs
SELECT corr(v, v2) FROM aggr;
+----------------------+
| corr(aggr.v,aggr.v2) |
+----------------------+
| 0.9988445981121532 |
+----------------------+
-- Test with integer values
-- The first five rows satisfy y = 2 * x
CREATE TABLE corr_test(x INTEGER, y INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO corr_test VALUES
(1, 2, 1000),
(2, 4, 2000),
(3, 6, 3000),
(4, 8, 4000),
(5, 10, 5000);
Affected Rows: 5
-- Perfect positive correlation
SELECT corr(x, y) FROM corr_test;
+-------------------------------+
| corr(corr_test.x,corr_test.y) |
+-------------------------------+
| 0.9999999999999999 |
+-------------------------------+
-- Test with negative correlation
-- Appends three rows where y decreases as x increases
INSERT INTO corr_test VALUES
(6, 5, 6000),
(7, 3, 7000),
(8, 1, 8000);
Affected Rows: 3
SELECT corr(x, y) FROM corr_test;
+-------------------------------+
| corr(corr_test.x,corr_test.y) |
+-------------------------------+
| -0.12452312927991684 |
+-------------------------------+
-- cleanup: drop both tables used by this test
DROP TABLE aggr;
Affected Rows: 0
DROP TABLE corr_test;
Affected Rows: 0

View File

@@ -0,0 +1,49 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_corr.test
-- Test CORR operator (correlation coefficient)
-- Corner cases
-- Both arguments are bare NULL literals
SELECT corr(NULL,NULL);
-- Single value returns NULL
-- FIXME(dennis): datafusion returns 0.0 here, should be NULL
SELECT corr(1,1);
-- Test with table
-- k = 1 has one row with NULL v2; k = 2 has four rows, one of them with NULL v2
CREATE TABLE aggr(k INT, v DECIMAL(10,2), v2 DECIMAL(10, 2), ts TIMESTAMP TIME INDEX);
INSERT INTO aggr VALUES
(1, 10, null, 1000),
(2, 10, 11, 2000),
(2, 20, 22, 3000),
(2, 25, null, 4000),
(2, 30, 35, 5000);
SELECT k, corr(v, v2) FROM aggr GROUP BY k ORDER BY k;
-- NOTE(review): the only rows with both v and v2 set belong to k = 2, yet the value recorded for this
-- query differs from the k = 2 group value in its last digits; confirm the output is stable across runs
SELECT corr(v, v2) FROM aggr;
-- Test with integer values
-- The first five rows satisfy y = 2 * x
CREATE TABLE corr_test(x INTEGER, y INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO corr_test VALUES
(1, 2, 1000),
(2, 4, 2000),
(3, 6, 3000),
(4, 8, 4000),
(5, 10, 5000);
-- Perfect positive correlation
SELECT corr(x, y) FROM corr_test;
-- Test with negative correlation
-- Appends three rows where y decreases as x increases
INSERT INTO corr_test VALUES
(6, 5, 6000),
(7, 3, 7000),
(8, 1, 8000);
SELECT corr(x, y) FROM corr_test;
-- cleanup: drop both tables used by this test
DROP TABLE aggr;
DROP TABLE corr_test;

View File

@@ -0,0 +1,84 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_covar.test
-- Test COVAR operators (covariance)
-- Test population covariance on scalar values
-- Literal arguments with no FROM clause; NULL appears in neither, one, or both positions
SELECT COVAR_POP(3,3), COVAR_POP(NULL,3), COVAR_POP(3,NULL), COVAR_POP(NULL,NULL);
+------------------------------+--------------------------+--------------------------+----------------------+
| covar_pop(Int64(3),Int64(3)) | covar_pop(NULL,Int64(3)) | covar_pop(Int64(3),NULL) | covar_pop(NULL,NULL) |
+------------------------------+--------------------------+--------------------------+----------------------+
| 0.0 | | | |
+------------------------------+--------------------------+--------------------------+----------------------+
-- Test sample covariance on scalar values
-- Same argument combinations as the population query above
SELECT COVAR_SAMP(3,3), COVAR_SAMP(NULL,3), COVAR_SAMP(3,NULL), COVAR_SAMP(NULL,NULL);
+-------------------------------+---------------------------+---------------------------+-----------------------+
| covar_samp(Int64(3),Int64(3)) | covar_samp(NULL,Int64(3)) | covar_samp(Int64(3),NULL) | covar_samp(NULL,NULL) |
+-------------------------------+---------------------------+---------------------------+-----------------------+
| | | | |
+-------------------------------+---------------------------+---------------------------+-----------------------+
-- Test population covariance on a set of values
-- Two of the five rows have a NULL y
CREATE TABLE integers(x INTEGER, y INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO integers VALUES
(10, NULL, 1000),
(10, 11, 2000),
(20, 22, 3000),
(25, NULL, 4000),
(30, 35, 5000);
Affected Rows: 5
-- Mixes column/column, column/constant and column/NULL argument pairs
SELECT COVAR_POP(x,y), COVAR_POP(x,1), COVAR_POP(1,y), COVAR_POP(x,NULL), COVAR_POP(NULL,y) FROM integers;
+----------------------------------+--------------------------------+--------------------------------+----------------------------+----------------------------+
| covar_pop(integers.x,integers.y) | covar_pop(integers.x,Int64(1)) | covar_pop(Int64(1),integers.y) | covar_pop(integers.x,NULL) | covar_pop(NULL,integers.y) |
+----------------------------------+--------------------------------+--------------------------------+----------------------------+----------------------------+
| 79.99999999999999 | 0.0 | 0.0 | | |
+----------------------------------+--------------------------------+--------------------------------+----------------------------+----------------------------+
-- Test sample covariance
SELECT COVAR_SAMP(x,y), COVAR_SAMP(x,1), COVAR_SAMP(1,y) FROM integers;
+-----------------------------------+---------------------------------+---------------------------------+
| covar_samp(integers.x,integers.y) | covar_samp(integers.x,Int64(1)) | covar_samp(Int64(1),integers.y) |
+-----------------------------------+---------------------------------+---------------------------------+
| 119.99999999999999 | 0.0 | 0.0 |
+-----------------------------------+---------------------------------+---------------------------------+
-- Test grouped covariance
-- grp 1 rows satisfy y = 2 * x; grp 2 rows satisfy y = x / 2
CREATE TABLE covar_data(grp INTEGER, x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO covar_data VALUES
(1, 1.0, 2.0, 1000),
(1, 2.0, 4.0, 2000),
(1, 3.0, 6.0, 3000),
(2, 10.0, 5.0, 4000),
(2, 20.0, 10.0, 5000),
(2, 30.0, 15.0, 6000);
Affected Rows: 6
SELECT grp, COVAR_POP(x, y), COVAR_SAMP(x, y) FROM covar_data GROUP BY grp ORDER BY grp;
+-----+--------------------------------------+---------------------------------------+
| grp | covar_pop(covar_data.x,covar_data.y) | covar_samp(covar_data.x,covar_data.y) |
+-----+--------------------------------------+---------------------------------------+
| 1 | 1.3333333333333333 | 2.0 |
| 2 | 33.333333333333336 | 50.0 |
+-----+--------------------------------------+---------------------------------------+
-- cleanup: drop both tables used by this test
DROP TABLE integers;
Affected Rows: 0
DROP TABLE covar_data;
Affected Rows: 0

View File

@@ -0,0 +1,41 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_covar.test
-- Test COVAR operators (covariance)
-- Test population covariance on scalar values
-- Literal arguments with no FROM clause; NULL appears in neither, one, or both positions
SELECT COVAR_POP(3,3), COVAR_POP(NULL,3), COVAR_POP(3,NULL), COVAR_POP(NULL,NULL);
-- Test sample covariance on scalar values
-- Same argument combinations as the population query above
SELECT COVAR_SAMP(3,3), COVAR_SAMP(NULL,3), COVAR_SAMP(3,NULL), COVAR_SAMP(NULL,NULL);
-- Test population covariance on a set of values
-- Two of the five rows have a NULL y
CREATE TABLE integers(x INTEGER, y INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO integers VALUES
(10, NULL, 1000),
(10, 11, 2000),
(20, 22, 3000),
(25, NULL, 4000),
(30, 35, 5000);
-- Mixes column/column, column/constant and column/NULL argument pairs
SELECT COVAR_POP(x,y), COVAR_POP(x,1), COVAR_POP(1,y), COVAR_POP(x,NULL), COVAR_POP(NULL,y) FROM integers;
-- Test sample covariance
SELECT COVAR_SAMP(x,y), COVAR_SAMP(x,1), COVAR_SAMP(1,y) FROM integers;
-- Test grouped covariance
-- grp 1 rows satisfy y = 2 * x; grp 2 rows satisfy y = x / 2
CREATE TABLE covar_data(grp INTEGER, x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
INSERT INTO covar_data VALUES
(1, 1.0, 2.0, 1000),
(1, 2.0, 4.0, 2000),
(1, 3.0, 6.0, 3000),
(2, 10.0, 5.0, 4000),
(2, 20.0, 10.0, 5000),
(2, 30.0, 15.0, 6000);
SELECT grp, COVAR_POP(x, y), COVAR_SAMP(x, y) FROM covar_data GROUP BY grp ORDER BY grp;
-- cleanup: drop both tables used by this test
DROP TABLE integers;
DROP TABLE covar_data;

View File

@@ -0,0 +1,255 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_last.test
-- Test FIRST and LAST aggregate functions (exercised here through first_value / last_value)
-- Test with integers (no ORDER BY inside the aggregate; expected rows match ascending ts order. NOTE(review): confirm the engine guarantees that scan order)
CREATE TABLE five(i INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO five VALUES (1, 1000), (2, 2000), (3, 3000), (4, 4000), (5, 5000);
Affected Rows: 5
SELECT last_value(i) FROM five;
+--------------------+
| last_value(five.i) |
+--------------------+
| 5 |
+--------------------+
SELECT first_value(i) FROM five;
+---------------------+
| first_value(five.i) |
+---------------------+
| 1 |
+---------------------+
SELECT i % 3 AS g, last_value(i) FROM five GROUP BY g ORDER BY g;
+---+--------------------+
| g | last_value(five.i) |
+---+--------------------+
| 0 | 3 |
| 1 | 4 |
| 2 | 5 |
+---+--------------------+
SELECT i % 3 AS g, first_value(i) FROM five GROUP BY g ORDER BY g;
+---+---------------------+
| g | first_value(five.i) |
+---+---------------------+
| 0 | 3 |
| 1 | 1 |
| 2 | 2 |
+---+---------------------+
-- Test with ORDER BY inside the aggregate (DESC, so last_value yields the smallest i and first_value the largest)
SELECT last_value(i ORDER BY i DESC) FROM five;
+-------------------------------------------------------+
| last_value(five.i) ORDER BY [five.i DESC NULLS FIRST] |
+-------------------------------------------------------+
| 1 |
+-------------------------------------------------------+
SELECT first_value(i ORDER BY i DESC) FROM five;
+--------------------------------------------------------+
| first_value(five.i) ORDER BY [five.i DESC NULLS FIRST] |
+--------------------------------------------------------+
| 5 |
+--------------------------------------------------------+
SELECT i % 3 AS g, last_value(i ORDER BY i DESC) FROM five GROUP BY g ORDER BY g;
+---+-------------------------------------------------------+
| g | last_value(five.i) ORDER BY [five.i DESC NULLS FIRST] |
+---+-------------------------------------------------------+
| 0 | 3 |
| 1 | 1 |
| 2 | 2 |
+---+-------------------------------------------------------+
SELECT i % 3 AS g, first_value(i ORDER BY i DESC) FROM five GROUP BY g ORDER BY g;
+---+--------------------------------------------------------+
| g | first_value(five.i) ORDER BY [five.i DESC NULLS FIRST] |
+---+--------------------------------------------------------+
| 0 | 3 |
| 1 | 4 |
| 2 | 5 |
+---+--------------------------------------------------------+
-- Test with strings
CREATE TABLE strings(s VARCHAR, g INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO strings VALUES ('apple', 1, 1000), ('banana', 1, 2000), ('cherry', 2, 3000), ('date', 2, 4000), ('elderberry', 3, 5000);
Affected Rows: 5
SELECT last_value(s) FROM strings;
+-----------------------+
| last_value(strings.s) |
+-----------------------+
| elderberry |
+-----------------------+
SELECT first_value(s) FROM strings;
+------------------------+
| first_value(strings.s) |
+------------------------+
| apple |
+------------------------+
SELECT g, last_value(s) FROM strings GROUP BY g ORDER BY g;
+---+-----------------------+
| g | last_value(strings.s) |
+---+-----------------------+
| 1 | banana |
| 2 | date |
| 3 | elderberry |
+---+-----------------------+
SELECT g, first_value(s) FROM strings GROUP BY g ORDER BY g;
+---+------------------------+
| g | first_value(strings.s) |
+---+------------------------+
| 1 | apple |
| 2 | cherry |
| 3 | elderberry |
+---+------------------------+
-- Test with ORDER BY on strings (ascending by s; for this data each result equals the unordered query on the same rows)
SELECT last_value(s ORDER BY s) FROM strings;
+-----------------------------------------------------------+
| last_value(strings.s) ORDER BY [strings.s ASC NULLS LAST] |
+-----------------------------------------------------------+
| elderberry |
+-----------------------------------------------------------+
SELECT first_value(s ORDER BY s) FROM strings;
+------------------------------------------------------------+
| first_value(strings.s) ORDER BY [strings.s ASC NULLS LAST] |
+------------------------------------------------------------+
| apple |
+------------------------------------------------------------+
SELECT g, last_value(s ORDER BY s) FROM strings GROUP BY g ORDER BY g;
+---+-----------------------------------------------------------+
| g | last_value(strings.s) ORDER BY [strings.s ASC NULLS LAST] |
+---+-----------------------------------------------------------+
| 1 | banana |
| 2 | date |
| 3 | elderberry |
+---+-----------------------------------------------------------+
SELECT g, first_value(s ORDER BY s) FROM strings GROUP BY g ORDER BY g;
+---+------------------------------------------------------------+
| g | first_value(strings.s) ORDER BY [strings.s ASC NULLS LAST] |
+---+------------------------------------------------------------+
| 1 | apple |
| 2 | cherry |
| 3 | elderberry |
+---+------------------------------------------------------------+
-- Test with NULL values (last_value does not skip NULLs: group 1's newest row has s = NULL, so its result is NULL)
INSERT INTO strings VALUES (NULL, 1, 6000), ('fig', NULL, 7000);
Affected Rows: 2
SELECT last_value(s) FROM strings;
+-----------------------+
| last_value(strings.s) |
+-----------------------+
| fig |
+-----------------------+
SELECT first_value(s) FROM strings;
+------------------------+
| first_value(strings.s) |
+------------------------+
| apple |
+------------------------+
SELECT g, last_value(s) FROM strings WHERE g IS NOT NULL GROUP BY g ORDER BY g;
+---+-----------------------+
| g | last_value(strings.s) |
+---+-----------------------+
| 1 | |
| 2 | date |
| 3 | elderberry |
+---+-----------------------+
-- Test with dates
CREATE TABLE dates(d DATE, i INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO dates VALUES ('2021-08-20', 1, 1000), ('2021-08-21', 2, 2000), ('2021-08-22', 3, 3000), ('2021-08-23', 4, 4000), ('2021-08-24', 5, 5000);
Affected Rows: 5
SELECT last_value(d) FROM dates;
+---------------------+
| last_value(dates.d) |
+---------------------+
| 2021-08-24 |
+---------------------+
SELECT first_value(d) FROM dates;
+----------------------+
| first_value(dates.d) |
+----------------------+
| 2021-08-20 |
+----------------------+
SELECT i % 3 AS g, last_value(d) FROM dates GROUP BY g ORDER BY g;
+---+---------------------+
| g | last_value(dates.d) |
+---+---------------------+
| 0 | 2021-08-22 |
| 1 | 2021-08-23 |
| 2 | 2021-08-24 |
+---+---------------------+
SELECT i % 3 AS g, first_value(d) FROM dates GROUP BY g ORDER BY g;
+---+----------------------+
| g | first_value(dates.d) |
+---+----------------------+
| 0 | 2021-08-22 |
| 1 | 2021-08-20 |
| 2 | 2021-08-21 |
+---+----------------------+
-- cleanup
DROP TABLE five;
Affected Rows: 0
DROP TABLE strings;
Affected Rows: 0
DROP TABLE dates;
Affected Rows: 0

View File

@@ -0,0 +1,75 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_last.test
-- Test FIRST and LAST aggregate functions (exercised here through first_value / last_value)
-- Test with integers (no ORDER BY inside the aggregate; expected rows match ascending ts order. NOTE(review): confirm the engine guarantees that scan order)
CREATE TABLE five(i INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO five VALUES (1, 1000), (2, 2000), (3, 3000), (4, 4000), (5, 5000);
SELECT last_value(i) FROM five;
SELECT first_value(i) FROM five;
SELECT i % 3 AS g, last_value(i) FROM five GROUP BY g ORDER BY g;
SELECT i % 3 AS g, first_value(i) FROM five GROUP BY g ORDER BY g;
-- Test with ORDER BY inside the aggregate (DESC, so last_value yields the smallest i and first_value the largest)
SELECT last_value(i ORDER BY i DESC) FROM five;
SELECT first_value(i ORDER BY i DESC) FROM five;
SELECT i % 3 AS g, last_value(i ORDER BY i DESC) FROM five GROUP BY g ORDER BY g;
SELECT i % 3 AS g, first_value(i ORDER BY i DESC) FROM five GROUP BY g ORDER BY g;
-- Test with strings
CREATE TABLE strings(s VARCHAR, g INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO strings VALUES ('apple', 1, 1000), ('banana', 1, 2000), ('cherry', 2, 3000), ('date', 2, 4000), ('elderberry', 3, 5000);
SELECT last_value(s) FROM strings;
SELECT first_value(s) FROM strings;
SELECT g, last_value(s) FROM strings GROUP BY g ORDER BY g;
SELECT g, first_value(s) FROM strings GROUP BY g ORDER BY g;
-- Test with ORDER BY on strings (ascending by s; for this data each result equals the unordered query on the same rows)
SELECT last_value(s ORDER BY s) FROM strings;
SELECT first_value(s ORDER BY s) FROM strings;
SELECT g, last_value(s ORDER BY s) FROM strings GROUP BY g ORDER BY g;
SELECT g, first_value(s ORDER BY s) FROM strings GROUP BY g ORDER BY g;
-- Test with NULL values (last_value does not skip NULLs: group 1's newest row has s = NULL, so its result is NULL)
INSERT INTO strings VALUES (NULL, 1, 6000), ('fig', NULL, 7000);
SELECT last_value(s) FROM strings;
SELECT first_value(s) FROM strings;
SELECT g, last_value(s) FROM strings WHERE g IS NOT NULL GROUP BY g ORDER BY g;
-- Test with dates
CREATE TABLE dates(d DATE, i INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO dates VALUES ('2021-08-20', 1, 1000), ('2021-08-21', 2, 2000), ('2021-08-22', 3, 3000), ('2021-08-23', 4, 4000), ('2021-08-24', 5, 5000);
SELECT last_value(d) FROM dates;
SELECT first_value(d) FROM dates;
SELECT i % 3 AS g, last_value(d) FROM dates GROUP BY g ORDER BY g;
SELECT i % 3 AS g, first_value(d) FROM dates GROUP BY g ORDER BY g;
-- cleanup
DROP TABLE five;
DROP TABLE strings;
DROP TABLE dates;

View File

@@ -0,0 +1,119 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_median.test
-- Test MEDIAN aggregate
-- scalar median (median of NULL is NULL; median of a single constant is that constant)
SELECT median(NULL), median(1);
+--------------+------------------+
| median(NULL) | median(Int64(1)) |
+--------------+------------------+
| | 1 |
+--------------+------------------+
-- test with simple table (r holds 0..9 plus three NULLs; integer-typed inputs give 4, FLOAT/DOUBLE casts give 4.5)
CREATE TABLE quantile(r INTEGER, v DOUBLE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO quantile VALUES
(0, 0.1, 1000), (1, 0.2, 2000), (2, 0.3, 3000), (3, 0.4, 4000), (4, 0.5, 5000),
(5, 0.6, 6000), (6, 0.7, 7000), (7, 0.8, 8000), (8, 0.9, 9000), (9, 1.0, 10000),
(NULL, 0.1, 11000), (NULL, 0.5, 12000), (NULL, 0.9, 13000);
Affected Rows: 13
SELECT median(r)::VARCHAR FROM quantile;
+--------------------+
| median(quantile.r) |
+--------------------+
| 4 |
+--------------------+
SELECT median(r::FLOAT)::VARCHAR FROM quantile;
+--------------------+
| median(quantile.r) |
+--------------------+
| 4.5 |
+--------------------+
SELECT median(r::DOUBLE)::VARCHAR FROM quantile;
+--------------------+
| median(quantile.r) |
+--------------------+
| 4.5 |
+--------------------+
SELECT median(r::SMALLINT)::VARCHAR FROM quantile WHERE r < 100;
+--------------------+
| median(quantile.r) |
+--------------------+
| 4 |
+--------------------+
SELECT median(r::INTEGER)::VARCHAR FROM quantile;
+--------------------+
| median(quantile.r) |
+--------------------+
| 4 |
+--------------------+
SELECT median(r::BIGINT)::VARCHAR FROM quantile;
+--------------------+
| median(quantile.r) |
+--------------------+
| 4 |
+--------------------+
-- test with NULL and constant arguments evaluated over the table's rows (NULL stays NULL, 42 stays 42)
SELECT median(NULL) FROM quantile;
+--------------+
| median(NULL) |
+--------------+
| |
+--------------+
SELECT median(42) FROM quantile;
+-------------------+
| median(Int64(42)) |
+-------------------+
| 42 |
+-------------------+
-- test with grouped data (grp 3 contains only a NULL val, so its median is NULL)
CREATE TABLE median_groups(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO median_groups VALUES
(1, 1, 1000), (2, 1, 2000), (3, 1, 3000), (4, 1, 4000), (5, 1, 5000),
(10, 2, 6000), (20, 2, 7000), (30, 2, 8000), (40, 2, 9000), (50, 2, 10000),
(NULL, 3, 11000);
Affected Rows: 11
SELECT grp, median(val) FROM median_groups GROUP BY grp ORDER BY grp;
+-----+---------------------------+
| grp | median(median_groups.val) |
+-----+---------------------------+
| 1 | 3 |
| 2 | 30 |
| 3 | |
+-----+---------------------------+
-- cleanup
DROP TABLE quantile;
Affected Rows: 0
DROP TABLE median_groups;
Affected Rows: 0

View File

@@ -0,0 +1,45 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_median.test
-- Test MEDIAN aggregate
-- scalar median (median of NULL is NULL; median of a single constant is that constant)
SELECT median(NULL), median(1);
-- test with simple table (r holds 0..9 plus three NULLs; integer-typed inputs give 4, FLOAT/DOUBLE casts give 4.5)
CREATE TABLE quantile(r INTEGER, v DOUBLE, ts TIMESTAMP TIME INDEX);
INSERT INTO quantile VALUES
(0, 0.1, 1000), (1, 0.2, 2000), (2, 0.3, 3000), (3, 0.4, 4000), (4, 0.5, 5000),
(5, 0.6, 6000), (6, 0.7, 7000), (7, 0.8, 8000), (8, 0.9, 9000), (9, 1.0, 10000),
(NULL, 0.1, 11000), (NULL, 0.5, 12000), (NULL, 0.9, 13000);
SELECT median(r)::VARCHAR FROM quantile;
SELECT median(r::FLOAT)::VARCHAR FROM quantile;
SELECT median(r::DOUBLE)::VARCHAR FROM quantile;
SELECT median(r::SMALLINT)::VARCHAR FROM quantile WHERE r < 100;
SELECT median(r::INTEGER)::VARCHAR FROM quantile;
SELECT median(r::BIGINT)::VARCHAR FROM quantile;
-- test with NULL and constant arguments evaluated over the table's rows (NULL stays NULL, 42 stays 42)
SELECT median(NULL) FROM quantile;
SELECT median(42) FROM quantile;
-- test with grouped data (grp 3 contains only a NULL val, so its median is NULL)
CREATE TABLE median_groups(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO median_groups VALUES
(1, 1, 1000), (2, 1, 2000), (3, 1, 3000), (4, 1, 4000), (5, 1, 5000),
(10, 2, 6000), (20, 2, 7000), (30, 2, 8000), (40, 2, 9000), (50, 2, 10000),
(NULL, 3, 11000);
SELECT grp, median(val) FROM median_groups GROUP BY grp ORDER BY grp;
-- cleanup
DROP TABLE quantile;
DROP TABLE median_groups;

View File

@@ -0,0 +1,151 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_aggregate_types.test
-- Test MIN/MAX aggregate functions (together with COUNT) on string, integer, double and boolean columns
-- Test with strings
CREATE TABLE strings(s STRING, g INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO strings VALUES ('hello', 0, 1000), ('world', 1, 2000), (NULL, 0, 3000), ('r', 1, 4000);
Affected Rows: 4
-- simple aggregates only (COUNT(s), MIN and MAX ignore the NULL row; over only-NULL input MIN/MAX are NULL)
SELECT COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings;
+----------+------------------+----------------+----------------+
| count(*) | count(strings.s) | min(strings.s) | max(strings.s) |
+----------+------------------+----------------+----------------+
| 4 | 3 | hello | world |
+----------+------------------+----------------+----------------+
SELECT COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings WHERE s IS NULL;
+----------+------------------+----------------+----------------+
| count(*) | count(strings.s) | min(strings.s) | max(strings.s) |
+----------+------------------+----------------+----------------+
| 1 | 0 | | |
+----------+------------------+----------------+----------------+
-- grouped aggregates
SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings GROUP BY g ORDER BY g;
+---+----------+------------------+----------------+----------------+
| g | count(*) | count(strings.s) | min(strings.s) | max(strings.s) |
+---+----------+------------------+----------------+----------------+
| 0 | 2 | 1 | hello | hello |
| 1 | 2 | 2 | r | world |
+---+----------+------------------+----------------+----------------+
-- group with no non-NULL values: the filter drops 'hello', leaving g = 0 with only its NULL row
SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings WHERE s IS NULL OR s <> 'hello' GROUP BY g ORDER BY g;
+---+----------+------------------+----------------+----------------+
| g | count(*) | count(strings.s) | min(strings.s) | max(strings.s) |
+---+----------+------------------+----------------+----------------+
| 0 | 1 | 0 | | |
| 1 | 2 | 2 | r | world |
+---+----------+------------------+----------------+----------------+
-- Test with integers
CREATE TABLE integers(i INTEGER, g INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO integers VALUES (1, 0, 1000), (5, 1, 2000), (NULL, 0, 3000), (3, 1, 4000), (2, 0, 5000);
Affected Rows: 5
SELECT MIN(i), MAX(i) FROM integers;
+-----------------+-----------------+
| min(integers.i) | max(integers.i) |
+-----------------+-----------------+
| 1 | 5 |
+-----------------+-----------------+
SELECT g, MIN(i), MAX(i) FROM integers GROUP BY g ORDER BY g;
+---+-----------------+-----------------+
| g | min(integers.i) | max(integers.i) |
+---+-----------------+-----------------+
| 0 | 1 | 2 |
| 1 | 3 | 5 |
+---+-----------------+-----------------+
-- Test with doubles
CREATE TABLE doubles(d DOUBLE, g INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO doubles VALUES (1.5, 0, 1000), (5.5, 1, 2000), (NULL, 0, 3000), (3.5, 1, 4000), (2.5, 0, 5000);
Affected Rows: 5
SELECT MIN(d), MAX(d) FROM doubles;
+----------------+----------------+
| min(doubles.d) | max(doubles.d) |
+----------------+----------------+
| 1.5 | 5.5 |
+----------------+----------------+
SELECT g, MIN(d), MAX(d) FROM doubles GROUP BY g ORDER BY g;
+---+----------------+----------------+
| g | min(doubles.d) | max(doubles.d) |
+---+----------------+----------------+
| 0 | 1.5 | 2.5 |
| 1 | 3.5 | 5.5 |
+---+----------------+----------------+
-- Test with booleans (false orders below true, so MIN is false and MAX is true when both are present)
CREATE TABLE booleans(b BOOLEAN, g INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO booleans VALUES (false, 0, 1000), (true, 1, 2000), (NULL, 0, 3000), (false, 1, 4000);
Affected Rows: 4
SELECT COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans;
+----------+-------------------+-----------------+-----------------+
| count(*) | count(booleans.b) | min(booleans.b) | max(booleans.b) |
+----------+-------------------+-----------------+-----------------+
| 4 | 3 | false | true |
+----------+-------------------+-----------------+-----------------+
SELECT COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans WHERE b IS NULL;
+----------+-------------------+-----------------+-----------------+
| count(*) | count(booleans.b) | min(booleans.b) | max(booleans.b) |
+----------+-------------------+-----------------+-----------------+
| 1 | 0 | | |
+----------+-------------------+-----------------+-----------------+
SELECT g, COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans GROUP BY g ORDER BY g;
+---+----------+-------------------+-----------------+-----------------+
| g | count(*) | count(booleans.b) | min(booleans.b) | max(booleans.b) |
+---+----------+-------------------+-----------------+-----------------+
| 0 | 2 | 1 | false | false |
| 1 | 2 | 2 | false | true |
+---+----------+-------------------+-----------------+-----------------+
-- cleanup
DROP TABLE strings;
Affected Rows: 0
DROP TABLE integers;
Affected Rows: 0
DROP TABLE doubles;
Affected Rows: 0
DROP TABLE booleans;
Affected Rows: 0

View File

@@ -0,0 +1,56 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_aggregate_types.test
-- Test MIN/MAX aggregate functions (together with COUNT) on string, integer, double and boolean columns
-- Test with strings
CREATE TABLE strings(s STRING, g INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO strings VALUES ('hello', 0, 1000), ('world', 1, 2000), (NULL, 0, 3000), ('r', 1, 4000);
-- simple aggregates only (COUNT(s), MIN and MAX ignore the NULL row; over only-NULL input MIN/MAX are NULL)
SELECT COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings;
SELECT COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings WHERE s IS NULL;
-- grouped aggregates
SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings GROUP BY g ORDER BY g;
-- group with no non-NULL values: the filter drops 'hello', leaving g = 0 with only its NULL row
SELECT g, COUNT(*), COUNT(s), MIN(s), MAX(s) FROM strings WHERE s IS NULL OR s <> 'hello' GROUP BY g ORDER BY g;
-- Test with integers
CREATE TABLE integers(i INTEGER, g INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO integers VALUES (1, 0, 1000), (5, 1, 2000), (NULL, 0, 3000), (3, 1, 4000), (2, 0, 5000);
SELECT MIN(i), MAX(i) FROM integers;
SELECT g, MIN(i), MAX(i) FROM integers GROUP BY g ORDER BY g;
-- Test with doubles
CREATE TABLE doubles(d DOUBLE, g INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO doubles VALUES (1.5, 0, 1000), (5.5, 1, 2000), (NULL, 0, 3000), (3.5, 1, 4000), (2.5, 0, 5000);
SELECT MIN(d), MAX(d) FROM doubles;
SELECT g, MIN(d), MAX(d) FROM doubles GROUP BY g ORDER BY g;
-- Test with booleans (false orders below true, so MIN is false and MAX is true when both are present)
CREATE TABLE booleans(b BOOLEAN, g INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO booleans VALUES (false, 0, 1000), (true, 1, 2000), (NULL, 0, 3000), (false, 1, 4000);
SELECT COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans;
SELECT COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans WHERE b IS NULL;
SELECT g, COUNT(*), COUNT(b), MIN(b), MAX(b) FROM booleans GROUP BY g ORDER BY g;
-- cleanup
DROP TABLE strings;
DROP TABLE integers;
DROP TABLE doubles;
DROP TABLE booleans;

View File

@@ -0,0 +1,120 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_regression.test
-- Test REGRESSION functions (every REGR_* call passes the dependent variable y first, then the independent variable x)
-- Test REGR_SLOPE, REGR_INTERCEPT, REGR_R2
CREATE TABLE regr_test(x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
-- Linear relationship: y = 2x + 1
INSERT INTO regr_test VALUES
(1.0, 3.0, 1000), (2.0, 5.0, 2000), (3.0, 7.0, 3000),
(4.0, 9.0, 4000), (5.0, 11.0, 5000);
Affected Rows: 5
-- Test regression slope (should be close to 2)
SELECT REGR_SLOPE(y, x) FROM regr_test;
+-------------------------------------+
| regr_slope(regr_test.y,regr_test.x) |
+-------------------------------------+
| 2.0 |
+-------------------------------------+
-- Test regression intercept (should be close to 1)
SELECT REGR_INTERCEPT(y, x) FROM regr_test;
+-----------------------------------------+
| regr_intercept(regr_test.y,regr_test.x) |
+-----------------------------------------+
| 1.0 |
+-----------------------------------------+
-- Test R-squared (should be close to 1 for perfect fit)
SELECT REGR_R2(y, x) FROM regr_test;
+----------------------------------+
| regr_r2(regr_test.y,regr_test.x) |
+----------------------------------+
| 1.0 |
+----------------------------------+
-- Test REGR_COUNT (number of non-null pairs)
SELECT REGR_COUNT(y, x) FROM regr_test;
+-------------------------------------+
| regr_count(regr_test.y,regr_test.x) |
+-------------------------------------+
| 5 |
+-------------------------------------+
-- Test REGR_SXX, REGR_SYY, REGR_SXY (sums of squared deviations of x, of y, and of their cross products: 10, 40, 20)
SELECT REGR_SXX(y, x), REGR_SYY(y, x), REGR_SXY(y, x) FROM regr_test;
+-----------------------------------+-----------------------------------+-----------------------------------+
| regr_sxx(regr_test.y,regr_test.x) | regr_syy(regr_test.y,regr_test.x) | regr_sxy(regr_test.y,regr_test.x) |
+-----------------------------------+-----------------------------------+-----------------------------------+
| 10.0 | 40.0 | 20.0 |
+-----------------------------------+-----------------------------------+-----------------------------------+
-- Test REGR_AVGX, REGR_AVGY (means of x and of y over the paired rows: 3.0 and 7.0)
SELECT REGR_AVGX(y, x), REGR_AVGY(y, x) FROM regr_test;
+------------------------------------+------------------------------------+
| regr_avgx(regr_test.y,regr_test.x) | regr_avgy(regr_test.y,regr_test.x) |
+------------------------------------+------------------------------------+
| 3.0 | 7.0 |
+------------------------------------+------------------------------------+
-- Test with noisy data (same x values, y perturbed around 2x + 1, so slope and intercept land near 2 and 1 and R2 just below 1)
CREATE TABLE regr_noisy(x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO regr_noisy VALUES
(1.0, 3.1, 1000), (2.0, 4.9, 2000), (3.0, 7.2, 3000),
(4.0, 8.8, 4000), (5.0, 11.1, 5000);
Affected Rows: 5
SELECT REGR_SLOPE(y, x), REGR_INTERCEPT(y, x), REGR_R2(y, x) FROM regr_noisy;
+---------------------------------------+-------------------------------------------+------------------------------------+
| regr_slope(regr_noisy.y,regr_noisy.x) | regr_intercept(regr_noisy.y,regr_noisy.x) | regr_r2(regr_noisy.y,regr_noisy.x) |
+---------------------------------------+-------------------------------------------+------------------------------------+
| 1.9900000000000002 | 1.049999999999998 | 0.9973053289009772 |
+---------------------------------------+-------------------------------------------+------------------------------------+
-- Test with groups (grp 1 follows y = 2x + 1, grp 2 follows y = 2x; both are exact fits)
CREATE TABLE regr_groups(grp INTEGER, x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO regr_groups VALUES
(1, 1.0, 3.0, 1000), (1, 2.0, 5.0, 2000), (1, 3.0, 7.0, 3000),
(2, 1.0, 2.0, 4000), (2, 2.0, 4.0, 5000), (2, 3.0, 6.0, 6000);
Affected Rows: 6
SELECT grp, REGR_SLOPE(y, x), REGR_INTERCEPT(y, x), REGR_R2(y, x)
FROM regr_groups GROUP BY grp ORDER BY grp;
+-----+-----------------------------------------+---------------------------------------------+--------------------------------------+
| grp | regr_slope(regr_groups.y,regr_groups.x) | regr_intercept(regr_groups.y,regr_groups.x) | regr_r2(regr_groups.y,regr_groups.x) |
+-----+-----------------------------------------+---------------------------------------------+--------------------------------------+
| 1 | 2.0 | 1.0 | 1.0 |
| 2 | 2.0 | 0.0 | 1.0 |
+-----+-----------------------------------------+---------------------------------------------+--------------------------------------+
DROP TABLE regr_test;
Affected Rows: 0
DROP TABLE regr_noisy;
Affected Rows: 0
DROP TABLE regr_groups;
Affected Rows: 0

View File

@@ -0,0 +1,53 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_regression.test
-- Test REGRESSION functions (every REGR_* call passes the dependent variable y first, then the independent variable x)
-- Test REGR_SLOPE, REGR_INTERCEPT, REGR_R2
CREATE TABLE regr_test(x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
-- Linear relationship: y = 2x + 1
INSERT INTO regr_test VALUES
(1.0, 3.0, 1000), (2.0, 5.0, 2000), (3.0, 7.0, 3000),
(4.0, 9.0, 4000), (5.0, 11.0, 5000);
-- Test regression slope (should be close to 2)
SELECT REGR_SLOPE(y, x) FROM regr_test;
-- Test regression intercept (should be close to 1)
SELECT REGR_INTERCEPT(y, x) FROM regr_test;
-- Test R-squared (should be close to 1 for perfect fit)
SELECT REGR_R2(y, x) FROM regr_test;
-- Test REGR_COUNT (number of non-null pairs)
SELECT REGR_COUNT(y, x) FROM regr_test;
-- Test REGR_SXX, REGR_SYY, REGR_SXY (sums of squared deviations of x, of y, and of their cross products: 10, 40, 20)
SELECT REGR_SXX(y, x), REGR_SYY(y, x), REGR_SXY(y, x) FROM regr_test;
-- Test REGR_AVGX, REGR_AVGY (means of x and of y over the paired rows: 3.0 and 7.0)
SELECT REGR_AVGX(y, x), REGR_AVGY(y, x) FROM regr_test;
-- Test with noisy data (same x values, y perturbed around 2x + 1, so slope and intercept land near 2 and 1 and R2 just below 1)
CREATE TABLE regr_noisy(x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
INSERT INTO regr_noisy VALUES
(1.0, 3.1, 1000), (2.0, 4.9, 2000), (3.0, 7.2, 3000),
(4.0, 8.8, 4000), (5.0, 11.1, 5000);
SELECT REGR_SLOPE(y, x), REGR_INTERCEPT(y, x), REGR_R2(y, x) FROM regr_noisy;
-- Test with groups (grp 1 follows y = 2x + 1, grp 2 follows y = 2x; both are exact fits)
CREATE TABLE regr_groups(grp INTEGER, x DOUBLE, y DOUBLE, ts TIMESTAMP TIME INDEX);
INSERT INTO regr_groups VALUES
(1, 1.0, 3.0, 1000), (1, 2.0, 5.0, 2000), (1, 3.0, 7.0, 3000),
(2, 1.0, 2.0, 4000), (2, 2.0, 4.0, 5000), (2, 3.0, 6.0, 6000);
SELECT grp, REGR_SLOPE(y, x), REGR_INTERCEPT(y, x), REGR_R2(y, x)
FROM regr_groups GROUP BY grp ORDER BY grp;
DROP TABLE regr_test;
DROP TABLE regr_noisy;
DROP TABLE regr_groups;

View File

@@ -0,0 +1,175 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_stddev.test
-- Test STDDEV and VARIANCE aggregations, sample and population variants (a single constant input yields NULL for the sample variants)
CREATE TABLE stddev_test(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO stddev_test VALUES (42, 1, 1000), (43, 1, 2000), (42, 2, 3000), (1000, 2, 4000), (NULL, 1, 5000), (NULL, 3, 6000);
Affected Rows: 6
SELECT stddev_samp(1);
+------------------+
| stddev(Int64(1)) |
+------------------+
| |
+------------------+
SELECT var_samp(1);
+---------------+
| var(Int64(1)) |
+---------------+
| |
+---------------+
-- stddev_samp (shown as stddev in result headers; the WHERE val IS NOT NULL variants give the same numbers, i.e. NULL inputs are ignored)
SELECT round(stddev_samp(val), 1) FROM stddev_test;
+-----------------------------------------+
| round(stddev(stddev_test.val),Int64(1)) |
+-----------------------------------------+
| 478.8 |
+-----------------------------------------+
SELECT round(stddev_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+-----------------------------------------+
| round(stddev(stddev_test.val),Int64(1)) |
+-----------------------------------------+
| 478.8 |
+-----------------------------------------+
SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+-----+----------------------+-----------------------------------------+----------------------+
| grp | sum(stddev_test.val) | round(stddev(stddev_test.val),Int64(1)) | min(stddev_test.val) |
+-----+----------------------+-----------------------------------------+----------------------+
| 1 | 85 | 0.7 | 42 |
| 2 | 1042 | 677.4 | 42 |
| 3 | | | |
+-----+----------------------+-----------------------------------------+----------------------+
SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+-----+----------------------+-----------------------------------------+----------------------+
| grp | sum(stddev_test.val) | round(stddev(stddev_test.val),Int64(1)) | min(stddev_test.val) |
+-----+----------------------+-----------------------------------------+----------------------+
| 1 | 85 | 0.7 | 42 |
| 2 | 1042 | 677.4 | 42 |
+-----+----------------------+-----------------------------------------+----------------------+
-- stddev_pop (population standard deviation; grp 3 has only a NULL val, so every aggregate for it is NULL)
SELECT round(stddev_pop(val), 1) FROM stddev_test;
+---------------------------------------------+
| round(stddev_pop(stddev_test.val),Int64(1)) |
+---------------------------------------------+
| 414.7 |
+---------------------------------------------+
SELECT round(stddev_pop(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+---------------------------------------------+
| round(stddev_pop(stddev_test.val),Int64(1)) |
+---------------------------------------------+
| 414.7 |
+---------------------------------------------+
SELECT grp, sum(val), round(stddev_pop(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+-----+----------------------+---------------------------------------------+----------------------+
| grp | sum(stddev_test.val) | round(stddev_pop(stddev_test.val),Int64(1)) | min(stddev_test.val) |
+-----+----------------------+---------------------------------------------+----------------------+
| 1 | 85 | 0.5 | 42 |
| 2 | 1042 | 479.0 | 42 |
| 3 | | | |
+-----+----------------------+---------------------------------------------+----------------------+
SELECT grp, sum(val), round(stddev_pop(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+-----+----------------------+---------------------------------------------+----------------------+
| grp | sum(stddev_test.val) | round(stddev_pop(stddev_test.val),Int64(1)) | min(stddev_test.val) |
+-----+----------------------+---------------------------------------------+----------------------+
| 1 | 85 | 0.5 | 42 |
| 2 | 1042 | 479.0 | 42 |
+-----+----------------------+---------------------------------------------+----------------------+
-- var_samp (shown as var in result headers)
SELECT round(var_samp(val), 1) FROM stddev_test;
+--------------------------------------+
| round(var(stddev_test.val),Int64(1)) |
+--------------------------------------+
| 229281.6 |
+--------------------------------------+
SELECT round(var_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+--------------------------------------+
| round(var(stddev_test.val),Int64(1)) |
+--------------------------------------+
| 229281.6 |
+--------------------------------------+
SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+-----+----------------------+--------------------------------------+----------------------+
| grp | sum(stddev_test.val) | round(var(stddev_test.val),Int64(1)) | min(stddev_test.val) |
+-----+----------------------+--------------------------------------+----------------------+
| 1 | 85 | 0.5 | 42 |
| 2 | 1042 | 458882.0 | 42 |
| 3 | | | |
+-----+----------------------+--------------------------------------+----------------------+
SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+-----+----------------------+--------------------------------------+----------------------+
| grp | sum(stddev_test.val) | round(var(stddev_test.val),Int64(1)) | min(stddev_test.val) |
+-----+----------------------+--------------------------------------+----------------------+
| 1 | 85 | 0.5 | 42 |
| 2 | 1042 | 458882.0 | 42 |
+-----+----------------------+--------------------------------------+----------------------+
-- var_pop (the grouped queries round to 2 places so that grp 1's 0.25 is shown exactly)
SELECT round(var_pop(val), 1) FROM stddev_test;
+------------------------------------------+
| round(var_pop(stddev_test.val),Int64(1)) |
+------------------------------------------+
| 171961.2 |
+------------------------------------------+
SELECT round(var_pop(val), 1) FROM stddev_test WHERE val IS NOT NULL;
+------------------------------------------+
| round(var_pop(stddev_test.val),Int64(1)) |
+------------------------------------------+
| 171961.2 |
+------------------------------------------+
SELECT grp, sum(val), round(var_pop(val), 2), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
+-----+----------------------+------------------------------------------+----------------------+
| grp | sum(stddev_test.val) | round(var_pop(stddev_test.val),Int64(2)) | min(stddev_test.val) |
+-----+----------------------+------------------------------------------+----------------------+
| 1 | 85 | 0.25 | 42 |
| 2 | 1042 | 229441.0 | 42 |
| 3 | | | |
+-----+----------------------+------------------------------------------+----------------------+
SELECT grp, sum(val), round(var_pop(val), 2), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
+-----+----------------------+------------------------------------------+----------------------+
| grp | sum(stddev_test.val) | round(var_pop(stddev_test.val),Int64(2)) | min(stddev_test.val) |
+-----+----------------------+------------------------------------------+----------------------+
| 1 | 85 | 0.25 | 42 |
| 2 | 1042 | 229441.0 | 42 |
+-----+----------------------+------------------------------------------+----------------------+
-- cleanup
DROP TABLE stddev_test;
Affected Rows: 0

View File

@@ -0,0 +1,49 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_stddev.test
-- Test STDDEV and VARIANCE aggregations, sample and population variants (a single constant input yields NULL for the sample variants)
CREATE TABLE stddev_test(val INTEGER, grp INTEGER, ts TIMESTAMP TIME INDEX);
INSERT INTO stddev_test VALUES (42, 1, 1000), (43, 1, 2000), (42, 2, 3000), (1000, 2, 4000), (NULL, 1, 5000), (NULL, 3, 6000);
SELECT stddev_samp(1);
SELECT var_samp(1);
-- stddev_samp (shown as stddev in result headers; the WHERE val IS NOT NULL variants give the same numbers, i.e. NULL inputs are ignored)
SELECT round(stddev_samp(val), 1) FROM stddev_test;
SELECT round(stddev_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL;
SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
SELECT grp, sum(val), round(stddev_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
-- stddev_pop (population standard deviation; grp 3 has only a NULL val, so every aggregate for it is NULL)
SELECT round(stddev_pop(val), 1) FROM stddev_test;
SELECT round(stddev_pop(val), 1) FROM stddev_test WHERE val IS NOT NULL;
SELECT grp, sum(val), round(stddev_pop(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
SELECT grp, sum(val), round(stddev_pop(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
-- var_samp (shown as var in result headers)
SELECT round(var_samp(val), 1) FROM stddev_test;
SELECT round(var_samp(val), 1) FROM stddev_test WHERE val IS NOT NULL;
SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
SELECT grp, sum(val), round(var_samp(val), 1), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
-- var_pop (the grouped queries round to 2 places so that grp 1's 0.25 is shown exactly)
SELECT round(var_pop(val), 1) FROM stddev_test;
SELECT round(var_pop(val), 1) FROM stddev_test WHERE val IS NOT NULL;
SELECT grp, sum(val), round(var_pop(val), 2), min(val) FROM stddev_test GROUP BY grp ORDER BY grp;
SELECT grp, sum(val), round(var_pop(val), 2), min(val) FROM stddev_test WHERE val IS NOT NULL GROUP BY grp ORDER BY grp;
-- cleanup
DROP TABLE stddev_test;

View File

@@ -0,0 +1,96 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_string_agg.test
-- Test STRING_AGG operator
-- test string aggregation on scalar values
SELECT STRING_AGG('a',',');
+---------------------------------+
| string_agg(Utf8("a"),Utf8(",")) |
+---------------------------------+
| a |
+---------------------------------+
-- test string aggregation on scalar values with NULL
SELECT STRING_AGG('a',','), STRING_AGG(NULL,','), STRING_AGG('a', NULL), STRING_AGG(NULL,NULL);
+---------------------------------+----------------------------+----------------------------+-----------------------+
| string_agg(Utf8("a"),Utf8(",")) | string_agg(NULL,Utf8(",")) | string_agg(Utf8("a"),NULL) | string_agg(NULL,NULL) |
+---------------------------------+----------------------------+----------------------------+-----------------------+
| a | | a | |
+---------------------------------+----------------------------+----------------------------+-----------------------+
-- test string aggregation on a set of values
CREATE TABLE strings(g INTEGER, x VARCHAR, y VARCHAR, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
INSERT INTO strings VALUES
(1,'a','/', 1000), (1,'b','-', 2000),
(2,'i','/', 3000), (2,NULL,'-', 4000), (2,'j','+', 5000),
(3,'p','/', 6000),
(4,'x','/', 7000), (4,'y','-', 8000), (4,'z','+', 9000);
Affected Rows: 9
-- FIXME: STRING_AGG over a table column currently fails with a LargeUtf8/Utf8 schema mismatch.
-- The errors recorded for the column-based queries below pin that current behavior (presumably an engine bug, not the intended result).
SELECT g, STRING_AGG(x,'|') FROM strings GROUP BY g ORDER BY g;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
-- test agg on empty set
SELECT STRING_AGG(x,',') FROM strings WHERE g > 100;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
-- lower-case spelling of string_agg (used here in place of group_concat)
SELECT string_agg('a', ',');
+---------------------------------+
| string_agg(Utf8("a"),Utf8(",")) |
+---------------------------------+
| a |
+---------------------------------+
SELECT g, string_agg(x, ',') FROM strings GROUP BY g ORDER BY g;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
-- Test ORDER BY
-- Single group
SELECT STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
SELECT STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
-- Grouped with ORDER BY
SELECT g, STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings GROUP BY g ORDER BY g;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
SELECT g, STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings GROUP BY g ORDER BY g;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
-- Test with DISTINCT
SELECT STRING_AGG(DISTINCT x, '' ORDER BY x), STRING_AGG(DISTINCT x, '|' ORDER BY x) FROM strings;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
SELECT g, STRING_AGG(DISTINCT x, '' ORDER BY x) FROM strings GROUP BY g ORDER BY g;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
-- cleanup
DROP TABLE strings;
Affected Rows: 0

View File

@@ -0,0 +1,48 @@
-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_string_agg.test
-- Test STRING_AGG operator
-- test string aggregation on scalar values
SELECT STRING_AGG('a',',');
-- test string aggregation on scalar values with NULL
SELECT STRING_AGG('a',','), STRING_AGG(NULL,','), STRING_AGG('a', NULL), STRING_AGG(NULL,NULL);
-- test string aggregation on a set of values
CREATE TABLE strings(g INTEGER, x VARCHAR, y VARCHAR, ts TIMESTAMP TIME INDEX);
INSERT INTO strings VALUES
(1,'a','/', 1000), (1,'b','-', 2000),
(2,'i','/', 3000), (2,NULL,'-', 4000), (2,'j','+', 5000),
(3,'p','/', 6000),
(4,'x','/', 7000), (4,'y','-', 8000), (4,'z','+', 9000);
-- FIXME: STRING_AGG over a table column currently fails with a LargeUtf8/Utf8 schema mismatch.
-- The errors recorded for the column-based queries below pin that current behavior (presumably an engine bug, not the intended result).
SELECT g, STRING_AGG(x,'|') FROM strings GROUP BY g ORDER BY g;
-- test agg on empty set
SELECT STRING_AGG(x,',') FROM strings WHERE g > 100;
-- lower-case spelling of string_agg (used here in place of group_concat)
SELECT string_agg('a', ',');
SELECT g, string_agg(x, ',') FROM strings GROUP BY g ORDER BY g;
-- Test ORDER BY
-- Single group
SELECT STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings;
SELECT STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings;
-- Grouped with ORDER BY
SELECT g, STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings GROUP BY g ORDER BY g;
SELECT g, STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings GROUP BY g ORDER BY g;
-- Test with DISTINCT
SELECT STRING_AGG(DISTINCT x, '' ORDER BY x), STRING_AGG(DISTINCT x, '|' ORDER BY x) FROM strings;
SELECT g, STRING_AGG(DISTINCT x, '' ORDER BY x) FROM strings GROUP BY g ORDER BY g;
-- cleanup
DROP TABLE strings;

View File

@@ -41,4 +41,5 @@ SELECT uddsketch_calc(0.1, uddsketch_merge(128, 0.1, `state`)) FROM grouped_udds
SELECT uddsketch_calc(0.1, uddsketch_merge(64, 0.01, `state`)) FROM grouped_uddsketch;
drop table test_uddsketch;
drop table grouped_uddsketch;