-- Migrated from DuckDB test: test/sql/aggregate/aggregates/test_approximate_distinct_count.test
-- Test approx_distinct function
-- Basic tests
SELECT APPROX_DISTINCT(1);

+---------------------------+
| approx_distinct(Int64(1)) |
+---------------------------+
| 1                         |
+---------------------------+

-- FIXME(dennis): This feature is not implemented: Support for 'approx_distinct' for data type Null is not implemented
-- SELECT APPROX_DISTINCT(NULL);
SELECT APPROX_DISTINCT('hello');

+--------------------------------+
| approx_distinct(Utf8("hello")) |
+--------------------------------+
| 1                              |
+--------------------------------+

-- Test with range data
SELECT APPROX_DISTINCT(10), APPROX_DISTINCT('hello') FROM numbers LIMIT 100;

+----------------------------+--------------------------------+
| approx_distinct(Int64(10)) | approx_distinct(Utf8("hello")) |
+----------------------------+--------------------------------+
| 1                          | 1                              |
+----------------------------+--------------------------------+

SELECT APPROX_DISTINCT(number) FROM numbers WHERE 1 = 0 LIMIT 100 ;

+---------------------------------+
| approx_distinct(numbers.number) |
+---------------------------------+
| 0                               |
+---------------------------------+

-- Test with different data types
CREATE TABLE dates_test(t DATE, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO dates_test VALUES
    ('2008-01-01', 1000), (NULL, 2000), ('2007-01-01', 3000),
    ('2008-02-01', 4000), ('2008-01-02', 5000), ('2008-01-01', 6000),
    ('2008-01-01', 7000), ('2008-01-01', 8000);

Affected Rows: 8

-- FIXME(dennis): This feature is not implemented: Support for 'approx_distinct' for data type Date32 is not implemented
-- SELECT APPROX_DISTINCT(t) FROM dates_test;
DROP TABLE dates_test;

Affected Rows: 0

CREATE TABLE names_test(t VARCHAR, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO names_test VALUES
    ('Pedro', 1000), (NULL, 2000), ('Pedro', 3000), ('Pedro', 4000),
    ('Mark', 5000), ('Mark', 6000), ('Mark', 7000),
    ('Hannes-Muehleisen', 8000), ('Hannes-Muehleisen', 9000);

Affected Rows: 9

SELECT APPROX_DISTINCT(t) FROM names_test;

+-------------------------------+
| approx_distinct(names_test.t) |
+-------------------------------+
| 3                             |
+-------------------------------+

DROP TABLE names_test;

Affected Rows: 0

-- Test with large dataset
CREATE TABLE large_test(a INTEGER, b INTEGER, ts TIMESTAMP TIME INDEX);

Affected Rows: 0

INSERT INTO large_test SELECT number, number % 10, number * 1000 FROM numbers LIMIT 2000;

Affected Rows: 2000

SELECT APPROX_DISTINCT(a), APPROX_DISTINCT(b) FROM large_test;

+-------------------------------+-------------------------------+
| approx_distinct(large_test.a) | approx_distinct(large_test.b) |
+-------------------------------+-------------------------------+
| 2000                          | 10                            |
+-------------------------------+-------------------------------+

-- Test with groups
SELECT b, APPROX_DISTINCT(a) FROM large_test GROUP BY b ORDER BY b;

+---+-------------------------------+
| b | approx_distinct(large_test.a) |
+---+-------------------------------+
| 0 | 200                           |
| 1 | 201                           |
| 2 | 201                           |
| 3 | 200                           |
| 4 | 199                           |
| 5 | 200                           |
| 6 | 199                           |
| 7 | 200                           |
| 8 | 200                           |
| 9 | 200                           |
+---+-------------------------------+

DROP TABLE large_test;

Affected Rows: 0