feat: adds regex_extract function and more type tests (#7107)

* feat: adds format, regex_extract function and more type tests

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: forgot functions

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: forgot null type

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* test: forgot date type

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* feat: remove format function

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* test: update results after upgrading datafusion

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

---------

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
This commit is contained in:
dennis zhuang
2025-10-25 16:41:49 +08:00
committed by GitHub
parent 7da2f5ed12
commit d8563ba56d
58 changed files with 6502 additions and 15 deletions

View File

@@ -0,0 +1,93 @@
-- Migrated from DuckDB test: test/sql/sample/same_seed_same_sample.test
-- FIXME: the results recorded in this test are wrong; waiting for
-- https://github.com/apache/datafusion/pull/16325
-- NOTE(review): presumably the recorded output has to be regenerated once that change is picked up - confirm.
-- Test basic SAMPLE functionality
-- Create test table: x holds the sampled values, ts is the time index column
CREATE TABLE test(x INTEGER, ts TIMESTAMP TIME INDEX);
Affected Rows: 0
-- Insert test data: 10000 rows taken from `numbers`, with ts computed as number * 1000
INSERT INTO test SELECT number, number * 1000 FROM numbers limit 10000;
Affected Rows: 10000
-- Test TABLESAMPLE with percentage
-- Only checks that the sample is non-empty, so this passes whether or not sampling is applied
SELECT COUNT(*) > 0 FROM test TABLESAMPLE (10 PERCENT);
+---------------------+
| count(*) > Int64(0) |
+---------------------+
| true |
+---------------------+
-- Test TABLESAMPLE with row count
-- Requests a 100-row sample; the recorded count below is 10000 (every inserted row), see FIXME at the top
SELECT COUNT(*) FROM test TABLESAMPLE (100 ROWS);
+----------+
| count(*) |
+----------+
| 10000 |
+----------+
-- Test TABLESAMPLE SYSTEM
-- Non-emptiness check only, same as the plain percentage case
SELECT COUNT(*) > 0 FROM test TABLESAMPLE SYSTEM (25 PERCENT);
+---------------------+
| count(*) > Int64(0) |
+---------------------+
| true |
+---------------------+
-- Test TABLESAMPLE BERNOULLI
-- Non-emptiness check only, same as the plain percentage case
SELECT COUNT(*) > 0 FROM test TABLESAMPLE BERNOULLI (25 PERCENT);
+---------------------+
| count(*) > Int64(0) |
+---------------------+
| true |
+---------------------+
-- Test with REPEATABLE for consistent results
-- The same sampling clause and seed (42) are run twice; cnt1 and cnt2 are expected to be equal.
-- Both recorded counts are 10000 (every inserted row), see FIXME at the top
SELECT COUNT(*) AS cnt1 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42);
+-------+
| cnt1 |
+-------+
| 10000 |
+-------+
SELECT COUNT(*) AS cnt2 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42);
+-------+
| cnt2 |
+-------+
| 10000 |
+-------+
-- Test sampling with WHERE clause
-- NOTE(review): the recorded 4999 equals the number of rows with x > 5000 if x spans 0..9999,
-- i.e. the filter looks applied but the 10 PERCENT sample does not - confirm against the FIXME at the top
SELECT COUNT(*) FROM test TABLESAMPLE (10 PERCENT) WHERE x > 5000;
+----------+
| count(*) |
+----------+
| 4999 |
+----------+
-- Test sampling with ORDER BY
-- Sorts by x and keeps at most 5 rows; the recorded rows are the five smallest x values (0..4)
SELECT x FROM test TABLESAMPLE (5 ROWS) ORDER BY x LIMIT 5;
+---+
| x |
+---+
| 0 |
| 1 |
| 2 |
| 3 |
| 4 |
+---+
-- cleanup
DROP TABLE test;
Affected Rows: 0

View File

@@ -0,0 +1,35 @@
-- Migrated from DuckDB test: test/sql/sample/same_seed_same_sample.test
-- FIXME: the results recorded for this test are wrong; waiting for
-- https://github.com/apache/datafusion/pull/16325
-- NOTE(review): presumably the recorded output has to be regenerated once that change is picked up - confirm.
-- Test basic SAMPLE functionality
-- Create test table: x holds the sampled values, ts is the time index column
CREATE TABLE test(x INTEGER, ts TIMESTAMP TIME INDEX);
-- Insert test data: 10000 rows taken from `numbers`, with ts computed as number * 1000
INSERT INTO test SELECT number, number * 1000 FROM numbers limit 10000;
-- Test TABLESAMPLE with percentage
-- Only checks that the sample is non-empty, so the result does not depend on which rows are picked
SELECT COUNT(*) > 0 FROM test TABLESAMPLE (10 PERCENT);
-- Test TABLESAMPLE with row count
-- Requests a 100-row sample and returns the exact count, so this output is sensitive to the FIXME above
SELECT COUNT(*) FROM test TABLESAMPLE (100 ROWS);
-- Test TABLESAMPLE SYSTEM
-- Non-emptiness check only, same as the plain percentage case
SELECT COUNT(*) > 0 FROM test TABLESAMPLE SYSTEM (25 PERCENT);
-- Test TABLESAMPLE BERNOULLI
-- Non-emptiness check only, same as the plain percentage case
SELECT COUNT(*) > 0 FROM test TABLESAMPLE BERNOULLI (25 PERCENT);
-- Test with REPEATABLE for consistent results
-- The same sampling clause and seed (42) are run twice; cnt1 and cnt2 are expected to be equal
SELECT COUNT(*) AS cnt1 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42);
SELECT COUNT(*) AS cnt2 FROM test TABLESAMPLE SYSTEM (10 PERCENT) REPEATABLE (42);
-- Test sampling with WHERE clause
-- Combines a 10 PERCENT sample with the filter x > 5000 and returns the exact count
SELECT COUNT(*) FROM test TABLESAMPLE (10 PERCENT) WHERE x > 5000;
-- Test sampling with ORDER BY
-- Sorts by x and keeps at most 5 rows of the 5 ROWS sample
SELECT x FROM test TABLESAMPLE (5 ROWS) ORDER BY x LIMIT 5;
-- cleanup
DROP TABLE test;