feat: supports large string (#7097)

* feat: supports large string

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: add doc for extract_string_vector_values

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: refactor by cr comments

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: changes by cr comments

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* refactor: extract_string_vector_values

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* feat: remove large string type and refactor string vector

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: revert some changes

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* feat: adds large string type

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: impl default for StringSizeType

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* fix: tests and test compatibility

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* test: update sqlness tests

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* chore: remove panic

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

---------

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
dennis zhuang
2025-10-17 09:46:11 +08:00
committed by GitHub
parent cf1b8392af
commit 8a2371a05c
24 changed files with 571 additions and 117 deletions

View File

@@ -33,12 +33,23 @@ Affected Rows: 9
SELECT g, STRING_AGG(x,'|') FROM strings GROUP BY g ORDER BY g;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
+---+---------------------------------+
| g | string_agg(strings.x,Utf8("|")) |
+---+---------------------------------+
| 1 | a|b |
| 2 | i|j |
| 3 | p |
| 4 | x|y|z |
+---+---------------------------------+
-- test agg on empty set
SELECT STRING_AGG(x,',') FROM strings WHERE g > 100;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
+---------------------------------+
| string_agg(strings.x,Utf8(",")) |
+---------------------------------+
| |
+---------------------------------+
-- string_agg can be used instead of group_concat
SELECT string_agg('a', ',');
@@ -59,35 +70,75 @@ SELECT string_agg('a', ',');
SELECT g, string_agg(x, ',') FROM strings GROUP BY g ORDER BY g;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
+---+---------------------------------+
| g | string_agg(strings.x,Utf8(",")) |
+---+---------------------------------+
| 1 | a,b |
| 2 | i,j |
| 3 | p |
| 4 | x,y,z |
+---+---------------------------------+
-- Test ORDER BY
-- Single group
SELECT STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
+--------------------------------------------------------------------+---------------------------------------------------------------------+
| string_agg(strings.x,Utf8("")) ORDER BY [strings.x ASC NULLS LAST] | string_agg(strings.x,Utf8("|")) ORDER BY [strings.x ASC NULLS LAST] |
+--------------------------------------------------------------------+---------------------------------------------------------------------+
| abijpxyz | a|b|i|j|p|x|y|z |
+--------------------------------------------------------------------+---------------------------------------------------------------------+
SELECT STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
+----------------------------------------------------------------------+-----------------------------------------------------------------------+
| string_agg(strings.x,Utf8("")) ORDER BY [strings.x DESC NULLS FIRST] | string_agg(strings.x,Utf8("|")) ORDER BY [strings.x DESC NULLS FIRST] |
+----------------------------------------------------------------------+-----------------------------------------------------------------------+
| zyxpjiba | z|y|x|p|j|i|b|a |
+----------------------------------------------------------------------+-----------------------------------------------------------------------+
-- Grouped with ORDER BY
SELECT g, STRING_AGG(x, '' ORDER BY x ASC), STRING_AGG(x, '|' ORDER BY x ASC) FROM strings GROUP BY g ORDER BY g;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
+---+--------------------------------------------------------------------+---------------------------------------------------------------------+
| g | string_agg(strings.x,Utf8("")) ORDER BY [strings.x ASC NULLS LAST] | string_agg(strings.x,Utf8("|")) ORDER BY [strings.x ASC NULLS LAST] |
+---+--------------------------------------------------------------------+---------------------------------------------------------------------+
| 1 | ab | a|b |
| 2 | ij | i|j |
| 3 | p | p |
| 4 | xyz | x|y|z |
+---+--------------------------------------------------------------------+---------------------------------------------------------------------+
SELECT g, STRING_AGG(x, '' ORDER BY x DESC), STRING_AGG(x,'|' ORDER BY x DESC) FROM strings GROUP BY g ORDER BY g;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
+---+----------------------------------------------------------------------+-----------------------------------------------------------------------+
| g | string_agg(strings.x,Utf8("")) ORDER BY [strings.x DESC NULLS FIRST] | string_agg(strings.x,Utf8("|")) ORDER BY [strings.x DESC NULLS FIRST] |
+---+----------------------------------------------------------------------+-----------------------------------------------------------------------+
| 1 | ba | b|a |
| 2 | ji | j|i |
| 3 | p | p |
| 4 | zyx | z|y|x |
+---+----------------------------------------------------------------------+-----------------------------------------------------------------------+
-- Test with DISTINCT
SELECT STRING_AGG(DISTINCT x, '' ORDER BY x), STRING_AGG(DISTINCT x, '|' ORDER BY x) FROM strings;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 0
+-----------------------------------------------------------------------------+------------------------------------------------------------------------------+
| string_agg(DISTINCT strings.x,Utf8("")) ORDER BY [strings.x ASC NULLS LAST] | string_agg(DISTINCT strings.x,Utf8("|")) ORDER BY [strings.x ASC NULLS LAST] |
+-----------------------------------------------------------------------------+------------------------------------------------------------------------------+
| abijpxyz | a|b|i|j|p|x|y|z |
+-----------------------------------------------------------------------------+------------------------------------------------------------------------------+
SELECT g, STRING_AGG(DISTINCT x, '' ORDER BY x) FROM strings GROUP BY g ORDER BY g;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected LargeUtf8 but found Utf8 at column index 1
+---+-----------------------------------------------------------------------------+
| g | string_agg(DISTINCT strings.x,Utf8("")) ORDER BY [strings.x ASC NULLS LAST] |
+---+-----------------------------------------------------------------------------+
| 1 | ab |
| 2 | ij |
| 3 | p |
| 4 | xyz |
+---+-----------------------------------------------------------------------------+
-- cleanup
DROP TABLE strings;

View File

@@ -36,12 +36,12 @@ Affected Rows: 0
-- create logical table with different data type on field column
CREATE TABLE t3 (ts timestamp time index, val string, host string, primary key (host)) engine=metric with ("on_physical_table" = "phy");
Error: 1004(InvalidArguments), Column type mismatch. Expect Float64(Float64Type), got String(StringType)
Error: 1004(InvalidArguments), Column type mismatch. Expect Float64(Float64Type), got String(StringType { size_type: Utf8 })
-- create logical table with different data type on tag column
CREATE TABLE t4 (ts timestamp time index, val double, host double, primary key (host)) engine=metric with ("on_physical_table" = "phy");
Error: 1004(InvalidArguments), Column type mismatch. Expect String(StringType), got Float64(Float64Type)
Error: 1004(InvalidArguments), Column type mismatch. Expect String(StringType { size_type: Utf8 }), got Float64(Float64Type)
-- create logical table with different column name on field column
CREATE TABLE t5 (ts timestamp time index, valval double, host string primary key) engine = metric with ("on_physical_table" = "phy");

View File

@@ -8,7 +8,7 @@ Affected Rows: 1
INSERT INTO strings VALUES (3, 4);
Error: 2000(InvalidSyntax), Failed to parse value: Fail to parse number 3, invalid column type: String(StringType)
Error: 2000(InvalidSyntax), Failed to parse value: Fail to parse number 3, invalid column type: String(StringType { size_type: Utf8 })
SELECT * FROM strings WHERE i = 'â‚(';