-- Migrated from DuckDB test: test/sql/function/string/test_substring.test -- Substring function tests CREATE TABLE strings(s VARCHAR, "off" INTEGER, length INTEGER, ts TIMESTAMP TIME INDEX); Affected Rows: 0 INSERT INTO strings VALUES ('hello', 1, 2, 1000), ('world', 2, 3, 2000), ('b', 1, 1, 3000), (NULL, 2, 2, 4000); Affected Rows: 4 -- Test zero length SELECT SUBSTRING('🦆ab', 1, 0), SUBSTRING('abc', 1, 0); +----------------------------------------+---------------------------------------+ | substr(Utf8("🦆ab"),Int64(1),Int64(0)) | substr(Utf8("abc"),Int64(1),Int64(0)) | +----------------------------------------+---------------------------------------+ | | | +----------------------------------------+---------------------------------------+ -- Normal substring with constant offset/length SELECT SUBSTRING(s, 1, 2) FROM strings ORDER BY ts; +-------------------------------------+ | substr(strings.s,Int64(1),Int64(2)) | +-------------------------------------+ | he | | wo | | b | | | +-------------------------------------+ -- Substring out of range SELECT SUBSTRING(s, 2, 2) FROM strings ORDER BY ts; +-------------------------------------+ | substr(strings.s,Int64(2),Int64(2)) | +-------------------------------------+ | el | | or | | | | | +-------------------------------------+ -- Variable length offset/length SELECT SUBSTRING(s, "off", "length") FROM strings ORDER BY ts; +----------------------------------------------+ | substr(strings.s,strings.off,strings.length) | +----------------------------------------------+ | he | | orl | | b | | | +----------------------------------------------+ SELECT SUBSTRING(s, "off", 2) FROM strings ORDER BY ts; +----------------------------------------+ | substr(strings.s,strings.off,Int64(2)) | +----------------------------------------+ | he | | or | | b | | | +----------------------------------------+ SELECT SUBSTRING(s, 1, length) FROM strings ORDER BY ts; +-------------------------------------------+ | substr(strings.s,Int64(1),strings.length) | +-------------------------------------------+ | he | | wor | | b | | | +-------------------------------------------+ SELECT SUBSTRING('hello', "off", length) FROM strings ORDER BY ts; +--------------------------------------------------+ | substr(Utf8("hello"),strings.off,strings.length) | +--------------------------------------------------+ | he | | ell | | h | | el | +--------------------------------------------------+ -- Test with NULL values SELECT SUBSTRING(NULL, "off", length) FROM strings ORDER BY ts; +-----------------------------------------+ | substr(NULL,strings.off,strings.length) | +-----------------------------------------+ | | | | | | | | +-----------------------------------------+ SELECT SUBSTRING(s, NULL, length) FROM strings ORDER BY ts; +---------------------------------------+ | substr(strings.s,NULL,strings.length) | +---------------------------------------+ | | | | | | | | +---------------------------------------+ SELECT SUBSTRING(s, "off", NULL) FROM strings ORDER BY ts; +------------------------------------+ | substr(strings.s,strings.off,NULL) | +------------------------------------+ | | | | | | | | +------------------------------------+ -- Test negative offsets SELECT SUBSTRING('hello', -1, 3); +------------------------------------------+ | substr(Utf8("hello"),Int64(-1),Int64(3)) | +------------------------------------------+ | h | +------------------------------------------+ SELECT SUBSTRING('hello', 0, 3); +-----------------------------------------+ | substr(Utf8("hello"),Int64(0),Int64(3)) | +-----------------------------------------+ | he | +-----------------------------------------+ -- Test with Unicode characters CREATE TABLE unicode_strings(s VARCHAR, ts TIMESTAMP TIME INDEX); Affected Rows: 0 INSERT INTO unicode_strings VALUES ('Hello 世界', 1000), ('🦆🦀🐧', 2000), ('café', 3000); Affected Rows: 3 SELECT s, SUBSTRING(s, 1, 5), SUBSTRING(s, 7, 2) FROM unicode_strings ORDER BY ts; +------------+---------------------------------------------+---------------------------------------------+ | s | substr(unicode_strings.s,Int64(1),Int64(5)) | substr(unicode_strings.s,Int64(7),Int64(2)) | +------------+---------------------------------------------+---------------------------------------------+ | Hello 世界 | Hello | 世界 | | 🦆🦀🐧 | 🦆🦀🐧 | | | café | café | | +------------+---------------------------------------------+---------------------------------------------+ DROP TABLE strings; Affected Rows: 0 DROP TABLE unicode_strings; Affected Rows: 0