(?) data-driven concretize
(?) select
(?) compaction

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
luofucong
2026-05-11 20:04:43 +08:00
parent abf4623440
commit 8bbb6b79a9
28 changed files with 813 additions and 136 deletions

View File

@@ -42,6 +42,14 @@ admin flush_table('json2_table');
| 0 |
+----------------------------------+
admin compact_table('json2_table', 'swcs', '86400');
+-----------------------------------------------------+
| ADMIN compact_table('json2_table', 'swcs', '86400') |
+-----------------------------------------------------+
| 0 |
+-----------------------------------------------------+
insert into json2_table
values (7, '{"a": {"b": "s7"}, "c": [1], "d": [{"e": {"g": -0.7}}]}'),
(8, '{"a": {"b": 8}, "c": "s8"}');
@@ -109,6 +117,23 @@ select j.a.b from json2_table order by ts;
| 10 |
+-------------------------------------+
select j.a, j.a.x from json2_table order by ts;
+--------------------------------------------------+----------------------------------------------------+
| json_get(json2_table.j,Utf8("a"),Utf8View(NULL)) | json_get(json2_table.j,Utf8("a.x"),Utf8View(NULL)) |
+--------------------------------------------------+----------------------------------------------------+
| {"b":1,"x":null} | |
| {"b":-2,"x":null} | |
| {"b":3,"x":null} | |
| {"b":-4,"x":null} | |
| {"b":null,"x":null} | |
| | |
| {"b":"s7","x":null} | |
| {"b":8,"x":null} | |
| {"b":null,"x":true} | true |
| {"b":10,"x":null} | |
+--------------------------------------------------+----------------------------------------------------+
select j.c, j.y from json2_table order by ts;
+-----------------------------------+-----------------------------------+
@@ -126,6 +151,44 @@ select j.c, j.y from json2_table order by ts;
| | false |
+-----------------------------------+-----------------------------------+
select j from json2_table order by ts;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Struct() but found Struct("a": Struct("b": Binary, "x": Boolean), "c": Binary, "d": List(Struct("e": Struct("f": Float64, "g": Float64))), "y": Boolean) at column index 0
select * from json2_table order by ts;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Struct() but found Struct("a": Struct("b": Binary, "x": Boolean), "c": Binary, "d": List(Struct("e": Struct("f": Float64, "g": Float64))), "y": Boolean) at column index 1
select j.a.b + 1 from json2_table order by ts;
+------------------------------------------------------------+
| json_get(json2_table.j,Utf8("a.b"),Int64(NULL)) + Int64(1) |
+------------------------------------------------------------+
| 2 |
| -1 |
| 4 |
| -3 |
| |
| |
| |
| 9 |
| |
| 11 |
+------------------------------------------------------------+
select abs(j.a.b) from json2_table order by ts;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts.
Candidate functions:
abs(Numeric(1))
-- "j.c" is of type "String": "abs" is meant to yield all "null"s, but for now the query is rejected at planning time with a type-mismatch error.
select abs(j.c) from json2_table order by ts;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts.
Candidate functions:
abs(Numeric(1))
select j.d from json2_table order by ts;
+-----------------------------------+

View File

@@ -22,6 +22,8 @@ values (4, '{"a": {"b": -4}, "d": [{"e": {"g": -0.4}}]}'),
admin flush_table('json2_table');
admin compact_table('json2_table', 'swcs', '86400');
insert into json2_table
values (7, '{"a": {"b": "s7"}, "c": [1], "d": [{"e": {"g": -0.7}}]}'),
(8, '{"a": {"b": 8}, "c": "s8"}');
@@ -40,8 +42,21 @@ explain select j.a.x::bool from json2_table;
select j.a.b from json2_table order by ts;
select j.a, j.a.x from json2_table order by ts;
select j.c, j.y from json2_table order by ts;
select j from json2_table order by ts;
select * from json2_table order by ts;
select j.a.b + 1 from json2_table order by ts;
select abs(j.a.b) from json2_table order by ts;
-- "j.c" is of type "String": "abs" is meant to yield all "null"s, but for now the query is rejected at planning time with a type-mismatch error.
select abs(j.c) from json2_table order by ts;
select j.d from json2_table order by ts;
drop table json2_table;

View File

@@ -0,0 +1,180 @@
CREATE TABLE bluesky (
`data` JSON2,
time_us TimestampMicrosecond TIME INDEX
) WITH ('append_mode' = 'true', 'sst_format' = 'flat');
Affected Rows: 0
INSERT INTO bluesky (time_us, data)
VALUES (1732206349000167,
'{"did":"did:plc:yj3sjq3blzpynh27cumnp5ks","time_us":1732206349000167,"kind":"commit","commit":{"rev":"3lbhtytnn2k2f","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtyteurk2y","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.095Z","langs":["en"],"reply":{"parent":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"},"root":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"}},"text":"aaaaah.  LIght shines in a corner of WTF...."},"cid":"bafyreidblutgvj75o4q4akzyyejedjj6l3it6hgqwee6jpwv2wqph5fsgm"}}');
Affected Rows: 1
INSERT INTO bluesky (time_us, data)
VALUES (1732206349000644,
'{"did":"did:plc:3i4xf2v4wcnyktgv6satke64","time_us":1732206349000644,"kind":"commit","commit":{"rev":"3lbhuvzds6d2a","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdked2a","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.221Z","subject":{"cid":"bafyreidjvrcmckkm765mct5fph36x7kupkfo35rjklbf2k76xkzwyiauge","uri":"at://did:plc:azrv4rcbws6kmcga4fsbphg2/app.bsky.feed.post/3lbgjdpbiec2l"}},"cid":"bafyreia5l5vrkh5oj4cjyhcqby2dprhyvcyofo2q5562tijlae2pzih23m"}}');
Affected Rows: 1
ADMIN flush_table('bluesky');
+------------------------------+
| ADMIN flush_table('bluesky') |
+------------------------------+
| 0 |
+------------------------------+
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001108,
'{"did":"did:plc:gccfnqqizz4urhchsaie6jft","time_us":1732206349001108,"kind":"commit","commit":{"rev":"3lbhuvze3gi2u","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhuvzdtmi2u","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:27:40.923Z","subject":"did:plc:r7cdh4sgzqbfdc6wcdxxti7c"},"cid":"bafyreiew2p6cgirfaj45qoenm4fgumib7xoloclrap3jgkz5es7g7kby3i"}}');
Affected Rows: 1
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001372,
'{"did":"did:plc:msxqf3twq7abtdw7dbfskphk","time_us":1732206349001372,"kind":"commit","commit":{"rev":"3lbhueija5p22","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhueiizcx22","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:15:58.232Z","subject":{"cid":"bafyreiavpshyqzrlo5m7fqodjhs6jevweqnif4phasiwimv4a7mnsqi2fe","uri":"at://did:plc:fusulxqc52zbrc75fi6xrcof/app.bsky.feed.post/3lbhskq5zn22f"}},"cid":"bafyreidjix4dauj2afjlbzmhj3a7gwftcevvmmy6edww6vrjdbst26rkby"}}');
Affected Rows: 1
ADMIN flush_table('bluesky');
+------------------------------+
| ADMIN flush_table('bluesky') |
+------------------------------+
| 0 |
+------------------------------+
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001905,
'{"did":"did:plc:l5o3qjrmfztir54cpwlv2eme","time_us":1732206349001905,"kind":"commit","commit":{"rev":"3lbhtytohxc2o","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtytjqzk2q","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.254Z","langs":["en"],"reply":{"parent":{"cid":"bafyreih35fe2jj3gchmgk4amold4l6sfxd2sby5wrg3jrws5fkdypxrbg4","uri":"at://did:plc:6wx2gg5yqgvmlu35r6y3bk6d/app.bsky.feed.post/3lbhtj2eb4s2o"},"root":{"cid":"bafyreifipyt3vctd4ptuoicvio7rbr5xvjv4afwuggnd2prnmn55mu6luu","uri":"at://did:plc:474ldquxwzrlcvjhhbbk2wte/app.bsky.feed.post/3lbhdzrynik27"}},"text":"okay i take mine back because I hadnt heard this one yet^^"},"cid":"bafyreigzdsdne3z2xxcakgisieyj7y47hj6eg7lj6v4q25ah5q2qotu5ku"}}');
Affected Rows: 1
ADMIN compact_table('bluesky', 'swcs', '86400');
+-------------------------------------------------+
| ADMIN compact_table('bluesky', 'swcs', '86400') |
+-------------------------------------------------+
| 0 |
+-------------------------------------------------+
SELECT count(*) FROM bluesky;
+----------+
| count(*) |
+----------+
| 5 |
+----------+
-- Query 1: number of events per collection, most frequent first (ties broken by collection name).
SELECT data.commit.collection AS event,
count() AS count
FROM bluesky
GROUP BY event
ORDER BY count DESC, event ASC;
+-----------------------+-------+
| event | count |
+-----------------------+-------+
| app.bsky.feed.like | 2 |
| app.bsky.feed.post | 2 |
| app.bsky.graph.follow | 1 |
+-----------------------+-------+
-- Query 2: events and distinct users (data.did) per collection, restricted to commits whose operation is create.
SELECT data.commit.collection AS event,
count() AS count,
count(DISTINCT data.did) AS users
FROM bluesky
WHERE data.kind = 'commit' AND data.commit.operation = 'create'
GROUP BY event
ORDER BY count DESC, event ASC;
+-----------------------+-------+-------+
| event | count | users |
+-----------------------+-------+-------+
| app.bsky.feed.like | 2 | 2 |
| app.bsky.feed.post | 2 | 2 |
| app.bsky.graph.follow | 1 | 1 |
+-----------------------+-------+-------+
-- Query 3: created posts, reposts and likes counted per collection and hour of day, the hour being taken from data.time_us read as microseconds.
SELECT data.commit.collection AS event,
date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day,
count() AS count
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection in ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like')
GROUP BY event, hour_of_day
ORDER BY hour_of_day, event;
+--------------------+-------------+-------+
| event | hour_of_day | count |
+--------------------+-------------+-------+
| app.bsky.feed.like | 16 | 2 |
| app.bsky.feed.post | 16 | 2 |
+--------------------+-------------+-------+
-- Query 4: earliest created-post timestamp per user, limited to the 3 users with the earliest first post.
SELECT data.did::String as user_id,
min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) AS first_post_ts
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection = 'app.bsky.feed.post'
GROUP BY user_id
ORDER BY first_post_ts ASC, user_id DESC
LIMIT 3;
+----------------------------------+----------------------------+
| user_id | first_post_ts |
+----------------------------------+----------------------------+
| did:plc:yj3sjq3blzpynh27cumnp5ks | 2024-11-21T16:25:49.000167 |
| did:plc:l5o3qjrmfztir54cpwlv2eme | 2024-11-21T16:25:49.001905 |
+----------------------------------+----------------------------+
-- Query 5: per-user activity span, i.e. the epoch part of (last - first) created-post timestamp, limited to the 3 largest spans.
SELECT data.did::String as user_id,
date_part(
'epoch',
max(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) -
min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64')))
) AS activity_span
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection = 'app.bsky.feed.post'
GROUP BY user_id
ORDER BY activity_span DESC, user_id DESC
LIMIT 3;
+----------------------------------+---------------+
| user_id | activity_span |
+----------------------------------+---------------+
| did:plc:yj3sjq3blzpynh27cumnp5ks | 0.0 |
| did:plc:l5o3qjrmfztir54cpwlv2eme | 0.0 |
+----------------------------------+---------------+
-- SQLNESS REPLACE (peers.*) REDACTED
EXPLAIN
SELECT date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day
FROM bluesky;
+---------------+--------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | MergeScan [is_placeholder=false, remote_input=[ |
| | Projection: date_part(Utf8("hour"), to_timestamp_micros(json2_get(bluesky.data, Utf8("time_us"), Int64(NULL)))) AS hour_of_day |
| | TableScan: bluesky |
| | ]] |
| physical_plan | CooperativeExec |
| | MergeScanExec: REDACTED
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------+
DROP TABLE bluesky;
Affected Rows: 0

View File

@@ -0,0 +1,92 @@
CREATE TABLE bluesky (
`data` JSON2,
time_us TimestampMicrosecond TIME INDEX
) WITH ('append_mode' = 'true', 'sst_format' = 'flat');
INSERT INTO bluesky (time_us, data)
VALUES (1732206349000167,
'{"did":"did:plc:yj3sjq3blzpynh27cumnp5ks","time_us":1732206349000167,"kind":"commit","commit":{"rev":"3lbhtytnn2k2f","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtyteurk2y","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.095Z","langs":["en"],"reply":{"parent":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"},"root":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"}},"text":"aaaaah.  LIght shines in a corner of WTF...."},"cid":"bafyreidblutgvj75o4q4akzyyejedjj6l3it6hgqwee6jpwv2wqph5fsgm"}}');
INSERT INTO bluesky (time_us, data)
VALUES (1732206349000644,
'{"did":"did:plc:3i4xf2v4wcnyktgv6satke64","time_us":1732206349000644,"kind":"commit","commit":{"rev":"3lbhuvzds6d2a","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdked2a","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.221Z","subject":{"cid":"bafyreidjvrcmckkm765mct5fph36x7kupkfo35rjklbf2k76xkzwyiauge","uri":"at://did:plc:azrv4rcbws6kmcga4fsbphg2/app.bsky.feed.post/3lbgjdpbiec2l"}},"cid":"bafyreia5l5vrkh5oj4cjyhcqby2dprhyvcyofo2q5562tijlae2pzih23m"}}');
ADMIN flush_table('bluesky');
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001108,
'{"did":"did:plc:gccfnqqizz4urhchsaie6jft","time_us":1732206349001108,"kind":"commit","commit":{"rev":"3lbhuvze3gi2u","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhuvzdtmi2u","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:27:40.923Z","subject":"did:plc:r7cdh4sgzqbfdc6wcdxxti7c"},"cid":"bafyreiew2p6cgirfaj45qoenm4fgumib7xoloclrap3jgkz5es7g7kby3i"}}');
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001372,
'{"did":"did:plc:msxqf3twq7abtdw7dbfskphk","time_us":1732206349001372,"kind":"commit","commit":{"rev":"3lbhueija5p22","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhueiizcx22","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:15:58.232Z","subject":{"cid":"bafyreiavpshyqzrlo5m7fqodjhs6jevweqnif4phasiwimv4a7mnsqi2fe","uri":"at://did:plc:fusulxqc52zbrc75fi6xrcof/app.bsky.feed.post/3lbhskq5zn22f"}},"cid":"bafyreidjix4dauj2afjlbzmhj3a7gwftcevvmmy6edww6vrjdbst26rkby"}}');
ADMIN flush_table('bluesky');
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001905,
'{"did":"did:plc:l5o3qjrmfztir54cpwlv2eme","time_us":1732206349001905,"kind":"commit","commit":{"rev":"3lbhtytohxc2o","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtytjqzk2q","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.254Z","langs":["en"],"reply":{"parent":{"cid":"bafyreih35fe2jj3gchmgk4amold4l6sfxd2sby5wrg3jrws5fkdypxrbg4","uri":"at://did:plc:6wx2gg5yqgvmlu35r6y3bk6d/app.bsky.feed.post/3lbhtj2eb4s2o"},"root":{"cid":"bafyreifipyt3vctd4ptuoicvio7rbr5xvjv4afwuggnd2prnmn55mu6luu","uri":"at://did:plc:474ldquxwzrlcvjhhbbk2wte/app.bsky.feed.post/3lbhdzrynik27"}},"text":"okay i take mine back because I hadnt heard this one yet^^"},"cid":"bafyreigzdsdne3z2xxcakgisieyj7y47hj6eg7lj6v4q25ah5q2qotu5ku"}}');
ADMIN compact_table('bluesky', 'swcs', '86400');
SELECT count(*) FROM bluesky;
-- Query 1: number of events per collection, most frequent first (ties broken by collection name).
SELECT data.commit.collection AS event,
count() AS count
FROM bluesky
GROUP BY event
ORDER BY count DESC, event ASC;
-- Query 2: events and distinct users (data.did) per collection, restricted to commits whose operation is create.
SELECT data.commit.collection AS event,
count() AS count,
count(DISTINCT data.did) AS users
FROM bluesky
WHERE data.kind = 'commit' AND data.commit.operation = 'create'
GROUP BY event
ORDER BY count DESC, event ASC;
-- Query 3: created posts, reposts and likes counted per collection and hour of day, the hour being taken from data.time_us read as microseconds.
SELECT data.commit.collection AS event,
date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day,
count() AS count
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection in ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like')
GROUP BY event, hour_of_day
ORDER BY hour_of_day, event;
-- Query 4: earliest created-post timestamp per user, limited to the 3 users with the earliest first post.
SELECT data.did::String as user_id,
min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) AS first_post_ts
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection = 'app.bsky.feed.post'
GROUP BY user_id
ORDER BY first_post_ts ASC, user_id DESC
LIMIT 3;
-- Query 5: per-user activity span, i.e. the epoch part of (last - first) created-post timestamp, limited to the 3 largest spans.
SELECT data.did::String as user_id,
date_part(
'epoch',
max(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) -
min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64')))
) AS activity_span
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection = 'app.bsky.feed.post'
GROUP BY user_id
ORDER BY activity_span DESC, user_id DESC
LIMIT 3;
-- SQLNESS REPLACE (peers.*) REDACTED
EXPLAIN
SELECT date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day
FROM bluesky;
DROP TABLE bluesky;