json2 type

insert
flush
query-driven and data-driven concretize
select
compaction

Signed-off-by: luofucong <luofc@foxmail.com>
This commit is contained in:
luofucong
2026-03-03 17:34:05 +08:00
parent 70ad412092
commit cc836c66db
75 changed files with 2475 additions and 552 deletions

View File

@@ -1,82 +0,0 @@
CREATE TABLE t (ts TIMESTAMP TIME INDEX, j JSON(format = "structured") DEFAULT '{"foo": "bar"}');
Error: 1001(Unsupported), Unsupported default constraint for column: 'j', reason: json column cannot have a default value
CREATE TABLE t (ts TIMESTAMP TIME INDEX, j JSON(format = "structured"));
Affected Rows: 0
DESC TABLE t;
+--------+----------------------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+--------+----------------------+-----+------+---------+---------------+
| ts | TimestampMillisecond | PRI | NO | | TIMESTAMP |
| j | Json<"<Null>"> | | YES | | FIELD |
+--------+----------------------+-----+------+---------+---------------+
INSERT INTO t VALUES
(1762128001000, '{"int": 1}'),
(1762128002000, '{"int": 2, "list": [0.1, 0.2, 0.3]}'),
(1762128003000, '{"int": 3, "list": [0.4, 0.5, 0.6], "nested": {"a": {"x": "hello"}, "b": {"y": -1}}}');
Affected Rows: 3
DESC TABLE t;
+--------+---------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+--------+---------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
| ts | TimestampMillisecond | PRI | NO | | TIMESTAMP |
| j | Json<{"int":"<Number>","list":["<Number>"],"nested":{"a":{"x":"<String>"},"b":{"y":"<Number>"}}}> | | YES | | FIELD |
+--------+---------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
INSERT INTO t VALUES
(1762128004000, '{"int": 4, "bool": true, "nested": {"a": {"y": 1}}}'),
(1762128005000, '{"int": 5, "bool": false, "nested": {"b": {"x": "world"}}}');
Affected Rows: 2
DESC TABLE t;
+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
| ts | TimestampMillisecond | PRI | NO | | TIMESTAMP |
| j | Json<{"bool":"<Bool>","int":"<Number>","list":["<Number>"],"nested":{"a":{"x":"<String>","y":"<Number>"},"b":{"x":"<String>","y":"<Number>"}}}> | | YES | | FIELD |
+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
INSERT INTO t VALUES (1762128006000, '{"int": 6, "list": [-6.0], "bool": true, "nested": {"a": {"x": "ax", "y": 66}, "b": {"y": -66, "x": "bx"}}}');
Affected Rows: 1
DESC TABLE t;
+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
| ts | TimestampMillisecond | PRI | NO | | TIMESTAMP |
| j | Json<{"bool":"<Bool>","int":"<Number>","list":["<Number>"],"nested":{"a":{"x":"<String>","y":"<Number>"},"b":{"x":"<String>","y":"<Number>"}}}> | | YES | | FIELD |
+--------+-------------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
INSERT INTO t VALUES (1762128011000, '{}');
Error: 1004(InvalidArguments), Invalid InsertRequest, reason: empty json object is not supported, consider adding a dummy field
SELECT ts, j FROM t order by ts;
+---------------------+----------------------------------------------------------------------------------------+
| ts | j |
+---------------------+----------------------------------------------------------------------------------------+
| 2025-11-03T00:00:01 | {bool: , int: 1, list: , nested: } |
| 2025-11-03T00:00:02 | {bool: , int: 2, list: [0.1, 0.2, 0.3], nested: } |
| 2025-11-03T00:00:03 | {bool: , int: 3, list: [0.4, 0.5, 0.6], nested: {a: {x: hello, y: }, b: {x: , y: -1}}} |
| 2025-11-03T00:00:04 | {bool: true, int: 4, list: , nested: {a: {x: , y: 1}, b: }} |
| 2025-11-03T00:00:05 | {bool: false, int: 5, list: , nested: {a: , b: {x: world, y: }}} |
| 2025-11-03T00:00:06 | {bool: true, int: 6, list: [-6.0], nested: {a: {x: ax, y: 66}, b: {x: bx, y: -66}}} |
+---------------------+----------------------------------------------------------------------------------------+
DROP table t;
Affected Rows: 0

View File

@@ -1,28 +0,0 @@
-- Structured JSON column case: checks DDL restrictions, how the reported column
-- type widens as new JSON shapes are inserted, and how sparse rows are rendered.

-- A DEFAULT on a JSON column is rejected (error 1001, Unsupported).
CREATE TABLE t (ts TIMESTAMP TIME INDEX, j JSON(format = "structured") DEFAULT '{"foo": "bar"}');
CREATE TABLE t (ts TIMESTAMP TIME INDEX, j JSON(format = "structured"));
-- Before any insert the column type is reported as Json<"<Null>">.
DESC TABLE t;
-- First batch introduces "int", "list" and part of "nested".
INSERT INTO t VALUES
(1762128001000, '{"int": 1}'),
(1762128002000, '{"int": 2, "list": [0.1, 0.2, 0.3]}'),
(1762128003000, '{"int": 3, "list": [0.4, 0.5, 0.6], "nested": {"a": {"x": "hello"}, "b": {"y": -1}}}');
DESC TABLE t;
-- Second batch adds "bool" plus the remaining "nested.a.y" / "nested.b.x" leaves.
INSERT INTO t VALUES
(1762128004000, '{"int": 4, "bool": true, "nested": {"a": {"y": 1}}}'),
(1762128005000, '{"int": 5, "bool": false, "nested": {"b": {"x": "world"}}}');
DESC TABLE t;
-- A row that only uses already-known fields; the reported type stays the same.
INSERT INTO t VALUES (1762128006000, '{"int": 6, "list": [-6.0], "bool": true, "nested": {"a": {"x": "ax", "y": 66}, "b": {"y": -66, "x": "bx"}}}');
DESC TABLE t;
-- An empty JSON object is rejected (error 1004, InvalidArguments).
INSERT INTO t VALUES (1762128011000, '{}');
-- Fields missing from a row are rendered as empty values in the output.
SELECT ts, j FROM t order by ts;
DROP table t;

View File

@@ -0,0 +1,182 @@
create table json2_table (
ts timestamp time index,
j json2
) with (
'append_mode' = 'true',
'sst_format' = 'flat',
);
Affected Rows: 0
insert into json2_table (ts, j)
values (1, '{"a": {"b": 1}, "c": "s1", "d": [{"e": {"f": 0.1}}]}'),
(2, '{"a": {"b": -2}, "c": "s2", "d": [{"e": {"f": 0.2}}]}');
Affected Rows: 2
admin flush_table('json2_table');
+----------------------------------+
| ADMIN flush_table('json2_table') |
+----------------------------------+
| 0 |
+----------------------------------+
insert into json2_table (ts, j)
values (3, '{"a": {"b": 3}, "c": "s3"}');
Affected Rows: 1
insert into json2_table
values (4, '{"a": {"b": -4}, "d": [{"e": {"g": -0.4}}]}'),
(5, '{"a": {}, "c": "s5"}'),
(6, '{"c": "s6"}');
Affected Rows: 3
admin flush_table('json2_table');
+----------------------------------+
| ADMIN flush_table('json2_table') |
+----------------------------------+
| 0 |
+----------------------------------+
admin compact_table('json2_table', 'swcs', '86400');
+-----------------------------------------------------+
| ADMIN compact_table('json2_table', 'swcs', '86400') |
+-----------------------------------------------------+
| 0 |
+-----------------------------------------------------+
insert into json2_table
values (7, '{"a": {"b": "s7"}, "c": [1], "d": [{"e": {"g": -0.7}}]}'),
(8, '{"a": {"b": 8}, "c": "s8"}');
Affected Rows: 2
admin flush_table('json2_table');
+----------------------------------+
| ADMIN flush_table('json2_table') |
+----------------------------------+
| 0 |
+----------------------------------+
insert into json2_table
values (9, '{"a": {"x": true}, "c": "s9", "d": [{"e": {"g": -0.9}}]}'),
(10, '{"a": {"b": 10}, "y": false}');
Affected Rows: 2
select j.a.b from json2_table order by ts;
+-----------------------------------------------------+
| json2_get(json2_table.j,Utf8("a.b"),Utf8View(NULL)) |
+-----------------------------------------------------+
| 1 |
| -2 |
| 3 |
| -4 |
| |
| |
| s7 |
| 8 |
| |
| 10 |
+-----------------------------------------------------+
select j.a, j.a.x from json2_table order by ts;
+---------------------------------------------------+-----------------------------------------------------+
| json2_get(json2_table.j,Utf8("a"),Utf8View(NULL)) | json2_get(json2_table.j,Utf8("a.x"),Utf8View(NULL)) |
+---------------------------------------------------+-----------------------------------------------------+
| {b: 1, x: } | |
| {b: -2, x: } | |
| {b: 3, x: } | |
| {b: -4, x: } | |
| {b: , x: } | |
| | |
| {b: s7, x: } | |
| {b: 8, x: } | |
| {b: , x: true} | true |
| {b: 10, x: } | |
+---------------------------------------------------+-----------------------------------------------------+
select j.c, j.y from json2_table order by ts;
+---------------------------------------------------+---------------------------------------------------+
| json2_get(json2_table.j,Utf8("c"),Utf8View(NULL)) | json2_get(json2_table.j,Utf8("y"),Utf8View(NULL)) |
+---------------------------------------------------+---------------------------------------------------+
| s1 | |
| s2 | |
| s3 | |
| | |
| s5 | |
| s6 | |
| [1] | |
| s8 | |
| s9 | |
| | false |
+---------------------------------------------------+---------------------------------------------------+
select j from json2_table order by ts;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Struct() but found Struct("a": Struct("b": Utf8, "x": Boolean), "c": Utf8, "d": List(Struct("e": Struct("f": Float64, "g": Float64))), "y": Boolean) at column index 0
select * from json2_table order by ts;
Error: 3001(EngineExecuteQuery), Invalid argument error: column types must match schema types, expected Struct() but found Struct("a": Struct("b": Utf8, "x": Boolean), "c": Utf8, "d": List(Struct("e": Struct("f": Float64, "g": Float64))), "y": Boolean) at column index 1
select j.a.b + 1 from json2_table order by ts;
+-------------------------------------------------------------+
| json2_get(json2_table.j,Utf8("a.b"),Int64(NULL)) + Int64(1) |
+-------------------------------------------------------------+
| 2 |
| -1 |
| 4 |
| -3 |
| |
| |
| |
| 9 |
| |
| 11 |
+-------------------------------------------------------------+
select abs(j.a.b) from json2_table order by ts;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts.
Candidate functions:
abs(Numeric(1))
-- "j.c" is of type "String", "abs" is expected to be all "null"s.
select abs(j.c) from json2_table order by ts;
Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String No function matches the given name and argument types 'abs(Utf8View)'. You might need to add explicit type casts.
Candidate functions:
abs(Numeric(1))
select j.d from json2_table order by ts;
+---------------------------------------------------+
| json2_get(json2_table.j,Utf8("d"),Utf8View(NULL)) |
+---------------------------------------------------+
| [{e: {f: 0.1, g: }}] |
| [{e: {f: 0.2, g: }}] |
| |
| [{e: {f: , g: -0.4}}] |
| |
| |
| [{e: {g: -0.7}}] |
| |
| [{e: {g: -0.9}}] |
| |
+---------------------------------------------------+
drop table json2_table;
Affected Rows: 0

View File

@@ -0,0 +1,56 @@
-- Exercises the json2 column type end to end: inserts, flushes, a compaction,
-- and path-style selects over rows whose JSON shapes and field types differ.
create table json2_table (
ts timestamp time index,
j json2
) with (
'append_mode' = 'true',
'sst_format' = 'flat',
);
-- Rows 1-2 share one shape ("a.b" number, "c" string, "d[].e.f" float) and are flushed together.
insert into json2_table (ts, j)
values (1, '{"a": {"b": 1}, "c": "s1", "d": [{"e": {"f": 0.1}}]}'),
(2, '{"a": {"b": -2}, "c": "s2", "d": [{"e": {"f": 0.2}}]}');
admin flush_table('json2_table');
-- Rows 3-6 omit some fields, add "d[].e.g", and include an empty object for "a" (row 5).
insert into json2_table (ts, j)
values (3, '{"a": {"b": 3}, "c": "s3"}');
insert into json2_table
values (4, '{"a": {"b": -4}, "d": [{"e": {"g": -0.4}}]}'),
(5, '{"a": {}, "c": "s5"}'),
(6, '{"c": "s6"}');
admin flush_table('json2_table');
-- NOTE(review): 'swcs' with '86400' is presumably the strict-window strategy with a
-- one-day window -- confirm against the ADMIN function docs.
admin compact_table('json2_table', 'swcs', '86400');
-- Row 7 changes field types: "a.b" becomes a string and "c" becomes a list.
insert into json2_table
values (7, '{"a": {"b": "s7"}, "c": [1], "d": [{"e": {"g": -0.7}}]}'),
(8, '{"a": {"b": 8}, "c": "s8"}');
admin flush_table('json2_table');
-- Rows 9-10 introduce the new fields "a.x" and "y". No flush follows these rows, so
-- they are presumably still unflushed when the selects below run.
insert into json2_table
values (9, '{"a": {"x": true}, "c": "s9", "d": [{"e": {"g": -0.9}}]}'),
(10, '{"a": {"b": 10}, "y": false}');
-- Path access on a field that holds numbers in most rows and a string ("s7") in one.
select j.a.b from json2_table order by ts;
-- An object-valued path next to one of its leaf fields.
select j.a, j.a.x from json2_table order by ts;
-- "c" holds strings and one list; "y" exists only in row 10.
select j.c, j.y from json2_table order by ts;
-- Selecting the whole json2 column (directly or via "*") currently fails with
-- EngineExecuteQuery: "column types must match schema types".
select j from json2_table order by ts;
select * from json2_table order by ts;
-- Arithmetic on a path; the row whose "a.b" is the string "s7" yields an empty value.
select j.a.b + 1 from json2_table order by ts;
-- Currently a planning error: "abs" is handed a Utf8View argument.
select abs(j.a.b) from json2_table order by ts;
-- "j.c" is of type "String". NOTE(review): the intent was for "abs" to yield all "null"s,
-- but the recorded result is a planning error ('abs(Utf8View)'); confirm which is desired.
select abs(j.c) from json2_table order by ts;
-- Path that resolves to a list of objects.
select j.d from json2_table order by ts;
drop table json2_table;

View File

@@ -0,0 +1,176 @@
CREATE TABLE bluesky (
`data` JSON2,
time_us TimestampMicrosecond TIME INDEX
) WITH ('append_mode' = 'true', 'sst_format' = 'flat');
Affected Rows: 0
INSERT INTO bluesky (time_us, data)
VALUES (1732206349000167,
'{"did":"did:plc:yj3sjq3blzpynh27cumnp5ks","time_us":1732206349000167,"kind":"commit","commit":{"rev":"3lbhtytnn2k2f","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtyteurk2y","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.095Z","langs":["en"],"reply":{"parent":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"},"root":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"}},"text":"aaaaah.  LIght shines in a corner of WTF...."},"cid":"bafyreidblutgvj75o4q4akzyyejedjj6l3it6hgqwee6jpwv2wqph5fsgm"}}');
Affected Rows: 1
INSERT INTO bluesky (time_us, data)
VALUES (1732206349000644,
'{"did":"did:plc:3i4xf2v4wcnyktgv6satke64","time_us":1732206349000644,"kind":"commit","commit":{"rev":"3lbhuvzds6d2a","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdked2a","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.221Z","subject":{"cid":"bafyreidjvrcmckkm765mct5fph36x7kupkfo35rjklbf2k76xkzwyiauge","uri":"at://did:plc:azrv4rcbws6kmcga4fsbphg2/app.bsky.feed.post/3lbgjdpbiec2l"}},"cid":"bafyreia5l5vrkh5oj4cjyhcqby2dprhyvcyofo2q5562tijlae2pzih23m"}}');
Affected Rows: 1
ADMIN flush_table('bluesky');
+------------------------------+
| ADMIN flush_table('bluesky') |
+------------------------------+
| 0 |
+------------------------------+
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001108,
'{"did":"did:plc:gccfnqqizz4urhchsaie6jft","time_us":1732206349001108,"kind":"commit","commit":{"rev":"3lbhuvze3gi2u","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhuvzdtmi2u","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:27:40.923Z","subject":"did:plc:r7cdh4sgzqbfdc6wcdxxti7c"},"cid":"bafyreiew2p6cgirfaj45qoenm4fgumib7xoloclrap3jgkz5es7g7kby3i"}}');
Affected Rows: 1
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001372,
'{"did":"did:plc:msxqf3twq7abtdw7dbfskphk","time_us":1732206349001372,"kind":"commit","commit":{"rev":"3lbhueija5p22","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhueiizcx22","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:15:58.232Z","subject":{"cid":"bafyreiavpshyqzrlo5m7fqodjhs6jevweqnif4phasiwimv4a7mnsqi2fe","uri":"at://did:plc:fusulxqc52zbrc75fi6xrcof/app.bsky.feed.post/3lbhskq5zn22f"}},"cid":"bafyreidjix4dauj2afjlbzmhj3a7gwftcevvmmy6edww6vrjdbst26rkby"}}');
Affected Rows: 1
ADMIN flush_table('bluesky');
+------------------------------+
| ADMIN flush_table('bluesky') |
+------------------------------+
| 0 |
+------------------------------+
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001905,
'{"did":"did:plc:l5o3qjrmfztir54cpwlv2eme","time_us":1732206349001905,"kind":"commit","commit":{"rev":"3lbhtytohxc2o","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtytjqzk2q","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.254Z","langs":["en"],"reply":{"parent":{"cid":"bafyreih35fe2jj3gchmgk4amold4l6sfxd2sby5wrg3jrws5fkdypxrbg4","uri":"at://did:plc:6wx2gg5yqgvmlu35r6y3bk6d/app.bsky.feed.post/3lbhtj2eb4s2o"},"root":{"cid":"bafyreifipyt3vctd4ptuoicvio7rbr5xvjv4afwuggnd2prnmn55mu6luu","uri":"at://did:plc:474ldquxwzrlcvjhhbbk2wte/app.bsky.feed.post/3lbhdzrynik27"}},"text":"okay i take mine back because I hadnt heard this one yet^^"},"cid":"bafyreigzdsdne3z2xxcakgisieyj7y47hj6eg7lj6v4q25ah5q2qotu5ku"}}');
Affected Rows: 1
ADMIN compact_table('bluesky', 'swcs', '86400');
+-------------------------------------------------+
| ADMIN compact_table('bluesky', 'swcs', '86400') |
+-------------------------------------------------+
| 0 |
+-------------------------------------------------+
SELECT count(*) FROM bluesky;
+----------+
| count(*) |
+----------+
| 5 |
+----------+
-- Query 1:
SELECT data.commit.collection AS event,
count() AS count
FROM bluesky
GROUP BY event
ORDER BY count DESC, event ASC;
+-----------------------+-------+
| event | count |
+-----------------------+-------+
| app.bsky.feed.like | 2 |
| app.bsky.feed.post | 2 |
| app.bsky.graph.follow | 1 |
+-----------------------+-------+
-- Query 2:
SELECT data.commit.collection AS event,
count() AS count,
count(DISTINCT data.did) AS users
FROM bluesky
WHERE data.kind = 'commit' AND data.commit.operation = 'create'
GROUP BY event
ORDER BY count DESC, event ASC;
+-----------------------+-------+-------+
| event | count | users |
+-----------------------+-------+-------+
| app.bsky.feed.like | 2 | 2 |
| app.bsky.feed.post | 2 | 2 |
| app.bsky.graph.follow | 1 | 1 |
+-----------------------+-------+-------+
-- Query 3:
SELECT data.commit.collection AS event,
date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day,
count() AS count
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection in ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like')
GROUP BY event, hour_of_day
ORDER BY hour_of_day, event;
+--------------------+-------------+-------+
| event | hour_of_day | count |
+--------------------+-------------+-------+
| app.bsky.feed.like | 16 | 2 |
| app.bsky.feed.post | 16 | 2 |
+--------------------+-------------+-------+
-- Query 4:
SELECT data.did::String as user_id,
min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) AS first_post_ts
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection = 'app.bsky.feed.post'
GROUP BY user_id
ORDER BY first_post_ts ASC, user_id DESC
LIMIT 3;
+----------------------------------+----------------------------+
| user_id | first_post_ts |
+----------------------------------+----------------------------+
| did:plc:yj3sjq3blzpynh27cumnp5ks | 2024-11-21T16:25:49.000167 |
| did:plc:l5o3qjrmfztir54cpwlv2eme | 2024-11-21T16:25:49.001905 |
+----------------------------------+----------------------------+
-- Query 5:
SELECT data.did::String as user_id,
date_part(
'epoch',
max(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) -
min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64')))
) AS activity_span
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection = 'app.bsky.feed.post'
GROUP BY user_id
ORDER BY activity_span DESC, user_id DESC
LIMIT 3;
+----------------------------------+---------------+
| user_id | activity_span |
+----------------------------------+---------------+
| did:plc:yj3sjq3blzpynh27cumnp5ks | 0.0 |
| did:plc:l5o3qjrmfztir54cpwlv2eme | 0.0 |
+----------------------------------+---------------+
-- SQLNESS REPLACE (peers.*) REDACTED
EXPLAIN
SELECT date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day
FROM bluesky;
+---------------+--------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | MergeScan [is_placeholder=false, remote_input=[ |
| | Projection: date_part(Utf8("hour"), to_timestamp_micros(json2_get(bluesky.data, Utf8("time_us"), Int64(NULL)))) AS hour_of_day |
| | TableScan: bluesky |
| | ]] |
| physical_plan | CooperativeExec |
| | MergeScanExec: REDACTED
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------+

View File

@@ -0,0 +1,90 @@
-- Setup for the bluesky json2 case: one JSON2 payload column plus a microsecond
-- time index, in append mode with the flat SST format.
CREATE TABLE bluesky (
`data` JSON2,
time_us TimestampMicrosecond TIME INDEX
) WITH ('append_mode' = 'true', 'sst_format' = 'flat');
-- Each row's time_us column carries the same value as the "time_us" field inside its payload.
-- Rows 1-2: an app.bsky.feed.post event and an app.bsky.feed.like event, flushed together.
INSERT INTO bluesky (time_us, data)
VALUES (1732206349000167,
'{"did":"did:plc:yj3sjq3blzpynh27cumnp5ks","time_us":1732206349000167,"kind":"commit","commit":{"rev":"3lbhtytnn2k2f","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtyteurk2y","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.095Z","langs":["en"],"reply":{"parent":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"},"root":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"}},"text":"aaaaah.  LIght shines in a corner of WTF...."},"cid":"bafyreidblutgvj75o4q4akzyyejedjj6l3it6hgqwee6jpwv2wqph5fsgm"}}');
INSERT INTO bluesky (time_us, data)
VALUES (1732206349000644,
'{"did":"did:plc:3i4xf2v4wcnyktgv6satke64","time_us":1732206349000644,"kind":"commit","commit":{"rev":"3lbhuvzds6d2a","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdked2a","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.221Z","subject":{"cid":"bafyreidjvrcmckkm765mct5fph36x7kupkfo35rjklbf2k76xkzwyiauge","uri":"at://did:plc:azrv4rcbws6kmcga4fsbphg2/app.bsky.feed.post/3lbgjdpbiec2l"}},"cid":"bafyreia5l5vrkh5oj4cjyhcqby2dprhyvcyofo2q5562tijlae2pzih23m"}}');
ADMIN flush_table('bluesky');
-- Rows 3-4: an app.bsky.graph.follow event (its "record.subject" is a plain string,
-- not an object as in the like events) and another like event, flushed together.
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001108,
'{"did":"did:plc:gccfnqqizz4urhchsaie6jft","time_us":1732206349001108,"kind":"commit","commit":{"rev":"3lbhuvze3gi2u","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhuvzdtmi2u","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:27:40.923Z","subject":"did:plc:r7cdh4sgzqbfdc6wcdxxti7c"},"cid":"bafyreiew2p6cgirfaj45qoenm4fgumib7xoloclrap3jgkz5es7g7kby3i"}}');
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001372,
'{"did":"did:plc:msxqf3twq7abtdw7dbfskphk","time_us":1732206349001372,"kind":"commit","commit":{"rev":"3lbhueija5p22","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhueiizcx22","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:15:58.232Z","subject":{"cid":"bafyreiavpshyqzrlo5m7fqodjhs6jevweqnif4phasiwimv4a7mnsqi2fe","uri":"at://did:plc:fusulxqc52zbrc75fi6xrcof/app.bsky.feed.post/3lbhskq5zn22f"}},"cid":"bafyreidjix4dauj2afjlbzmhj3a7gwftcevvmmy6edww6vrjdbst26rkby"}}');
ADMIN flush_table('bluesky');
-- Row 5: a second post event, inserted after the last flush and followed only by the compaction.
INSERT INTO bluesky (time_us, data)
VALUES (1732206349001905,
'{"did":"did:plc:l5o3qjrmfztir54cpwlv2eme","time_us":1732206349001905,"kind":"commit","commit":{"rev":"3lbhtytohxc2o","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtytjqzk2q","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.254Z","langs":["en"],"reply":{"parent":{"cid":"bafyreih35fe2jj3gchmgk4amold4l6sfxd2sby5wrg3jrws5fkdypxrbg4","uri":"at://did:plc:6wx2gg5yqgvmlu35r6y3bk6d/app.bsky.feed.post/3lbhtj2eb4s2o"},"root":{"cid":"bafyreifipyt3vctd4ptuoicvio7rbr5xvjv4afwuggnd2prnmn55mu6luu","uri":"at://did:plc:474ldquxwzrlcvjhhbbk2wte/app.bsky.feed.post/3lbhdzrynik27"}},"text":"okay i take mine back because I hadnt heard this one yet^^"},"cid":"bafyreigzdsdne3z2xxcakgisieyj7y47hj6eg7lj6v4q25ah5q2qotu5ku"}}');
-- NOTE(review): 'swcs' with '86400' is presumably the strict-window strategy with a
-- one-day window -- confirm against the ADMIN function docs.
ADMIN compact_table('bluesky', 'swcs', '86400');
-- Sanity check: all five inserted rows are visible after the flushes and the compaction.
SELECT count(*) FROM bluesky;
-- Query 1: number of events per collection.
SELECT data.commit.collection AS event,
count() AS count
FROM bluesky
GROUP BY event
ORDER BY count DESC, event ASC;
-- Query 2: events and distinct users per collection, restricted to "create" commits.
SELECT data.commit.collection AS event,
count() AS count,
count(DISTINCT data.did) AS users
FROM bluesky
WHERE data.kind = 'commit' AND data.commit.operation = 'create'
GROUP BY event
ORDER BY count DESC, event ASC;
-- Query 3: events per collection and hour of day for posts, reposts and likes.
-- The hour is derived from the payload's "time_us" field, not from the time index column.
SELECT data.commit.collection AS event,
date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day,
count() AS count
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection in ('app.bsky.feed.post', 'app.bsky.feed.repost', 'app.bsky.feed.like')
GROUP BY event, hour_of_day
ORDER BY hour_of_day, event;
-- Query 4: earliest post timestamp per user, first three users.
SELECT data.did::String as user_id,
min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) AS first_post_ts
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection = 'app.bsky.feed.post'
GROUP BY user_id
ORDER BY first_post_ts ASC, user_id DESC
LIMIT 3;
-- Query 5: span between each user's first and last post (as an 'epoch' value), top three users.
SELECT data.did::String as user_id,
date_part(
'epoch',
max(to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) -
min(to_timestamp_micros(arrow_cast(data.time_us, 'Int64')))
) AS activity_span
FROM bluesky
WHERE data.kind = 'commit'
AND data.commit.operation = 'create'
AND data.commit.collection = 'app.bsky.feed.post'
GROUP BY user_id
ORDER BY activity_span DESC, user_id DESC
LIMIT 3;
-- Plan check for a typed path access: the Int64 cast is expected to appear as the type
-- argument of json2_get. Peer details in the plan are redacted by the directive below.
-- SQLNESS REPLACE (peers.*) REDACTED
EXPLAIN
SELECT date_part('hour', to_timestamp_micros(arrow_cast(data.time_us, 'Int64'))) as hour_of_day
FROM bluesky;
-- Drop the table so the case leaves no state behind for later cases.
DROP TABLE bluesky;