feat: skip caching uncompressed pages if they are large (#4705)

* feat: cache each uncompressed page

* chore: remove unused function

* chore: log

* chore: log

* chore: row group pages cache kv

* feat: also support row group level cache

* chore: fix range count

* feat: don't cache compressed page for row group cache

* feat: use function to get part

* chore: log whether scan is from compaction

* chore: avoid get column

* feat: add timer metrics

* chore: Revert "feat: add timer metrics"

This reverts commit 4618f57fa2ba13b1e1a8dec83afd01c00ae4c867.

* feat: don't cache individual uncompressed page

* feat: append in row group level under append mode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: fetch pages cost

* perf: yield

* Update src/mito2/src/sst/parquet/row_group.rs

* refactor: cache key

* feat: print file num and row groups num in explain

* test: update sqlness test

* chore: Update src/mito2/src/sst/parquet/page_reader.rs

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Yingwen
2024-09-10 19:52:16 +08:00
committed by GitHub
parent 04de3ed929
commit 3e17c09e45
15 changed files with 251 additions and 131 deletions

View File

@@ -36,7 +36,7 @@ explain analyze SELECT count(*) FROM system_metrics;
|_|_|_CoalescePartitionsExec REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[COUNT(system_REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
-|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED
+|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
|_|_| Total rows: 1_|
+-+-+-+

View File

@@ -34,7 +34,7 @@ select sum(val) from t group by host;
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[SUM(t.val)] REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
-|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED
+|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
| 1_| 1_|_ProjectionExec: expr=[SUM(t.val)@1 as SUM(t.val)] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[host@0 as host], aggr=[SUM(t.val)] REDACTED
@@ -43,7 +43,7 @@ select sum(val) from t group by host;
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[SUM(t.val)] REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
-|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED
+|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
|_|_| Total rows: 0_|
+-+-+-+
@@ -66,9 +66,9 @@ select sum(val) from t;
|_|_|_ProjectionExec: expr=[val@1 as val] REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
-| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED
+| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
-| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED
+| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
|_|_| Total rows: 1_|
+-+-+-+
@@ -95,9 +95,9 @@ select sum(val) from t group by idc;
|_|_|_ProjectionExec: expr=[val@1 as val, idc@3 as idc] REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
-| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED
+| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
-| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file ranges) REDACTED
+| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
|_|_| Total rows: 0_|
+-+-+-+

View File

@@ -77,7 +77,7 @@ EXPLAIN ANALYZE SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s';
|_|_|_CoalescePartitionsExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
-| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED
+| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
|_|_| Total rows: 10_|
+-+-+-+

View File

@@ -89,7 +89,7 @@ explain analyze select * from demo where idc='idc1';
+-+-+-+
| 0_| 0_|_MergeScanExec: REDACTED
|_|_|_|
-| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED
+| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
|_|_| Total rows: 2_|
+-+-+-+

View File

@@ -32,7 +32,7 @@ TQL ANALYZE (0, 10, '5s') test;
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_FilterExec: j@1 >= -300000 AND j@1 <= 310000 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
-|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED
+|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
|_|_| Total rows: 4_|
+-+-+-+
@@ -63,7 +63,7 @@ TQL ANALYZE (0, 10, '1s', '2s') test;
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_FilterExec: j@1 >= -2000 AND j@1 <= 12000 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
-|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED
+|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
|_|_| Total rows: 4_|
+-+-+-+
@@ -93,7 +93,7 @@ TQL ANALYZE ('1970-01-01T00:00:00'::timestamp, '1970-01-01T00:00:00'::timestamp
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_FilterExec: j@1 >= -300000 AND j@1 <= 310000 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
-|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED
+|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
|_|_| Total rows: 4_|
+-+-+-+
@@ -125,7 +125,7 @@ TQL ANALYZE VERBOSE (0, 10, '5s') test;
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_FilterExec: j@1 >= -300000 AND j@1 <= 310000 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
-|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges) REDACTED
+|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED
|_|_|_|
|_|_| Total rows: 4_|
+-+-+-+

View File

@@ -48,7 +48,7 @@ explain analyze
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[host@1 as host], aggr=[last_value(t.host) ORDER BY [t.ts ASC NULLS LAST], last_value(t.not_pk) ORDER BY [t.ts ASC NULLS LAST], last_value(t.val) ORDER BY [t.ts ASC NULLS LAST]] REDACTED
|_|_|_RepartitionExec: REDACTED
-|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file ranges), selector=LastRow REDACTED
+|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), selector=LastRow REDACTED
|_|_|_|
|_|_| Total rows: 4_|
+-+-+-+