More precisely control size of inmem layer (#1927)

* More precisely control the size of the in-memory layer

* Force recompaction of L0 layers if they contain large non-WAL-logged BLOBs, to avoid producing overly large layers

* Add a modified version of the test_hot_update test (test_dup_key.py) that generates large layers without a large number of tables

* Change test name in test_dup_key

* Add Layer::get_max_key_range function

* Add Layer::key_iter method and implement a new approach to splitting layers during compaction, based on the total size of all key values (see the sketch after this list)

* Add test_large_schema test for checking layer file size after compaction

* Make clippy happy

* Restore the LSN distance threshold check when checkpointing the in-memory layer

* Optimize storage keys iterator

* Update pageserver/src/layered_repository.rs (5 times, co-authored by Heikki Linnakangas)

* Fix code style

* Reduce the number of tables in test_large_schema so it fits within the timeout under a debug build

* Fix style of test_large_schema.py

* Fix handling of duplicate layers
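
The size-based splitting mentioned above can be illustrated with a minimal Rust sketch. This is not the pageserver's actual code: KeySize, TARGET_LAYER_SIZE, and plan_layer_splits are invented names for the example, and the real implementation works on the Key/Layer types in pageserver/src/layered_repository.rs. The idea is to walk keys in order (as a Layer::key_iter-style method could provide) and cut a new layer file whenever the accumulated value size would exceed a target.

const TARGET_LAYER_SIZE: u64 = 256 * 1024 * 1024; // hypothetical target size

/// One key together with the total size of all values stored for it.
struct KeySize {
    key: u64, // stand-in for the pageserver's Key type
    size: u64,
}

/// Return the keys at which compacted output should be cut into
/// separate layer files, based on accumulated value size.
fn plan_layer_splits(keys: impl Iterator<Item = KeySize>) -> Vec<u64> {
    let mut split_points = Vec::new();
    let mut accumulated = 0u64;
    for ks in keys {
        if accumulated > 0 && accumulated + ks.size > TARGET_LAYER_SIZE {
            // Start a new layer file at this key boundary.
            split_points.push(ks.key);
            accumulated = 0;
        }
        accumulated += ks.size;
    }
    split_points
}

fn main() {
    // A single key with a huge total value size (e.g. a hot page updated
    // millions of times, as in test_dup_key.py) gets a layer of its own
    // instead of inflating one shared layer.
    let keys = vec![
        KeySize { key: 1, size: 100 << 20 },
        KeySize { key: 2, size: 300 << 20 },
        KeySize { key: 3, size: 50 << 20 },
    ];
    println!("split at keys: {:?}", plan_layer_splits(keys.into_iter()));
}

Splitting on accumulated value size, rather than on key count alone, is what keeps a single heavily-updated key from producing an oversized layer file.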

Co-authored-by: Heikki Linnakangas <heikki@zenith.tech>
Author:       Konstantin Knizhnik
Date:         2022-07-21 07:45:11 +03:00
Committed by: GitHub
Parent:       b445cf7665
Commit:       572ae74388

7 changed files with 372 additions and 34 deletions

test_dup_key.py (new file)

@@ -0,0 +1,48 @@
import pytest
from contextlib import closing
from fixtures.compare_fixtures import PgCompare
from pytest_lazyfixture import lazy_fixture  # type: ignore


@pytest.mark.parametrize(
    "env",
    [
        # The test is too slow to run in CI, but fast enough to run with remote tests
        pytest.param(lazy_fixture("neon_compare"), id="neon", marks=pytest.mark.slow),
        pytest.param(lazy_fixture("vanilla_compare"), id="vanilla", marks=pytest.mark.slow),
        pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster),
    ])
def test_dup_key(env: PgCompare):
    # Update the same page many times, then measure read performance.
    with closing(env.pg.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute("SET synchronous_commit=off")
            cur.execute("SET statement_timeout=0")

            # Write many updates to the same row. The ROLLBACK discards each
            # batch of updates, but their WAL has already been generated, so
            # the pageserver accumulates many versions of the same few pages.
            with env.record_duration('write'):
                cur.execute("create table t (i integer, filler text);")
                cur.execute('insert into t values (0);')
                cur.execute("""
                    do $$
                    begin
                        for ivar in 1..5000000 loop
                            update t set i = ivar, filler = repeat('a', 50);
                            update t set i = ivar, filler = repeat('b', 50);
                            update t set i = ivar, filler = repeat('c', 50);
                            update t set i = ivar, filler = repeat('d', 50);
                            rollback;
                        end loop;
                    end;
                    $$;
                """)

            # Write 3-4 MB to evict t from the compute cache
            cur.execute('create table f (i integer);')
            cur.execute('insert into f values (generate_series(1,100000));')

            # Read
            with env.record_duration('read'):
                cur.execute('select * from t;')
                cur.fetchall()
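
The slow-marked variants do not run in CI; assuming a stock pytest invocation (the repo's own test wrapper may differ), something like "pytest -m remote_cluster test_dup_key.py" would select just the remote case.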