Mirror of https://github.com/neondatabase/neon.git (synced 2026-01-07 21:42:56 +00:00)
This patch adds support for tenants and mostly touches the pageserver. The on-disk directory layout gains a new layer of indirection: the path to a particular repository now has the structure <pageserver workdir>/tenants/<tenant id>. A tenant id has the same format as a timeline id and is included in pageserver commands where needed. The pageserver also gains new commands, tenant_list and tenant_create, and this is reflected in the CLI. During init a default tenant is created and its id is saved in the CLI config, so subsequent commands can use it without extra options. The tenant id is also included in the compute postgres configuration, so it can be passed via ServerInfo to the safekeeper and in the connection string to the pageserver. For more info see docs/multitenancy.md.
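For illustration only, a minimal sketch of the layout described above; the tenant_repo_path helper is hypothetical, and the spelling of the CLI tenant subcommands is an assumption based on this description, not a confirmed interface:

import os

# Hypothetical helper: path to a tenant's repository under the new layout
# <pageserver workdir>/tenants/<tenant id>
def tenant_repo_path(workdir: str, tenant_id: str) -> str:
    return os.path.join(workdir, "tenants", tenant_id)

# Assumed CLI usage mirroring the new pageserver tenant_create/tenant_list
# commands (exact spelling not confirmed here; see docs/multitenancy.md):
#   zenith tenant create   # create a new tenant
#   zenith tenant list     # list known tenants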
201 lines
6.7 KiB
Python
import pytest
import random
import time

from contextlib import closing
from multiprocessing import Process, Value
from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory

pytest_plugins = ("fixtures.zenith_fixtures")


# basic test, write something in setup with wal acceptors, ensure that commits
# succeed and data is written
def test_normal_work(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory):
    zenith_cli.run(["branch", "test_wal_acceptors_normal_work", "empty"])
    wa_factory.start_n_new(3)
    pg = postgres.create_start('test_wal_acceptors_normal_work',
                               wal_acceptors=wa_factory.get_connstrs())

    with closing(pg.connect()) as conn:
        with conn.cursor() as cur:
            # we rely upon autocommit after each statement
            # as waiting for acceptors happens there
            cur.execute('CREATE TABLE t(key int primary key, value text)')
            cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
            cur.execute('SELECT sum(key) FROM t')
            assert cur.fetchone() == (5000050000, )


# Run page server and multiple acceptors, and multiple compute nodes running
# against different timelines.
def test_many_timelines(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory):
    n_timelines = 2

    wa_factory.start_n_new(3)

    branches = ["test_wal_acceptors_many_timelines_{}".format(tlin) for tlin in range(n_timelines)]

    # start postgres on each timeline
    pgs = []
    for branch in branches:
        zenith_cli.run(["branch", branch, "empty"])
        pgs.append(postgres.create_start(branch, wal_acceptors=wa_factory.get_connstrs()))

    # Do everything in different loops to have actions on different timelines
    # interleaved.
    # create schema
    for pg in pgs:
        pg.safe_psql("CREATE TABLE t(key int primary key, value text)")

    # Populate data
    for pg in pgs:
        pg.safe_psql("INSERT INTO t SELECT generate_series(1,100000), 'payload'")

    # Check data
    for pg in pgs:
        res = pg.safe_psql("SELECT sum(key) FROM t")
        assert res[0] == (5000050000, )


# Check that dead minority doesn't prevent the commits: execute insert n_inserts
# times, with fault_probability chance of getting a wal acceptor down or up
# along the way. 2 of 3 are always alive, so the work keeps going.
def test_restarts(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory):
    fault_probability = 0.01
    n_inserts = 1000
    n_acceptors = 3

    wa_factory.start_n_new(n_acceptors)

    zenith_cli.run(["branch", "test_wal_acceptors_restarts", "empty"])
    pg = postgres.create_start('test_wal_acceptors_restarts',
                               wal_acceptors=wa_factory.get_connstrs())

    # we rely upon autocommit after each statement
    # as waiting for acceptors happens there
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    failed_node = None
    cur.execute('CREATE TABLE t(key int primary key, value text)')
    for i in range(n_inserts):
        cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1, ))

        if random.random() <= fault_probability:
            if failed_node is None:
                failed_node = wa_factory.instances[random.randrange(0, n_acceptors)]
                failed_node.stop()
            else:
                failed_node.start()
                failed_node = None
    cur.execute('SELECT sum(key) FROM t')
    assert cur.fetchone() == (500500, )


start_delay_sec = 2


def delayed_wal_acceptor_start(wa):
    time.sleep(start_delay_sec)
    wa.start()


# When majority of acceptors is offline, commits are expected to be frozen
def test_unavailability(zenith_cli, postgres: PostgresFactory, wa_factory):
    wa_factory.start_n_new(2)

    zenith_cli.run(["branch", "test_wal_acceptors_unavailability", "empty"])
    pg = postgres.create_start('test_wal_acceptors_unavailability',
                               wal_acceptors=wa_factory.get_connstrs())

    # we rely upon autocommit after each statement
    # as waiting for acceptors happens there
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    # check basic work with table
    cur.execute('CREATE TABLE t(key int primary key, value text)')
    cur.execute("INSERT INTO t values (1, 'payload')")

    # shutdown one of two acceptors, that is, majority
    wa_factory.instances[0].stop()

    proc = Process(target=delayed_wal_acceptor_start, args=(wa_factory.instances[0], ))
    proc.start()

    start = time.time()
    cur.execute("INSERT INTO t values (2, 'payload')")
    # ensure that the query above was hanging while acceptor was down
    assert (time.time() - start) >= start_delay_sec
    proc.join()

    # for the world's balance, do the same with second acceptor
    wa_factory.instances[1].stop()

    proc = Process(target=delayed_wal_acceptor_start, args=(wa_factory.instances[1], ))
    proc.start()

    start = time.time()
    cur.execute("INSERT INTO t values (3, 'payload')")
    # ensure that the query above was hanging while acceptor was down
    assert (time.time() - start) >= start_delay_sec
    proc.join()

    cur.execute("INSERT INTO t values (4, 'payload')")

    cur.execute('SELECT sum(key) FROM t')
    assert cur.fetchone() == (10, )


# shut down random subset of acceptors, sleep, wake them up, rinse, repeat
def xmas_garland(acceptors, stop):
    while not bool(stop.value):
        victims = []
        for wa in acceptors:
            if random.random() >= 0.5:
                victims.append(wa)
        for v in victims:
            v.stop()
        time.sleep(1)
        for v in victims:
            v.start()
        time.sleep(1)


# shared stop flag, set to 1 when the test exits
@pytest.fixture
def stop_value():
    stop = Value('i', 0)
    yield stop
    stop.value = 1


# do inserts while concurrently bringing random subsets of acceptors down and up
def test_race_conditions(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory, stop_value):
    wa_factory.start_n_new(3)

    zenith_cli.run(["branch", "test_wal_acceptors_race_conditions", "empty"])
    pg = postgres.create_start('test_wal_acceptors_race_conditions',
                               wal_acceptors=wa_factory.get_connstrs())

    # we rely upon autocommit after each statement
    # as waiting for acceptors happens there
    pg_conn = pg.connect()
    cur = pg_conn.cursor()

    cur.execute('CREATE TABLE t(key int primary key, value text)')

    proc = Process(target=xmas_garland, args=(wa_factory.instances, stop_value))
    proc.start()

    for i in range(1000):
        cur.execute("INSERT INTO t values (%s, 'payload');", (i + 1, ))

    cur.execute('SELECT sum(key) FROM t')
    assert cur.fetchone() == (500500, )

    stop_value.value = 1
    proc.join()