diff --git a/.circleci/config.yml b/.circleci/config.yml
index 9e4b9ea4ba..1ba8a62d4d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -267,7 +267,7 @@ jobs:
            # -n4 uses four processes to run tests via pytest-xdist
            # -s is not used to prevent pytest from capturing output, because tests are running
            # in parallel and logs are mixed between different tests
-           pipenv run pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short --verbose -rA $TEST_SELECTION $EXTRA_PARAMS
+           pipenv run pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short --verbose -m "not remote_cluster" -rA $TEST_SELECTION $EXTRA_PARAMS
      - run:
          # CircleCI artifacts are preserved one file at a time, so skipping
          # this step isn't a good idea. If you want to extract the
diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml
new file mode 100644
index 0000000000..c87a22afc1
--- /dev/null
+++ b/.github/workflows/benchmarking.yml
@@ -0,0 +1,158 @@
+name: benchmarking
+
+on:
+  # uncomment to run on push for debugging your PR
+  # push:
+  #   branches: [ mybranch ]
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    # ┌───────────── minute (0 - 59)
+    # │ ┌───────────── hour (0 - 23)
+    # │ │ ┌───────────── day of the month (1 - 31)
+    # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
+    # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+    - cron: '36 7 * * *' # run once a day, timezone is UTC
+
+env:
+  BASE_URL: "https://console.zenith.tech"
+
+jobs:
+  bench:
+    # This workflow runs on a self-hosted runner.
+    # Its environment is quite different from the usual GitHub runner;
+    # probably the most important difference is that it doesn't start from a clean workspace each time,
+    # e.g. if you install system packages they are not cleaned up, since you install them
+    # directly on the host machine, not in a container.
+    # See the documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
+    runs-on: [self-hosted, zenith-benchmarker]
+
+    steps:
+      - name: Checkout zenith repo
+        uses: actions/checkout@v2
+
+      - name: Checkout zenith-perf-data repo
+        uses: actions/checkout@v2
+        with:
+          repository: zenithdb/zenith-perf-data
+          token: ${{ secrets.VIP_VAP_ACCESS_TOKEN }}
+          ref: testing # TODO replace with master once everything is ready
+          path: zenith-perf-data
+
+      # actions/setup-python@v2 is not working correctly on self-hosted runners,
+      # see https://github.com/actions/setup-python/issues/162
+      # and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular,
+      # so the simplest solution is to use the already installed system Python and spin up virtualenvs for job runs.
+      # Python 3.7.10 is already installed on the machine, so use it to install pipenv and then use pipenv's virtualenvs.
+      - name: Install pipenv & deps
+        run: |
+          python3 -m pip install --upgrade pipenv wheel
+          # since pip/pipenv caches are reused there shouldn't be any trouble with installing every time
+          pipenv install
+
+      - name: Show versions
+        run: |
+          echo Python
+          python3 --version
+          pipenv run python3 --version
+          echo Pipenv
+          pipenv --version
+          echo Pgbench
+          pgbench --version
+
+      # FIXME cluster setup is skipped due to various changes in the console API;
+      # for now a pre-created cluster is used. When the API gains some stability
+      # after the massive changes, dynamic cluster setup will be revived.
+      # So use the pre-created cluster.
It needs to be started manually, but stop is automatic after 5 minutes of inactivity + - name: Setup cluster + env: + BENCHMARK_CONSOLE_USER_PASSWORD: "${{ secrets.BENCHMARK_CONSOLE_USER_PASSWORD }}" + BENCHMARK_CONSOLE_ACCESS_TOKEN: "${{ secrets.BENCHMARK_CONSOLE_ACCESS_TOKEN }}" + # USERNAME: "benchmark" + shell: bash + run: | + set -e + # echo "Creating cluster" + + # CLUSTER=$(curl -s --fail --show-error $BASE_URL/api/v1/clusters.json \ + # -H 'Content-Type: application/json; charset=utf-8' \ + # -H "Authorization: Bearer $BENCHMARK_CONSOLE_ACCESS_TOKEN" \ + # --data-binary @- << EOF + # { + # "cluster": { + # "name": "default_cluster", + # "region_id": "2", + # "instance_type_id": 7, + # "settings": {} + # }, + # "database": {"name": "benchmark"}, + # "role": {"name": "$USERNAME", "password": "$BENCHMARK_CONSOLE_USER_PASSWORD"} + # } + # EOF + # ) + + # echo "Created cluster" + + echo "Starting cluster" + CLUSTER_ID=285 + CLUSTER=$(curl -s --fail --show-error -X POST $BASE_URL/api/v1/clusters/$CLUSTER_ID/start \ + -H "Authorization: Bearer $BENCHMARK_CONSOLE_ACCESS_TOKEN") + echo $CLUSTER | python -m json.tool + + echo "Waiting for cluster to become ready" + sleep 10 + + # # note that jq is installed on host system + # CLUSTER_ID=$(echo $CLUSTER| jq ".id") + echo "CLUSTER_ID=$CLUSTER_ID" >> $GITHUB_ENV + # echo "Constructing connstr" + # CLUSTER=$(curl -s --fail --show-error -X GET $BASE_URL/api/v1/clusters/$CLUSTER_ID.json \ + # -H "Authorization: Bearer $BENCHMARK_CONSOLE_ACCESS_TOKEN") + + # echo $CLUSTER | python -m json.tool + # CONNSTR=$(echo $CLUSTER | jq -r ".| \"postgresql://$USERNAME:$BENCHMARK_CONSOLE_USER_PASSWORD@\(.public_ip_address):\(.public_pg_port)/benchmark\"") + # echo "BENCHMARK_CONNSTR=$CONNSTR" >> $GITHUB_ENV + + - name: Run benchmark + # pgbench is installed system wide from official repo + # https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/ + # via + # sudo tee /etc/yum.repos.d/pgdg.repo< Tuple[List[Tuple[str, str]], List[str]]: + value_columns = [] + common_columns = [] + for item in values: + if item['name'] in KEY_EXCLUDE_FIELDS: + continue + if item['report'] != 'test_param': + value_columns.append(cast(str, item['name'])) + else: + common_columns.append( + (cast(str, item['name']), cast(str, item['value']))) + value_columns.sort() + common_columns.sort(key=lambda x: x[0]) # sort by name + return common_columns, value_columns + + +def format_ratio(ratio: float, report: str) -> Tuple[str, str]: + color = '' + sign = '+' if ratio > 0 else '' + if abs(ratio) < 0.05: + return f' ({sign}{ratio:.2f})', color + + if report not in {'test_param', 'higher_is_better', 'lower_is_better'}: + raise ValueError(f'Unknown report type: {report}') + + if report == 'test_param': + return f'{ratio:.2f}', color + + if ratio > 0: + if report == 'higher_is_better': + color = POSITIVE_COLOR + elif report == 'lower_is_better': + color = NEGATIVE_COLOR + elif ratio < 0: + if report == 'higher_is_better': + color = NEGATIVE_COLOR + elif report == 'lower_is_better': + color = POSITIVE_COLOR + + return f' ({sign}{ratio:.2f})', color + + +def extract_value(name: str, suit_run: SuitRun) -> Optional[Dict[str, Any]]: + for item in suit_run.values['data']: + if item['name'] == name: + return item + return None + + +def get_row_values(columns: List[str], run_result: SuitRun, + prev_result: Optional[SuitRun]) -> List[RowValue]: + row_values = [] + for column in columns: + current_value = extract_value(column, run_result) + if current_value is None: + # should never happen 
+ raise ValueError(f'{column} not found in {run_result.values}') + + value = current_value["value"] + if isinstance(value, float): + value = f'{value:.2f}' + + if prev_result is None: + row_values.append(RowValue(value, '', '')) + continue + + prev_value = extract_value(column, prev_result) + if prev_value is None: + # this might happen when new metric is added and there is no value for it in previous run + # let this be here, TODO add proper handling when this actually happens + raise ValueError(f'{column} not found in previous result') + ratio = float(value) / float(prev_value['value']) - 1 + ratio_display, color = format_ratio(ratio, + current_value['report']) + row_values.append(RowValue(value, color, ratio_display)) + return row_values + + +@dataclass +class SuiteRunTableRow: + revision: str + values: List[RowValue] + + +def prepare_rows_from_runs(value_columns: List[str], + runs: List[SuitRun]) -> List[SuiteRunTableRow]: + rows = [] + prev_run = None + for run in runs: + rows.append( + SuiteRunTableRow(revision=run.revision, + values=get_row_values(value_columns, run, + prev_run))) + prev_run = run + + return rows + + +def main(args: argparse.Namespace) -> None: + input_dir = Path(args.input_dir) + grouped_runs = {} + # we have files in form: _.json + # fill them in the hashmap so we have grouped items for the + # same run configuration (scale, duration etc.) ordered by counter. + for item in sorted(input_dir.iterdir(), + key=lambda x: int(x.name.split('_')[0])): + run_data = json.loads(item.read_text()) + revision = run_data['revision'] + + for suit_result in run_data['result']: + key = "{}{}".format(run_data['platform'], suit_result['suit']) + # pack total duration as a synthetic value + total_duration = suit_result['total_duration'] + suit_result['data'].append({ + 'name': 'total_duration', + 'value': total_duration, + 'unit': 's', + 'report': 'lower_is_better', + }) + common_columns, value_columns = get_columns(suit_result['data']) + + grouped_runs.setdefault( + key, + SuitRuns( + platform=run_data['platform'], + suit=suit_result['suit'], + common_columns=common_columns, + value_columns=value_columns, + runs=[], + ), + ) + + grouped_runs[key].runs.append( + SuitRun(revision=revision, values=suit_result)) + context = {} + for result in grouped_runs.values(): + suit = result.suit + context[suit] = { + 'common_columns': + result.common_columns, + 'value_columns': + result.value_columns, + 'platform': + result.platform, + # reverse the order so newest results are on top of the table + 'rows': + reversed(prepare_rows_from_runs(result.value_columns, + result.runs)), + } + + template = Template((Path(__file__).parent / 'perf_report_template.html').read_text()) + + Path(args.out).write_text(template.render(context=context)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--input-dir', + dest='input_dir', + required=True, + help='Directory with jsons generated by the test suite', + ) + parser.add_argument('--out', required=True, help='Output html file path') + args = parser.parse_args() + main(args) diff --git a/scripts/perf_report_template.html b/scripts/perf_report_template.html new file mode 100644 index 0000000000..2847e75a00 --- /dev/null +++ b/scripts/perf_report_template.html @@ -0,0 +1,52 @@ + + + + + + +

+    <h1>Zenith Performance Tests</h1>
+
+    {% for suit_name, suit_data in context.items() %}
+    <h2>Runs for {{ suit_name }}</h2>
+    <p>
+        platform: {{ suit_data.platform }}<br>
+        {% for common_column_name, common_column_value in suit_data.common_columns %}
+        {{ common_column_name }}: {{ common_column_value }}<br>
+        {% endfor %}
+    </p>
+    <table>
+        <tr>
+            <th>revision</th>
+            {% for column_name in suit_data.value_columns %}
+            <th>{{ column_name }}</th>
+            {% endfor %}
+        </tr>
+        {% for row in suit_data.rows %}
+        <tr>
+            <td>{{ row.revision[:6] }}</td>
+            {% for column_value in row.values %}
+            <td>{{ column_value.value }}{{column_value.ratio}}</td>
+            {% endfor %}
+        </tr>
+        {% endfor %}
+    </table>
+ {% endfor %} + + + + diff --git a/test_runner/Pipfile b/test_runner/Pipfile index e179553dbd..572ff82ec8 100644 --- a/test_runner/Pipfile +++ b/test_runner/Pipfile @@ -5,13 +5,14 @@ name = "pypi" [packages] pytest = ">=6.0.0" -psycopg2 = "*" typing-extensions = "*" pyjwt = {extras = ["crypto"], version = "*"} requests = "*" pytest-xdist = "*" asyncpg = "*" cached-property = "*" +psycopg2-binary = "*" +jinja2 = "*" [dev-packages] # Behavior may change slightly between versions. These are run continuously, diff --git a/test_runner/Pipfile.lock b/test_runner/Pipfile.lock index 185b91189b..1524bbe584 100644 --- a/test_runner/Pipfile.lock +++ b/test_runner/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "63b72760ef37375186a638066ba0ad5804dbace99ddc503ea654e9749070ab24" + "sha256": "c309cb963a7b07ae3d30e9cbf08b495f77bdecc0e5356fc89d133c4fbcb65b2b" }, "pipfile-spec": 6, "requires": { @@ -177,13 +177,96 @@ ], "version": "==1.1.1" }, - "packaging": { + "jinja2": { "hashes": [ - "sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7", - "sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14" + "sha256:827a0e32839ab1600d4eb1c4c33ec5a8edfbc5cb42dafa13b81f182f97784b45", + "sha256:8569982d3f0889eed11dd620c706d39b60c36d6d25843961f33f77fb6bc6b20c" + ], + "index": "pypi", + "version": "==3.0.2" + }, + "markupsafe": { + "hashes": [ + "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298", + "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64", + "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b", + "sha256:04635854b943835a6ea959e948d19dcd311762c5c0c6e1f0e16ee57022669194", + "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567", + "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff", + "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724", + "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74", + "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646", + "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35", + "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6", + "sha256:20dca64a3ef2d6e4d5d615a3fd418ad3bde77a47ec8a23d984a12b5b4c74491a", + "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6", + "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad", + "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26", + "sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38", + "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac", + "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7", + "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6", + "sha256:4296f2b1ce8c86a6aea78613c34bb1a672ea0e3de9c6ba08a960efe0b0a09047", + "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75", + "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f", + "sha256:4dc8f9fb58f7364b63fd9f85013b780ef83c11857ae79f2feda41e270468dd9b", + "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135", + "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8", + "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a", + "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a", + "sha256:5b6d930f030f8ed98e3e6c98ffa0652bdb82601e7a016ec2ab5d7ff23baa78d1", + 
"sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9", + "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864", + "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914", + "sha256:6300b8454aa6930a24b9618fbb54b5a68135092bc666f7b06901f897fa5c2fee", + "sha256:63f3268ba69ace99cab4e3e3b5840b03340efed0948ab8f78d2fd87ee5442a4f", + "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18", + "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8", + "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2", + "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d", + "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b", + "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b", + "sha256:89c687013cb1cd489a0f0ac24febe8c7a666e6e221b783e53ac50ebf68e45d86", + "sha256:8d206346619592c6200148b01a2142798c989edcb9c896f9ac9722a99d4e77e6", + "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f", + "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb", + "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833", + "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28", + "sha256:9f02365d4e99430a12647f09b6cc8bab61a6564363f313126f775eb4f6ef798e", + "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415", + "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902", + "sha256:aca6377c0cb8a8253e493c6b451565ac77e98c2951c45f913e0b52facdcff83f", + "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d", + "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9", + "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d", + "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145", + "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066", + "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c", + "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1", + "sha256:cdfba22ea2f0029c9261a4bd07e830a8da012291fbe44dc794e488b6c9bb353a", + "sha256:d6c7ebd4e944c85e2c3421e612a7057a2f48d478d79e61800d81468a8d842207", + "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f", + "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53", + "sha256:deb993cacb280823246a026e3b2d81c493c53de6acfd5e6bfe31ab3402bb37dd", + "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134", + "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85", + "sha256:f0567c4dc99f264f49fe27da5f735f414c4e7e7dd850cfd8e69f0862d7c74ea9", + "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5", + "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94", + "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509", + "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51", + "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872" ], "markers": "python_version >= '3.6'", - "version": "==21.0" + "version": "==2.0.1" + }, + "packaging": { + "hashes": [ + "sha256:096d689d78ca690e4cd8a89568ba06d07ca097e3306a4381635073ca91479966", + "sha256:14317396d1e8cdb122989b916fa2c7e9ca8e2be9e8060a6eff75b6b7b4d8a7e0" + ], + "markers": "python_version >= '3.6'", + "version": "==21.2" }, "pluggy": { "hashes": [ @@ -193,17 +276,42 @@ 
"markers": "python_version >= '3.6'", "version": "==1.0.0" }, - "psycopg2": { + "psycopg2-binary": { "hashes": [ - "sha256:079d97fc22de90da1d370c90583659a9f9a6ee4007355f5825e5f1c70dffc1fa", - "sha256:2087013c159a73e09713294a44d0c8008204d06326006b7f652bef5ace66eebb", - "sha256:2c992196719fadda59f72d44603ee1a2fdcc67de097eea38d41c7ad9ad246e62", - "sha256:7640e1e4d72444ef012e275e7b53204d7fab341fb22bc76057ede22fe6860b25", - "sha256:7f91312f065df517187134cce8e395ab37f5b601a42446bdc0f0d51773621854", - "sha256:830c8e8dddab6b6716a4bf73a09910c7954a92f40cf1d1e702fb93c8a919cc56", - "sha256:89409d369f4882c47f7ea20c42c5046879ce22c1e4ea20ef3b00a4dfc0a7f188", - "sha256:bf35a25f1aaa8a3781195595577fcbb59934856ee46b4f252f56ad12b8043bcf", - "sha256:de5303a6f1d0a7a34b9d40e4d3bef684ccc44a49bbe3eb85e3c0bffb4a131b7c" + "sha256:0b7dae87f0b729922e06f85f667de7bf16455d411971b2043bbd9577af9d1975", + "sha256:0f2e04bd2a2ab54fa44ee67fe2d002bb90cee1c0f1cc0ebc3148af7b02034cbd", + "sha256:123c3fb684e9abfc47218d3784c7b4c47c8587951ea4dd5bc38b6636ac57f616", + "sha256:1473c0215b0613dd938db54a653f68251a45a78b05f6fc21af4326f40e8360a2", + "sha256:14db1752acdd2187d99cb2ca0a1a6dfe57fc65c3281e0f20e597aac8d2a5bd90", + "sha256:1e3a362790edc0a365385b1ac4cc0acc429a0c0d662d829a50b6ce743ae61b5a", + "sha256:1e85b74cbbb3056e3656f1cc4781294df03383127a8114cbc6531e8b8367bf1e", + "sha256:20f1ab44d8c352074e2d7ca67dc00843067788791be373e67a0911998787ce7d", + "sha256:24b0b6688b9f31a911f2361fe818492650795c9e5d3a1bc647acbd7440142a4f", + "sha256:2f62c207d1740b0bde5c4e949f857b044818f734a3d57f1d0d0edc65050532ed", + "sha256:3242b9619de955ab44581a03a64bdd7d5e470cc4183e8fcadd85ab9d3756ce7a", + "sha256:35c4310f8febe41f442d3c65066ca93cccefd75013df3d8c736c5b93ec288140", + "sha256:4235f9d5ddcab0b8dbd723dca56ea2922b485ea00e1dafacf33b0c7e840b3d32", + "sha256:542875f62bc56e91c6eac05a0deadeae20e1730be4c6334d8f04c944fcd99759", + "sha256:5ced67f1e34e1a450cdb48eb53ca73b60aa0af21c46b9b35ac3e581cf9f00e31", + "sha256:661509f51531ec125e52357a489ea3806640d0ca37d9dada461ffc69ee1e7b6e", + "sha256:7360647ea04db2e7dff1648d1da825c8cf68dc5fbd80b8fb5b3ee9f068dcd21a", + "sha256:736b8797b58febabb85494142c627bd182b50d2a7ec65322983e71065ad3034c", + "sha256:8c13d72ed6af7fd2c8acbd95661cf9477f94e381fce0792c04981a8283b52917", + "sha256:988b47ac70d204aed01589ed342303da7c4d84b56c2f4c4b8b00deda123372bf", + "sha256:995fc41ebda5a7a663a254a1dcac52638c3e847f48307b5416ee373da15075d7", + "sha256:a36c7eb6152ba5467fb264d73844877be8b0847874d4822b7cf2d3c0cb8cdcb0", + "sha256:aed4a9a7e3221b3e252c39d0bf794c438dc5453bc2963e8befe9d4cd324dff72", + "sha256:aef9aee84ec78af51107181d02fe8773b100b01c5dfde351184ad9223eab3698", + "sha256:b0221ca5a9837e040ebf61f48899926b5783668b7807419e4adae8175a31f773", + "sha256:b4d7679a08fea64573c969f6994a2631908bb2c0e69a7235648642f3d2e39a68", + "sha256:c250a7ec489b652c892e4f0a5d122cc14c3780f9f643e1a326754aedf82d9a76", + "sha256:ca86db5b561b894f9e5f115d6a159fff2a2570a652e07889d8a383b5fae66eb4", + "sha256:cfc523edecddaef56f6740d7de1ce24a2fdf94fd5e704091856a201872e37f9f", + "sha256:d92272c7c16e105788efe2cfa5d680f07e34e0c29b03c1908f8636f55d5f915a", + "sha256:da113b70f6ec40e7d81b43d1b139b9db6a05727ab8be1ee559f3a69854a69d34", + "sha256:f6fac64a38f6768e7bc7b035b9e10d8a538a9fadce06b983fb3e6fa55ac5f5ce", + "sha256:f8559617b1fcf59a9aedba2c9838b5b6aa211ffedecabca412b92a1ff75aac1a", + "sha256:fbb42a541b1093385a2d8c7eec94d26d30437d0e77c1d25dae1dcc46741a385e" ], "index": "pypi", "version": "==2.9.1" @@ -334,11 +442,11 @@ }, "filelock": { "hashes": [ - 
"sha256:2b5eb3589e7fdda14599e7eb1a50e09b4cc14f34ed98b8ba56d33bfaafcbef2f", - "sha256:34a9f35f95c441e7b38209775d6e0337f9a3759f3565f6c5798f19618527c76f" + "sha256:7afc856f74fa7006a289fd10fa840e1eebd8bbff6bffb69c26c54a0512ea8cf8", + "sha256:bb2a1c717df74c48a2d00ed625e5a66f8572a3a30baacb7657add1d7bac4097b" ], "markers": "python_version >= '3.6'", - "version": "==3.3.1" + "version": "==3.3.2" }, "flake8": { "hashes": [ @@ -510,11 +618,11 @@ }, "virtualenv": { "hashes": [ - "sha256:10062e34c204b5e4ec5f62e6ef2473f8ba76513a9a617e873f1f8fb4a519d300", - "sha256:bcc17f0b3a29670dd777d6f0755a4c04f28815395bca279cdcb213b97199a6b8" + "sha256:4b02e52a624336eece99c96e3ab7111f469c24ba226a53ec474e8e787b365814", + "sha256:576d05b46eace16a9c348085f7d0dc8ef28713a2cabaa1cf0aea41e8f12c9218" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==20.8.1" + "version": "==20.10.0" }, "virtualenv-clone": { "hashes": [ diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py index 8f5deef690..a83cbc95dd 100644 --- a/test_runner/fixtures/benchmark_fixture.py +++ b/test_runner/fixtures/benchmark_fixture.py @@ -1,29 +1,22 @@ +import dataclasses +import json import os +from pathlib import Path import re +import subprocess import timeit -import pathlib -import uuid -import psycopg2 +import calendar +import enum +from datetime import datetime import pytest from _pytest.config import Config -from _pytest.runner import CallInfo from _pytest.terminal import TerminalReporter -import shutil -import signal -import subprocess -import time +import warnings from contextlib import contextmanager -from contextlib import closing -from pathlib import Path -from dataclasses import dataclass # Type-related stuff -from psycopg2.extensions import connection as PgConnection -from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast -from typing_extensions import Literal - -from .utils import (get_self_dir, mkdir_if_needed, subprocess_capture) +from typing import Iterator, Optional """ This file contains fixtures for micro-benchmarks. @@ -39,7 +32,7 @@ def test_mybench(zenith_simple_env: env, zenbenchmark): # Initialize the test ... - + # Run the test, timing how long it takes with zenbenchmark.record_duration('test_query'): cur.execute('SELECT test_query(...)') @@ -55,36 +48,91 @@ in the test initialization, or measure disk usage after the test query. """ -# TODO: It would perhaps be better to store the results as additional -# properties in the pytest TestReport objects, to make them visible to -# other pytest tools. -class ZenithBenchmarkResults: - """ An object for recording benchmark results. """ - def __init__(self): - self.results = [] +@dataclasses.dataclass +class PgBenchRunResult: + scale: int + number_of_clients: int + number_of_threads: int + number_of_transactions_actually_processed: int + latency_average: float + latency_stddev: float + tps_including_connection_time: float + tps_excluding_connection_time: float + init_duration: float + init_start_timestamp: int + init_end_timestamp: int + run_duration: float + run_start_timestamp: int + run_end_timestamp: int - def record(self, test_name: str, metric_name: str, metric_value: float, unit: str): - """ - Record a benchmark result. 
- """ + # TODO progress - self.results.append((test_name, metric_name, metric_value, unit)) + @classmethod + def parse_from_output( + cls, + out: 'subprocess.CompletedProcess[str]', + init_duration: float, + init_start_timestamp: int, + init_end_timestamp: int, + run_duration: float, + run_start_timestamp: int, + run_end_timestamp: int, + ): + stdout_lines = out.stdout.splitlines() + # we know significant parts of these values from test input + # but to be precise take them from output + # scaling factor: 5 + assert "scaling factor" in stdout_lines[1] + scale = int(stdout_lines[1].split()[-1]) + # number of clients: 1 + assert "number of clients" in stdout_lines[3] + number_of_clients = int(stdout_lines[3].split()[-1]) + # number of threads: 1 + assert "number of threads" in stdout_lines[4] + number_of_threads = int(stdout_lines[4].split()[-1]) + # number of transactions actually processed: 1000/1000 + assert "number of transactions actually processed" in stdout_lines[6] + number_of_transactions_actually_processed = int(stdout_lines[6].split("/")[1]) + # latency average = 19.894 ms + assert "latency average" in stdout_lines[7] + latency_average = stdout_lines[7].split()[-2] + # latency stddev = 3.387 ms + assert "latency stddev" in stdout_lines[8] + latency_stddev = stdout_lines[8].split()[-2] + # tps = 50.219689 (including connections establishing) + assert "(including connections establishing)" in stdout_lines[9] + tps_including_connection_time = stdout_lines[9].split()[2] + # tps = 50.264435 (excluding connections establishing) + assert "(excluding connections establishing)" in stdout_lines[10] + tps_excluding_connection_time = stdout_lines[10].split()[2] + + return cls( + scale=scale, + number_of_clients=number_of_clients, + number_of_threads=number_of_threads, + number_of_transactions_actually_processed=number_of_transactions_actually_processed, + latency_average=float(latency_average), + latency_stddev=float(latency_stddev), + tps_including_connection_time=float(tps_including_connection_time), + tps_excluding_connection_time=float(tps_excluding_connection_time), + init_duration=init_duration, + init_start_timestamp=init_start_timestamp, + init_end_timestamp=init_end_timestamp, + run_duration=run_duration, + run_start_timestamp=run_start_timestamp, + run_end_timestamp=run_end_timestamp, + ) -# Will be recreated in each session. -zenbenchmark_results: ZenithBenchmarkResults = ZenithBenchmarkResults() - - -# Session scope fixture that initializes the results object -@pytest.fixture(autouse=True, scope='session') -def zenbenchmark_global(request) -> Iterator[ZenithBenchmarkResults]: - """ - This is a python decorator for benchmark fixtures - """ - global zenbenchmark_results - zenbenchmark_results = ZenithBenchmarkResults() - - yield zenbenchmark_results +@enum.unique +class MetricReport(str, enum.Enum): # str is a hack to make it json serializable + # this means that this is a constant test parameter + # like number of transactions, or number of clients + TEST_PARAM = 'test_param' + # reporter can use it to mark test runs with higher values as improvements + HIGHER_IS_BETTER = 'higher_is_better' + # the same but for lower values + LOWER_IS_BETTER = 'lower_is_better' class ZenithBenchmarker: @@ -92,30 +140,109 @@ class ZenithBenchmarker: An object for recording benchmark results. 
This is created for each test function by the zenbenchmark fixture """ - def __init__(self, results, request): - self.results = results - self.request = request + def __init__(self, property_recorder): + # property recorder here is a pytest fixture provided by junitxml module + # https://docs.pytest.org/en/6.2.x/reference.html#pytest.junitxml.record_property + self.property_recorder = property_recorder - def record(self, metric_name: str, metric_value: float, unit: str): + def record( + self, + metric_name: str, + metric_value: float, + unit: str, + report: MetricReport, + ): """ Record a benchmark result. """ - self.results.record(self.request.node.name, metric_name, metric_value, unit) + # just to namespace the value + name = f"zenith_benchmarker_{metric_name}" + self.property_recorder( + name, + { + "name": metric_name, + "value": metric_value, + "unit": unit, + "report": report, + }, + ) @contextmanager - def record_duration(self, metric_name): + def record_duration(self, metric_name: str): """ Record a duration. Usage: - + with zenbenchmark.record_duration('foobar_runtime'): foobar() # measure this - """ start = timeit.default_timer() yield end = timeit.default_timer() - self.results.record(self.request.node.name, metric_name, end - start, 's') + self.record( + metric_name=metric_name, + metric_value=end - start, + unit="s", + report=MetricReport.LOWER_IS_BETTER, + ) + + def record_pg_bench_result(self, pg_bench_result: PgBenchRunResult): + self.record("scale", pg_bench_result.scale, '', MetricReport.TEST_PARAM) + self.record("number_of_clients", + pg_bench_result.number_of_clients, + '', + MetricReport.TEST_PARAM) + self.record("number_of_threads", + pg_bench_result.number_of_threads, + '', + MetricReport.TEST_PARAM) + self.record( + "number_of_transactions_actually_processed", + pg_bench_result.number_of_transactions_actually_processed, + '', + # thats because this is predefined by test matrix and doesnt change across runs + report=MetricReport.TEST_PARAM, + ) + self.record("latency_average", + pg_bench_result.latency_average, + unit="ms", + report=MetricReport.LOWER_IS_BETTER) + self.record("latency_stddev", + pg_bench_result.latency_stddev, + unit="ms", + report=MetricReport.LOWER_IS_BETTER) + self.record("tps_including_connection_time", + pg_bench_result.tps_including_connection_time, + '', + report=MetricReport.HIGHER_IS_BETTER) + self.record("tps_excluding_connection_time", + pg_bench_result.tps_excluding_connection_time, + '', + report=MetricReport.HIGHER_IS_BETTER) + self.record("init_duration", + pg_bench_result.init_duration, + unit="s", + report=MetricReport.LOWER_IS_BETTER) + self.record("init_start_timestamp", + pg_bench_result.init_start_timestamp, + '', + MetricReport.TEST_PARAM) + self.record("init_end_timestamp", + pg_bench_result.init_end_timestamp, + '', + MetricReport.TEST_PARAM) + self.record("run_duration", + pg_bench_result.run_duration, + unit="s", + report=MetricReport.LOWER_IS_BETTER) + self.record("run_start_timestamp", + pg_bench_result.run_start_timestamp, + '', + MetricReport.TEST_PARAM) + self.record("run_end_timestamp", + pg_bench_result.run_end_timestamp, + '', + MetricReport.TEST_PARAM) def get_io_writes(self, pageserver) -> int: """ @@ -149,7 +276,7 @@ class ZenithBenchmarker: assert matches return int(round(float(matches.group(1)))) - def get_timeline_size(self, repo_dir: str, tenantid: str, timelineid: str): + def get_timeline_size(self, repo_dir: Path, tenantid: str, timelineid: str): """ Calculate the on-disk size of a timeline """ @@ -171,47 
+298,82 @@ class ZenithBenchmarker: yield after = self.get_io_writes(pageserver) - self.results.record(self.request.node.name, - metric_name, - round((after - before) / (1024 * 1024)), - 'MB') + self.record(metric_name, + round((after - before) / (1024 * 1024)), + "MB", + report=MetricReport.LOWER_IS_BETTER) -@pytest.fixture(scope='function') -def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]: +@pytest.fixture(scope="function") +def zenbenchmark(record_property) -> Iterator[ZenithBenchmarker]: """ This is a python decorator for benchmark fixtures. It contains functions for recording measurements, and prints them out at the end. """ - benchmarker = ZenithBenchmarker(zenbenchmark_global, request) + benchmarker = ZenithBenchmarker(record_property) yield benchmarker +def get_out_path(target_dir: Path, revision: str) -> Path: + """ + get output file path + if running in the CI uses commit revision + to avoid duplicates uses counter + """ + # use UTC timestamp as a counter marker to avoid weird behaviour + # when for example files are deleted + ts = calendar.timegm(datetime.utcnow().utctimetuple()) + path = target_dir / f"{ts}_{revision}.json" + assert not path.exists() + return path + + # Hook to print the results at the end @pytest.hookimpl(hookwrapper=True) def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, config: Config): yield + revision = os.getenv("GITHUB_SHA", "local") + platform = os.getenv("PLATFORM", "local") - global zenbenchmark_results + terminalreporter.section("Benchmark results", "-") - if not zenbenchmark_results: + result = [] + for test_report in terminalreporter.stats.get("passed", []): + result_entry = [] + + for _, recorded_property in test_report.user_properties: + terminalreporter.write("{}.{}: ".format(test_report.head_line, + recorded_property["name"])) + unit = recorded_property["unit"] + value = recorded_property["value"] + if unit == "MB": + terminalreporter.write("{0:,.0f}".format(value), green=True) + elif unit in ("s", "ms") and isinstance(value, float): + terminalreporter.write("{0:,.3f}".format(value), green=True) + elif isinstance(value, float): + terminalreporter.write("{0:,.4f}".format(value), green=True) + else: + terminalreporter.write(str(value), green=True) + terminalreporter.line(" {}".format(unit)) + + result_entry.append(recorded_property) + + result.append({ + "suit": test_report.nodeid, + "total_duration": test_report.duration, + "data": result_entry, + }) + + out_dir = config.getoption("out_dir") + if out_dir is None: + warnings.warn("no out dir provided to store performance test results") return - terminalreporter.section('Benchmark results', "-") + if not result: + warnings.warn("no results to store (no passed test suites)") + return - for result in zenbenchmark_results.results: - func = result[0] - metric_name = result[1] - metric_value = result[2] - unit = result[3] - - terminalreporter.write("{}.{}: ".format(func, metric_name)) - - if unit == 'MB': - terminalreporter.write("{0:,.0f}".format(metric_value), green=True) - elif unit == 's': - terminalreporter.write("{0:,.3f}".format(metric_value), green=True) - else: - terminalreporter.write("{0:,.4f}".format(metric_value), green=True) - - terminalreporter.line(" {}".format(unit)) + get_out_path(Path(out_dir), revision=revision).write_text( + json.dumps({ + "revision": revision, "platform": platform, "result": result + }, indent=4)) diff --git a/test_runner/fixtures/zenith_fixtures.py b/test_runner/fixtures/zenith_fixtures.py index 
4622cf64d4..48faf47a6d 100644 --- a/test_runner/fixtures/zenith_fixtures.py +++ b/test_runner/fixtures/zenith_fixtures.py @@ -6,6 +6,7 @@ import asyncpg import os import pathlib import uuid +import warnings import jwt import json import psycopg2 @@ -26,6 +27,7 @@ from dataclasses import dataclass from psycopg2.extensions import connection as PgConnection from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast from typing_extensions import Literal +import pytest import requests @@ -58,6 +60,16 @@ DEFAULT_POSTGRES_DIR = 'tmp_install' BASE_PORT = 15000 WORKER_PORT_NUM = 100 + +def pytest_addoption(parser): + parser.addoption( + "--skip-interfering-proc-check", + dest="skip_interfering_proc_check", + action="store_true", + help="skip check for interferring processes", + ) + + # These are set in pytest_configure() base_dir = "" zenith_binpath = "" @@ -65,14 +77,10 @@ pg_distrib_dir = "" top_output_dir = "" -def pytest_configure(config): - """ - Ensure that no unwanted daemons are running before we start testing. - Check that we do not owerflow available ports range. - """ - numprocesses = config.getoption('numprocesses') - if numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768: # do not use ephemeral ports - raise Exception('Too many workers configured. Cannot distrubute ports for services.') +def check_interferring_processes(config): + if config.getoption("skip_interfering_proc_check"): + warnings.warn("interferring process check is skipped") + return # does not use -c as it is not supported on macOS cmd = ['pgrep', 'pageserver|postgres|safekeeper'] @@ -86,11 +94,36 @@ def pytest_configure(config): 'Found interfering processes running. Stop all Zenith pageservers, nodes, safekeepers, as well as stand-alone Postgres.' ) + +def pytest_configure(config): + """ + Ensure that no unwanted daemons are running before we start testing. + Check that we do not owerflow available ports range. + """ + check_interferring_processes(config) + + numprocesses = config.getoption('numprocesses') + if numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768: # do not use ephemeral ports + raise Exception('Too many workers configured. Cannot distrubute ports for services.') + # find the base directory (currently this is the git root) global base_dir base_dir = os.path.normpath(os.path.join(get_self_dir(), '../..')) log.info(f'base_dir is {base_dir}') + # Compute the top-level directory for all tests. + global top_output_dir + env_test_output = os.environ.get('TEST_OUTPUT') + if env_test_output is not None: + top_output_dir = env_test_output + else: + top_output_dir = os.path.join(base_dir, DEFAULT_OUTPUT_DIR) + mkdir_if_needed(top_output_dir) + + if os.getenv("REMOTE_ENV"): + # we are in remote env and do not have zenith binaries locally + # this is the case for benchmarks run on self-hosted runner + return # Find the zenith binaries. global zenith_binpath env_zenith_bin = os.environ.get('ZENITH_BIN') @@ -100,7 +133,7 @@ def pytest_configure(config): zenith_binpath = os.path.join(base_dir, 'target/debug') log.info(f'zenith_binpath is {zenith_binpath}') if not os.path.exists(os.path.join(zenith_binpath, 'pageserver')): - raise Exception('zenith binaries not found at "{}"'.format(zenith_dir)) + raise Exception('zenith binaries not found at "{}"'.format(zenith_binpath)) # Find the postgres installation. 
global pg_distrib_dir @@ -113,15 +146,6 @@ def pytest_configure(config): if not os.path.exists(os.path.join(pg_distrib_dir, 'bin/postgres')): raise Exception('postgres not found at "{}"'.format(pg_distrib_dir)) - # Compute the top-level directory for all tests. - global top_output_dir - env_test_output = os.environ.get('TEST_OUTPUT') - if env_test_output is not None: - top_output_dir = env_test_output - else: - top_output_dir = os.path.join(base_dir, DEFAULT_OUTPUT_DIR) - mkdir_if_needed(top_output_dir) - def zenfixture(func: Fn) -> Fn: """ diff --git a/test_runner/performance/__init__.py b/test_runner/performance/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test_runner/performance/conftest.py b/test_runner/performance/conftest.py new file mode 100644 index 0000000000..cd8b40ca82 --- /dev/null +++ b/test_runner/performance/conftest.py @@ -0,0 +1,8 @@ +# pytest some has quirks with discovering plugins, so having it there just works +# probably we should create custom plugin and add it to pytest config to always have needed things at hand +def pytest_addoption(parser): + parser.addoption( + "--out-dir", + dest="out_dir", + help="Directory to ouput performance tests results to.", + ) diff --git a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py index 46dcb01c71..9892a70516 100644 --- a/test_runner/performance/test_bulk_insert.py +++ b/test_runner/performance/test_bulk_insert.py @@ -1,7 +1,7 @@ -import os from contextlib import closing from fixtures.zenith_fixtures import ZenithEnv from fixtures.log_helper import log +from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") @@ -16,7 +16,7 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") # 3. Disk space used # 4. 
Peak memory usage # -def test_bulk_insert(zenith_simple_env: ZenithEnv, zenbenchmark): +def test_bulk_insert(zenith_simple_env: ZenithEnv, zenbenchmark: ZenithBenchmarker): env = zenith_simple_env # Create a branch for us env.zenith_cli(["branch", "test_bulk_insert", "empty"]) @@ -47,10 +47,16 @@ def test_bulk_insert(zenith_simple_env: ZenithEnv, zenbenchmark): pscur.execute(f"do_gc {env.initial_tenant} {timeline} 0") # Record peak memory usage - zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(env.pageserver) / 1024, 'MB') + zenbenchmark.record("peak_mem", + zenbenchmark.get_peak_mem(env.pageserver) / 1024, + 'MB', + report=MetricReport.LOWER_IS_BETTER) # Report disk space used by the repository timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline) - zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB') + zenbenchmark.record('size', + timeline_size / (1024 * 1024), + 'MB', + report=MetricReport.LOWER_IS_BETTER) diff --git a/test_runner/performance/test_bulk_tenant_create.py b/test_runner/performance/test_bulk_tenant_create.py index e913afc27c..f2ccb1dc34 100644 --- a/test_runner/performance/test_bulk_tenant_create.py +++ b/test_runner/performance/test_bulk_tenant_create.py @@ -1,4 +1,5 @@ import timeit +from fixtures.benchmark_fixture import MetricReport import pytest from fixtures.zenith_fixtures import ZenithEnvBuilder @@ -54,4 +55,7 @@ def test_bulk_tenant_create( pg_tenant.stop() - zenbenchmark.record('tenant_creation_time', sum(time_slices) / len(time_slices), 's') + zenbenchmark.record('tenant_creation_time', + sum(time_slices) / len(time_slices), + 's', + report=MetricReport.LOWER_IS_BETTER) diff --git a/test_runner/performance/test_gist_build.py b/test_runner/performance/test_gist_build.py index b078c820b0..daa8c71df1 100644 --- a/test_runner/performance/test_gist_build.py +++ b/test_runner/performance/test_gist_build.py @@ -1,5 +1,6 @@ import os from contextlib import closing +from fixtures.benchmark_fixture import MetricReport from fixtures.zenith_fixtures import ZenithEnv from fixtures.log_helper import log @@ -48,10 +49,16 @@ def test_gist_buffering_build(zenith_simple_env: ZenithEnv, zenbenchmark): pscur.execute(f"do_gc {env.initial_tenant} {timeline} 1000000") # Record peak memory usage - zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(env.pageserver) / 1024, 'MB') + zenbenchmark.record("peak_mem", + zenbenchmark.get_peak_mem(env.pageserver) / 1024, + 'MB', + report=MetricReport.LOWER_IS_BETTER) # Report disk space used by the repository timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline) - zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB') + zenbenchmark.record('size', + timeline_size / (1024 * 1024), + 'MB', + report=MetricReport.LOWER_IS_BETTER) diff --git a/test_runner/performance/test_perf_pgbench.py b/test_runner/performance/test_perf_pgbench.py index dc50587a82..307dfb3559 100644 --- a/test_runner/performance/test_perf_pgbench.py +++ b/test_runner/performance/test_perf_pgbench.py @@ -1,6 +1,7 @@ -import os from contextlib import closing -from fixtures.zenith_fixtures import ZenithEnv +from fixtures.zenith_fixtures import PgBin, ZenithEnv + +from fixtures.benchmark_fixture import MetricReport, ZenithBenchmarker from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") @@ -15,7 +16,7 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") # 2. 
Time to run 5000 pgbench transactions # 3. Disk space used # -def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin, zenbenchmark): +def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin: PgBin, zenbenchmark: ZenithBenchmarker): env = zenith_simple_env # Create a branch for us env.zenith_cli(["branch", "test_pgbench_perf", "empty"]) @@ -55,4 +56,7 @@ def test_pgbench(zenith_simple_env: ZenithEnv, pg_bin, zenbenchmark): # Report disk space used by the repository timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline) - zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB') + zenbenchmark.record('size', + timeline_size / (1024 * 1024), + 'MB', + report=MetricReport.LOWER_IS_BETTER) diff --git a/test_runner/performance/test_perf_pgbench_remote.py b/test_runner/performance/test_perf_pgbench_remote.py new file mode 100644 index 0000000000..2d64a39a95 --- /dev/null +++ b/test_runner/performance/test_perf_pgbench_remote.py @@ -0,0 +1,125 @@ +import dataclasses +import os +import subprocess +from typing import List +from fixtures.benchmark_fixture import PgBenchRunResult, ZenithBenchmarker +import pytest +from datetime import datetime +import calendar +import timeit +import os + +pytest_plugins = ("fixtures.benchmark_fixture", ) + + +def utc_now_timestamp() -> int: + return calendar.timegm(datetime.utcnow().utctimetuple()) + + +@dataclasses.dataclass +class PgBenchRunner: + connstr: str + scale: int + transactions: int + pgbench_bin_path: str = "pgbench" + + def invoke(self, args: List[str]) -> 'subprocess.CompletedProcess[str]': + return subprocess.run([self.pgbench_bin_path, *args], + check=True, + text=True, + capture_output=True) + + def init(self, vacuum: bool = True) -> 'subprocess.CompletedProcess[str]': + args = [] + if not vacuum: + args.append("--no-vacuum") + args.extend([f"--scale={self.scale}", "--initialize", self.connstr]) + return self.invoke(args) + + def run(self, jobs: int = 1, clients: int = 1): + return self.invoke([ + f"--transactions={self.transactions}", + f"--jobs={jobs}", + f"--client={clients}", + "--progress=2", # print progress every two seconds + self.connstr, + ]) + + +@pytest.fixture +def connstr(): + res = os.getenv("BENCHMARK_CONNSTR") + if res is None: + raise ValueError("no connstr provided, use BENCHMARK_CONNSTR environment variable") + return res + + +def get_transactions_matrix(): + transactions = os.getenv("TEST_PG_BENCH_TRANSACTIONS_MATRIX") + if transactions is None: + return [10**4, 10**5] + return list(map(int, transactions.split(","))) + + +def get_scales_matrix(): + scales = os.getenv("TEST_PG_BENCH_SCALES_MATRIX") + if scales is None: + return [10, 20] + return list(map(int, scales.split(","))) + + +@pytest.mark.parametrize("scale", get_scales_matrix()) +@pytest.mark.parametrize("transactions", get_transactions_matrix()) +@pytest.mark.remote_cluster +def test_pg_bench_remote_cluster(zenbenchmark: ZenithBenchmarker, + connstr: str, + scale: int, + transactions: int): + """ + The best way is to run same pack of tests both, for local zenith + and against staging, but currently local tests heavily depend on + things available only locally e.g. zenith binaries, pageserver api, etc. + Also separate test allows to run pgbench workload against vanilla postgres + or other systems that support postgres protocol. + + Also now this is more of a liveness test because it stresses pageserver internals, + so we clearly see what goes wrong in more "real" environment. 
+ """ + pg_bin = os.getenv("PG_BIN") + if pg_bin is not None: + pgbench_bin_path = os.path.join(pg_bin, "pgbench") + else: + pgbench_bin_path = "pgbench" + + runner = PgBenchRunner( + connstr=connstr, + scale=scale, + transactions=transactions, + pgbench_bin_path=pgbench_bin_path, + ) + # calculate timestamps and durations separately + # timestamp is intended to be used for linking to grafana and logs + # duration is actually a metric and uses float instead of int for timestamp + init_start_timestamp = utc_now_timestamp() + t0 = timeit.default_timer() + runner.init() + init_duration = timeit.default_timer() - t0 + init_end_timestamp = utc_now_timestamp() + + run_start_timestamp = utc_now_timestamp() + t0 = timeit.default_timer() + out = runner.run() # TODO handle failures + run_duration = timeit.default_timer() - t0 + run_end_timestamp = utc_now_timestamp() + + res = PgBenchRunResult.parse_from_output( + out=out, + init_duration=init_duration, + init_start_timestamp=init_start_timestamp, + init_end_timestamp=init_end_timestamp, + run_duration=run_duration, + run_start_timestamp=run_start_timestamp, + run_end_timestamp=run_end_timestamp, + ) + + zenbenchmark.record_pg_bench_result(res) diff --git a/test_runner/performance/test_write_amplification.py b/test_runner/performance/test_write_amplification.py index a5850e98f6..46e8ac5266 100644 --- a/test_runner/performance/test_write_amplification.py +++ b/test_runner/performance/test_write_amplification.py @@ -12,6 +12,7 @@ # Amplification problem at its finest. import os from contextlib import closing +from fixtures.benchmark_fixture import MetricReport from fixtures.zenith_fixtures import ZenithEnv from fixtures.log_helper import log @@ -76,4 +77,7 @@ def test_write_amplification(zenith_simple_env: ZenithEnv, zenbenchmark): timeline_size = zenbenchmark.get_timeline_size(env.repo_dir, env.initial_tenant, timeline) - zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB') + zenbenchmark.record('size', + timeline_size / (1024 * 1024), + 'MB', + report=MetricReport.LOWER_IS_BETTER) diff --git a/test_runner/pytest.ini b/test_runner/pytest.ini index 7ea2ae5dfb..b7d42dfe46 100644 --- a/test_runner/pytest.ini +++ b/test_runner/pytest.ini @@ -1,4 +1,8 @@ [pytest] +addopts = + -m 'not remote_cluster' +markers = + remote_cluster minversion = 6.0 log_format = %(asctime)s.%(msecs)-3d %(levelname)s [%(filename)s:%(lineno)d] %(message)s log_date_format = %Y-%m-%d %H:%M:%S