mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-02 21:10:38 +00:00
Compare commits
21 Commits
test_for_u
...
dprice-add
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1a7afd7c1a | ||
|
|
89e988d9d8 | ||
|
|
2d6a022bb8 | ||
|
|
2cdf07f12c | ||
|
|
200a520e6c | ||
|
|
4e359db4c7 | ||
|
|
be177f82dc | ||
|
|
339a3e3146 | ||
|
|
a560b28829 | ||
|
|
024109fbeb | ||
|
|
2b25f0dfa0 | ||
|
|
057cceb559 | ||
|
|
ae805b985d | ||
|
|
85e76090ea | ||
|
|
08e7d2407b | ||
|
|
ab2757f64a | ||
|
|
e5617021a7 | ||
|
|
83ba02b431 | ||
|
|
37ecebe45b | ||
|
|
6052ecee07 | ||
|
|
e11ba24ec5 |
11
.github/workflows/build_and_test.yml
vendored
11
.github/workflows/build_and_test.yml
vendored
@@ -407,9 +407,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- uses: actions/github-script@v6
|
||||
if: >
|
||||
!cancelled() &&
|
||||
github.event_name == 'pull_request'
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
|
||||
retries: 5
|
||||
@@ -419,7 +417,7 @@ jobs:
|
||||
reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
|
||||
}
|
||||
|
||||
const script = require("./scripts/pr-comment-test-report.js")
|
||||
const script = require("./scripts/comment-test-report.js")
|
||||
await script({
|
||||
github,
|
||||
context,
|
||||
@@ -661,6 +659,9 @@ jobs:
|
||||
project: nrdv0s4kcs
|
||||
push: true
|
||||
tags: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:depot-${{needs.tag.outputs.build-tag}}
|
||||
build-args: |
|
||||
GIT_VERSION=${{ github.sha }}
|
||||
REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||
|
||||
compute-tools-image:
|
||||
runs-on: [ self-hosted, gen3, large ]
|
||||
@@ -775,7 +776,7 @@ jobs:
|
||||
run:
|
||||
shell: sh -eu {0}
|
||||
env:
|
||||
VM_BUILDER_VERSION: v0.7.3-alpha3
|
||||
VM_BUILDER_VERSION: v0.8.0
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
|
||||
614
Cargo.lock
generated
614
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -23,7 +23,7 @@ async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
atty = "0.2.14"
|
||||
aws-config = { version = "0.55", default-features = false, features=["rustls"] }
|
||||
aws-sdk-s3 = "0.25"
|
||||
aws-sdk-s3 = "0.27"
|
||||
aws-smithy-http = "0.55"
|
||||
aws-credential-types = "0.55"
|
||||
aws-types = "0.55"
|
||||
|
||||
@@ -632,6 +632,7 @@ RUN apt update && \
|
||||
libxml2 \
|
||||
libxslt1.1 \
|
||||
libzstd1 \
|
||||
libcurl4-openssl-dev \
|
||||
procps && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
||||
|
||||
@@ -362,6 +362,8 @@ impl ComputeNode {
|
||||
};
|
||||
|
||||
// Proceed with post-startup configuration. Note, that order of operations is important.
|
||||
// Disable DDL forwarding because control plane already knows about these roles/databases.
|
||||
client.simple_query("SET neon.forward_ddl = false")?;
|
||||
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
|
||||
handle_roles(spec, &mut client)?;
|
||||
handle_databases(spec, &mut client)?;
|
||||
@@ -403,7 +405,9 @@ impl ComputeNode {
|
||||
self.pg_reload_conf(&mut client)?;
|
||||
|
||||
// Proceed with post-startup configuration. Note, that order of operations is important.
|
||||
// Disable DDL forwarding because control plane already knows about these roles/databases.
|
||||
if spec.mode == ComputeMode::Primary {
|
||||
client.simple_query("SET neon.forward_ddl = false")?;
|
||||
handle_roles(&spec, &mut client)?;
|
||||
handle_databases(&spec, &mut client)?;
|
||||
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
|
||||
|
||||
@@ -121,9 +121,8 @@ impl RoleExt for Role {
|
||||
/// string of arguments.
|
||||
fn to_pg_options(&self) -> String {
|
||||
// XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane.
|
||||
// For now, we do not use generic `options` for roles. Once used, add
|
||||
// `self.options.as_pg_options()` somewhere here.
|
||||
let mut params: String = "LOGIN".to_string();
|
||||
let mut params: String = self.options.as_pg_options();
|
||||
params.push_str(" LOGIN");
|
||||
|
||||
if let Some(pass) = &self.encrypted_password {
|
||||
// Some time ago we supported only md5 and treated all encrypted_password as md5.
|
||||
|
||||
@@ -62,7 +62,7 @@ fn do_control_plane_request(
|
||||
}
|
||||
}
|
||||
|
||||
/// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT`
|
||||
/// Request spec from the control-plane by compute_id. If `NEON_CONTROL_PLANE_TOKEN`
|
||||
/// env variable is set, it will be used for authorization.
|
||||
pub fn get_spec_from_control_plane(
|
||||
base_uri: &str,
|
||||
|
||||
@@ -16,7 +16,7 @@ mod pg_helpers_tests {
|
||||
);
|
||||
assert_eq!(
|
||||
spec.cluster.roles.first().unwrap().to_pg_options(),
|
||||
"LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
|
||||
" LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
|
||||
const DEFAULT_BRANCH_NAME: &str = "main";
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
const DEFAULT_PG_VERSION: &str = "14";
|
||||
const DEFAULT_PG_VERSION: &str = "15";
|
||||
|
||||
fn default_conf() -> String {
|
||||
format!(
|
||||
|
||||
@@ -24,7 +24,7 @@ use utils::{
|
||||
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
|
||||
pub const DEFAULT_PG_VERSION: u32 = 14;
|
||||
pub const DEFAULT_PG_VERSION: u32 = 15;
|
||||
|
||||
//
|
||||
// This data structures represents neon_local CLI config
|
||||
|
||||
@@ -370,6 +370,10 @@ impl PageServerNode {
|
||||
.remove("evictions_low_residence_duration_metric_threshold")
|
||||
.map(|x| x.to_string()),
|
||||
};
|
||||
|
||||
// If tenant ID was not specified, generate one
|
||||
let new_tenant_id = new_tenant_id.unwrap_or(TenantId::generate());
|
||||
|
||||
let request = models::TenantCreateRequest {
|
||||
new_tenant_id,
|
||||
config,
|
||||
@@ -495,6 +499,9 @@ impl PageServerNode {
|
||||
ancestor_timeline_id: Option<TimelineId>,
|
||||
pg_version: Option<u32>,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
// If timeline ID was not specified, generate one
|
||||
let new_timeline_id = new_timeline_id.unwrap_or(TimelineId::generate());
|
||||
|
||||
self.http_request(
|
||||
Method::POST,
|
||||
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
#!/bin/bash
|
||||
set -eux
|
||||
|
||||
# Generate a random tenant or timeline ID
|
||||
#
|
||||
# Takes a variable name as argument. The result is stored in that variable.
|
||||
generate_id() {
|
||||
local -n resvar=$1
|
||||
printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
|
||||
}
|
||||
|
||||
PG_VERSION=${PG_VERSION:-14}
|
||||
|
||||
SPEC_FILE_ORG=/var/db/postgres/specs/spec.json
|
||||
@@ -13,29 +21,29 @@ done
|
||||
echo "Page server is ready."
|
||||
|
||||
echo "Create a tenant and timeline"
|
||||
generate_id tenant_id
|
||||
PARAMS=(
|
||||
-sb
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{}"
|
||||
-d "{\"new_tenant_id\": \"${tenant_id}\"}"
|
||||
http://pageserver:9898/v1/tenant/
|
||||
)
|
||||
tenant_id=$(curl "${PARAMS[@]}" | sed 's/"//g')
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
|
||||
generate_id timeline_id
|
||||
PARAMS=(
|
||||
-sb
|
||||
-X POST
|
||||
-H "Content-Type: application/json"
|
||||
-d "{\"tenant_id\":\"${tenant_id}\", \"pg_version\": ${PG_VERSION}}"
|
||||
-d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
|
||||
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
|
||||
)
|
||||
result=$(curl "${PARAMS[@]}")
|
||||
echo $result | jq .
|
||||
|
||||
echo "Overwrite tenant id and timeline id in spec file"
|
||||
tenant_id=$(echo ${result} | jq -r .tenant_id)
|
||||
timeline_id=$(echo ${result} | jq -r .timeline_id)
|
||||
|
||||
sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE}
|
||||
sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE}
|
||||
|
||||
|
||||
392
docs/CHANGELOG.md
Normal file
392
docs/CHANGELOG.md
Normal file
@@ -0,0 +1,392 @@
|
||||
# CHANGELOG
|
||||
|
||||
This is the Neon storage and compute changelog. It is a record of **notable changes** made to this project. The intended audience is developers, including those on the Neon team and external developers who may use or contribute to this project. The format of the changelog is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||
|
||||
The changes documented here may also appear in the [Neon Release Notes](https://neon.tech/docs/release-notes), but the information provided here is typically more technical in nature and aimed at developers. The _Neon Release Notes_ are more user-oriented and focused on benefits and usage of the new features or changes.
|
||||
|
||||
## 2023-05-23
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Implemented a `cargo neon` utility to facilitate setting up the Neon project locally. [Setup instructions](https://github.com/neondatabase/neon#running-neon-database) have been updated to reflect this change.
|
||||
|
||||
### Changed
|
||||
|
||||
- Compute: Updated PostgreSQL versions to 14.8 and 15.3, respectively.
|
||||
|
||||
## 2023-05-16
|
||||
|
||||
### Changed
|
||||
|
||||
- Proxy: Neon uses compute endpoint domain names to route incoming client connections. For example, to connect to the compute endpoint `ep-mute-recipe-239816`, we ask that you connect to `ep-mute-recipe-239816.us-east-2.aws.neon.tech`. However, the PostgreSQL wire protocol does not transfer the server domain name, so Neon relies on the Server Name Indication (SNI) extension of the TLS protocol to do this. Unfortunately, not all PostgreSQL clients support SNI. When these clients attempt to connect, they receive an error indicating that the "endpoint ID is not specified".
|
||||
|
||||
As a workaround, Neon provides a special connection option that allows clients to specify the compute endpoint they are connecting to. The connection option was previously named `project`. This option name is now deprecated but remains supported for backward compatibility. The new name for the connection option is `endpoint`, which is used as shown in the following example:
|
||||
|
||||
```txt
|
||||
postgres://<user>:<password>@ep-mute-recipe-239816.us-east-2.aws.neon.tech/main?options=endpoint%3Dep-mute-recipe-239816
|
||||
```
|
||||
|
||||
For more information about this special connection option for PostgreSQL clients that do not support SNI, refer to our [connection workarounds](https://neon.tech/docs/connect/connectivity-issues#workarounds) documentation.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Pageserver: Branch deletion status was not tracked in S3 storage, which could result in a deleted branch remaining accessible.
|
||||
- Pageserver: Addressed intermittent `failed to flush page requests` errors by adjusting Pageserver timeout settings.
|
||||
|
||||
## 2023-5-05
|
||||
|
||||
### Added
|
||||
|
||||
- Pageserver: Added WAL receiver context information for `Timed out while waiting for WAL record` errors. The additional information is used for diagnostic purposes.
|
||||
- Safekeeper, Pageserver: Added Safekeeper and Pageserver metrics that count the number of received queries, broker messages, removed WAL segments, and connection switch events.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Safekeeper: When establishing a connection to a Safekeeper, an `Lsn::INVALID` value was sent from the Safekeeper to the Pageserver if there were no WAL records to send. This incorrectly indicated to the Pageserver that the Safekeeper was lagging behind, causing the Pageserver to connect to a different Safekeeper. Instead of `Lsn::INVALID`, the most recent `commit_lsn` value is now sent instead.
|
||||
|
||||
## 2023-05-09
|
||||
|
||||
### Changed
|
||||
|
||||
- Compute: Dockerfile updates for the [neondatabase/neon](https://github.com/neondatabase/neon) repository are now automatically pushed to our [public Docker Hub repository](https://hub.docker.com/u/neondatabase). This enhancement means that you no longer need to manually track and incorporate Dockerfile updates to build and test Neon locally. Instead, these changes will be available and ready to use directly from our Docker Hub repository.
|
||||
- Safekeepers: Enabled parallel offload of WAL segments to remote storage. This change allows Safekeepers to upload up to five WAL segments concurrently.
|
||||
|
||||
## 2023-04-28
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Added support for the `US East (N. Virginia) — aws-us-east-1` region. For more information about Neon's region support, see [Regions](https://neon.tech/docs/introduction/regions).
|
||||
- Compute: Added support for the `ip4r` and `pg_hint_plan` extensions. For more information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
- Compute: Added support for `lz4` and `zstd` WAL compression methods.
|
||||
- Compute: Added support for `procps`, which is a set of utilities for process monitoring.
|
||||
- Pageserver: Implemented `syscalls` changes in the WAL redo `seccomp` (secure computing mode) code to ensure AArch64 support.
|
||||
|
||||
## 2023-04-11
|
||||
|
||||
### Added
|
||||
|
||||
- Pageserver: Added `disk_size` and `instance_type` properties to the Pageserver API. This data is required to support assigning Neon projects to Pageservers based on Pageserver disk usage.
|
||||
- Proxy: Added error reporting for unusually low `proxy_io_bytes_per_client metric` values.
|
||||
- Proxy: Added support for additional domain names to enable partner integrations with Neon.
|
||||
- Safekeeper: The `wal_backup_lsn` is now advanced after each WAL segment is offloaded to storage to avoid lags in WAL segment cleanup.
|
||||
- Safekeeper: Added a timeout for reading from the socket in the Safekeeper WAL service to avoid an accumulation of waiting threads.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Pageserver: Corrected an issue that caused data layer eviction to occur at a percentage above the configured disk-usage threshold.
|
||||
- Proxy: The passwordless authentication proxy ignored non-wildcard common names, passing a `None` value instead. A non-wildcard common name is now set, and an error is reported if a `None` value is passed.
|
||||
|
||||
## 2023-03-28
|
||||
|
||||
### Changed
|
||||
|
||||
- Pageserver: Logical size and partitioning values are now computed before threshold-based eviction of data layers to avoid downloading previously evicted data layer files when restarting a Pageserver.
|
||||
- Compute: Free space in the local file system that Neon uses for temporary files, unlogged tables, and the local file cache, is now monitored in order to maximize the space available for the local file cache.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Pageserver: The delete timeline endpoint in the Pageserver management API did not return the proper HTTP code.
|
||||
- Pageserver: Fixed an issue in a data storage size calculation that caused an incorrect value to be returned.
|
||||
- Pageserver: Addressed unexpected data layer downloads that occurred after a Pageserver restart. The data layers most likely required for the data storage size calculation after a Pageserver restart are now retained.
|
||||
|
||||
## 2023-03-21
|
||||
|
||||
### Added
|
||||
|
||||
- Added metrics that enable detection of data layer eviction thrashing (repetition of eviction and on-demand download of data layers).
|
||||
- Safekeeper: Added an internal metric to track bytes written or read in PostgreSQL connections to Safekeepers, which enables monitoring traffic between availability zones.
|
||||
|
||||
### Changed
|
||||
|
||||
- Pageserver: Improved the check for unexpected trailing data when importing a basebackup, which is tarball with files required to bootstrap a compute node.
|
||||
- Pageserver: Separated the management and `libpq` configuration, making it possible to enable authentication for only the management HTTP API or the Compute API.
|
||||
- Pageserver: Reduced the amount of metrics data collected for Pageservers.
|
||||
- Pageserver: JWT (JSON Web Token) generation is now permitted to fail when running Pageservers with authentication disabled, which enables running without the 'openssl' binary. This change enables switching to the EdDSA algorithm for storing JWT authentication tokens.
|
||||
- Pageserver: Switched to the EdDSA algorithm for the storage JWT authentication tokens. The Neon Control Plane only supports EdDSA.
|
||||
- Pageserver, Safekeeper: Revised `$NEON_AUTH_TOKEN` variable handling when connecting from a compute to Pageservers and Safekeepers.
|
||||
- Proxy: All compute node connection errors are now logged.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Pageserver: Fixed an issue that resulted in old data layers not being garbage collected.
|
||||
- Proxy: Fixed an issue that caused Websocket connections through the Proxy to become unresponsive.
|
||||
|
||||
### Removed
|
||||
|
||||
- Pageserver, Safekeeper: Removed unused Control Plane code.
|
||||
|
||||
## 2023-03-13
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Released a new `pg_tiktoken` PostgreSQL extension, created by the Neon engineering team. The extension is a wrapper for [OpenAI’s tokenizer](https://github.com/openai/tiktoken). It provides fast and efficient tokenization of data stored in a PostgreSQL database.
|
||||
The extension supports two functions:
|
||||
|
||||
- The `tiktoken_encode` function takes text input and returns tokenized output, making it easier to analyze and process text data.
|
||||
- The `tiktoken_count` function returns the number of tokens in a text, which is useful for checking text length limits, such as those imposed by OpenAI’s language models.
|
||||
|
||||
For more information about the `pg_tiktoken` extension, refer to the blog post: [Announcing pg_tiktoken: A Postgres Extension for Fast BPE Tokenization](https://neon.tech/blog/announcing-pg_tiktoken-a-postgres-extension-for-fast-bpe-tokenization). The `pg_tiktoken` code is available on [GitHub](https://github.com/kelvich/pg_tiktoken).
|
||||
- Compute: Added support for the PostgreSQL `prefix`, `hll` and `plpgsql_check` extensions. For more information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
- Compute, Pageserver, Safekeeper: Added support for RS384 and RS512 JWT tokens, used to securely transmit information as JSON objects.
|
||||
- Autoscaling: Added support for scaling Neon's local file cache size when scaling a virtual machine.
|
||||
|
||||
### Removed
|
||||
|
||||
- Pageserver: Removed the block cursor cache, which provided little performance benefit and would hold page references that caused deadlocks.
|
||||
|
||||
## 2023-03-07
|
||||
|
||||
### Added
|
||||
|
||||
- Pageserver: Added logic to handle unexpected Write-Ahead Log (WAL) redo process failures, which could cause a `Broken pipe` error on the Pageserver. In case of a failure, the WAL redo process is now restarted, and requests to apply redo records are retried automatically.
|
||||
- Pageserver: Added timeout logic for the copy operation that occurs when downloading a data layer. The timeout logic prevents a deadlock state if a data layer download is blocked.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Safekeeper: Addressed `Failed to open WAL file` warnings that appeared in the Safekeeper log files. The warnings were due to an outdated `truncate_lsn` value on the Safekeeper, which caused the _walproposer_ (the Postgres compute node) to download WAL records starting from a Log Sequence Number (LSN) that was older than the `backup_lsn`. This resulted in unnecessary WAL record downloads from cold storage.
|
||||
|
||||
## 2023-03-03
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Added support for the PostgreSQL `rum` and `pgTAP` extensions. For more information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
|
||||
### Fixed
|
||||
|
||||
- Pageserver: A system metric that monitors physical data size overflowed when a garbage collection operation was performed on an evicted data layer.
|
||||
- Pageserver: An index upload was skipped when a compaction operation did not perform an on-demand download from storage. With no on-demand downloads, the compaction function would exit before scheduling the index upload.
|
||||
|
||||
## 2023-02-28
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Added support for the following PostgreSQL extensions:
|
||||
- `pg_graphql`
|
||||
- `pg_jsonschema`
|
||||
- `pg_hashids`
|
||||
- `pgrouting`
|
||||
- `hypopg`
|
||||
- Server Programming Interface (SPI) extensions:
|
||||
- `autoinc`
|
||||
- `insert_username`
|
||||
- `moddatetime`
|
||||
- `refint`
|
||||
|
||||
For more information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
|
||||
### Changed
|
||||
|
||||
- Compute: Updated supported PostgreSQL versions to [14.7](https://www.postgresql.org/docs/release/14.7/) and [15.2](https://www.postgresql.org/docs/release/15.2/), respectively.
|
||||
- Pageserver: Optimized the log-structured merge-tree (LSM tree) implementation to reduce [write amplification](https://en.wikipedia.org/wiki/Write_amplification).
|
||||
|
||||
## 2023-02-21
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Added support for the PostgreSQL `xml2` and `pgjwt` extensions. For more information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
|
||||
### Changed
|
||||
|
||||
- Compute: Updated the versions for the following PostgreSQL extensions:
|
||||
- Updated the `address_standardizer`, `address_standardizer_data_us`, `postgis`, `postgis_raster`, `postgis_tiger_geocoder`, `postgis_topology` extensions to version `3.3.2`.
|
||||
- Updated the `plv8`, `plls`, `plcoffee` extensions to `3.1.5`.
|
||||
- Updated the `h3_pg` extension to `4.1.2`.
|
||||
|
||||
Updating an extension version requires running an `ALTER EXTENSION <extension_name> UPDATE TO <new_version>` statement. For example, to update the `postgis_topology` extension to the newly supported version, run this statement:
|
||||
|
||||
```sql
|
||||
ALTER EXTENSION postgis_topology UPDATE TO '3.3.2';
|
||||
```
|
||||
|
||||
- Proxy: Enabled `OpenTelemetry` tracing to capture all incoming requests. This change enables Neon to perform an end-to-end trace when a new connection is established.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Pageserver: Corrected the storage size metrics calculation to ensure that only active branches are counted.
|
||||
|
||||
## 2023-02-14
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Added support for the PostgreSQL `pgvector`, `plls` and `plcoffee` extensions. For more information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
- Pageserver: Added an experimental feature that automatically evicts layer files from Pageservers to optimize local storage space usage. When enabled for a project, Pageservers periodically check the access timestamps of the project's layer files. If the most recent layer file access is further in the past than a configurable threshold, the Pageserver removes the layer file from local storage. The authoritative copy of the layer file remains in S3. A Pageserver can download a layer file from S3 on-demand if it is needed again, to reconstruct a page version for a client request, for example.
|
||||
|
||||
### Changed
|
||||
|
||||
- Proxy: Reduced network latencies for WebSocket and pooled connections by implementing caching mechanism for compute node connection information and enabling the `TCP_NODELAY` protocol option. The `TCP_NODELAY` option causes segments to be sent as soon as possible, even if there is only a small amount of data. For more information, refer to the [TCP protocol man page](https://linux.die.net/man/7/tcp).
|
||||
|
||||
## 2023-02-07
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Added support for the PostgreSQL `postgis-sfcgal` extension. For more information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
- Compute: Added support for [International Components for Unicode (ICU)](https://icu.unicode.org/), which permits defining collation objects that use ICU as the collation provider. For example:
|
||||
|
||||
```sql
|
||||
CREATE COLLATION german (provider = icu, locale = 'de');
|
||||
```
|
||||
|
||||
## 2023-01-23
|
||||
|
||||
### Fixed
|
||||
|
||||
- Compute: Fixed a compute instance restart error. When a compute instance was restarted after a role was deleted in the console, the restart operation failed with a "role does not exist" error while attempting to reassign the objects owned by the deleted role.
|
||||
|
||||
## 2023-01-31
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Added support for the PostgreSQL `unit` extension. For more information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
- Pageserver: Implemented an asynchronous pipe for communication with the Write Ahead Log (WAL) redo process, which helps improves OLAP query performance.
|
||||
|
||||
### Changed
|
||||
|
||||
- Pageserver: Reimplemented the layer map used to track the data layers in a branch. The layer map now uses an immutable binary search tree (BST) data structure, which improves data layer lookup performance over the previous R-tree implementation. The data required to reconstruct page versions is stored as data layers in Neon Pageservers.
|
||||
- Pageserver: Changed the garbage collection (`gc`) interval from 100 seconds to 60 minutes. This change reduces the frequency of layer map locks.
|
||||
|
||||
### Removed
|
||||
|
||||
- Compute: Removed logic that updated roles each time a Neon compute instance was restarted. Roles were updated on each restart to address a password-related backward compatibility issue that is no longer relevant.
|
||||
|
||||
## 2023-01-17
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Added support for several PostgreSQL extensions. Newly supported extensions include:
|
||||
- `bloom`
|
||||
- `pgrowlocks`
|
||||
- `intagg`
|
||||
- `pgstattuple`
|
||||
- `earthdistance`
|
||||
- `address_standardizer`
|
||||
- `address_standardizer_data_us`
|
||||
|
||||
For more information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
- Compute: Added statistics to `EXPLAIN` that show prefetch hits and misses for sequential scans.
|
||||
|
||||
### Changed
|
||||
|
||||
- Compute: Updated the list of PostgreSQL client libraries and runtimes that Neon tests for connection support. The `pg8000` Python PostgreSQL driver, version 1.29.3 and higher, now supports connecting to Neon.
|
||||
- Proxy: Updated the error message reported when attempting to connect from a client or driver that does not support Server Name Indication (SNI). For more information about the SNI requirement, see [Connect from old clients](https://neon.tech/docs/connect/connectivity-issues). Previously, the error message indicated that the "Project ID" is not specified. The error message now states that the "Endpoint ID" is not specified. Connecting to Neon with a Project ID remains supported for backward compatibility, but connecting with an Endpoint ID is now the recommended connection method. For general information about connecting to Neon, see [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app).
|
||||
|
||||
## 2022-12-08
|
||||
|
||||
### Added
|
||||
|
||||
- Pageserver: Added support for on-demand download of layer files from cold storage. Layer files contain the data required reconstruct any version of a data page. On-demand download enables Neon to quickly distribute data across Pageservers and recover from local Pageserver failures. This feature augments Neon's storage capability by allowing data to be transferred efficiently from cold storage to Pageservers whenever the data is needed.
|
||||
|
||||
## 2022-11-16
|
||||
|
||||
### Added
|
||||
|
||||
- Pageserver: Added a tenant sizing model and an endpoint for retrieving the tenant size.
|
||||
|
||||
### Changed
|
||||
|
||||
- Pageserver: Moved the Write-Ahead Log (WAL) redo process code from Neon's `postgres` repository to the `neon` repository and created a separate `wal_redo` binary in order to reduce the amount of change in the `postgres` repository codebase.
|
||||
- Compute: Updated prefetching support to store requests and responses in a ring buffer instead of a queue, which enables using prefetches from many relations concurrently.
|
||||
|
||||
### Removed
|
||||
|
||||
- Pageserver, Safekeeper, Compute, and Proxy: Reduced the size of Neon storage binaries by 50% by removing dependency debug symbols from the release build.
|
||||
- Pageserver and Safekeeper: Removed support for the `--daemonize` option from the CLI process that starts the Pageserver and Safekeeper storage components. The required library is no longer being maintained and the option was only used in our test environment.
|
||||
|
||||
## 2022-10-25
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Added support for PostgreSQL 15.0 and its PostgreSQL extensions.
|
||||
For information about supported extensions, see [Supported PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
- Pageserver: Added a timeline `state` field to the `TimelineInfo` struct that is returned by the `timelines` internal management API endpoint. Timeline `state` information improves observability and communication between Pageserver modules.
|
||||
|
||||
### Changed
|
||||
|
||||
- Compute: Disabled the `wal_log_hints` parameter, which is the default PostgreSQL setting. The Pageserver-related issue that required enabling `wal_log_hints` has been addressed, and enabling `wal_log_hints` is no longer necessary.
|
||||
|
||||
## 2022-10-21
|
||||
|
||||
### Fixed
|
||||
|
||||
- Compute: Fixed an issue that prevented creating a database when the specified database name included trailing spaces.
|
||||
- Pageserver: Fixed an `INSERT ... ON CONFLICT` handling issue for speculative Write-Ahead Log (WAL) record inserts. Under certain load conditions, records added with `INSERT ... ON CONFLICT` could be replayed incorrectly.
|
||||
- Pageserver: Fixed a Free Space Map (FSM) and Visibility Map (VM) page reconstruction issue that caused compute nodes to start slowly under certain workloads.
|
||||
- Pageserver: Fixed a garbage collection (GC) issue that could lead to a database failure under high load.
|
||||
- Pageserver: Improved query performance for larger databases by improving R-tree layer map search. The envelope for each layer is now remembered so that it does not have to be reconstructed for each call.
|
||||
|
||||
## 2022-10-07
|
||||
|
||||
### Added
|
||||
|
||||
- Pageserver: Added initial support for online tenant relocation.
|
||||
- Pageserver: Added support for multiple PostgreSQL versions.
|
||||
- Compute: Added support for the `h3_pg` and `plv8` PostgreSQL extensions. For information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
- Compute: Added support for a future implementation of sequential scan prefetch, which improves I/O performance for operations such as table scans.
|
||||
|
||||
### Changed
|
||||
|
||||
- Pageserver: Increased the default `compaction_period` setting to 20 seconds to reduce the frequency of polling that is performed to determine if compaction is required. The frequency of polling with the previous setting of 1 could result in excessive CPU consumption when there are numerous tenants and projects.
|
||||
- Compute: Moved the backpressure throttling algorithm to the Neon extension to minimize changes to the Neon PostgreSQL core code, and added a `backpressure_throttling_time` function that returns the total time spent throttling since the system was started.
|
||||
- Proxy: Improved error messages and logging.
|
||||
|
||||
## 2022-09-01
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Added support for the `PostGIS` extension, version 3.3.0. For information about PostgreSQL extensions supported by Neon, see [PostgreSQL extensions](https://neon.tech/docs/extensions/pg-extensions).
|
||||
- Proxy: Added support for forwarding the `options`, `application_name`, and `replication` connection parameters to compute nodes.
|
||||
|
||||
### Changed
|
||||
|
||||
- Compute: Updated the PostgreSQL version to 14.5.
|
||||
|
||||
## 2022-08-02
|
||||
|
||||
### Added
|
||||
|
||||
- Compute: Installed the `uuid-ossp` extension binaries, which provide functions for generating universally unique identifiers (UUIDs). `CREATE EXTENSION "uuid-ossp"` is now supported. For information about extensions supported by Neon, see [Available PostgreSQL extensions]https://neon.tech/docs/extensions/pg-extensions).
|
||||
- Compute: Added logging for compute node initialization failure during the 'basebackup' stage.
|
||||
- Pageserver: Added reporting of the physical size with the tenant status, in the internal management API.
|
||||
|
||||
### Changed
|
||||
|
||||
- Pageserver: Merged the 'wal_receiver' endpoint with 'timeline_detail', in the internal management API.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Pageserver: Avoided busy looping when deletion from cloud storage is skipped due to failed upload tasks.
|
||||
|
||||
## 2022-07-19
|
||||
|
||||
### Added
|
||||
|
||||
- Safekeeper: Added support for backing up Write-Ahead Logs (WAL) to S3 storage for disaster recovery.
|
||||
- Safekeeper: Added support for downloading WAL from S3 storage on demand.
|
||||
- Proxy: Added support for propagating SASL/SCRAM PostgreSQL authentication errors to clients.
|
||||
- Pageserver: Implemented a page service `fullbackup` endpoint that works like basebackup but also sends relational files.
|
||||
- Pageserver: Added support for importing a base backup taken from a standalone PostgreSQL instance or another Pageserver using `psql` copy.
|
||||
- Compute: Enabled the use of the `CREATE EXTENSION` statement for users that are not database owners.
|
||||
|
||||
### Changed
|
||||
|
||||
- Safekeeper: Switched to [etcd](https://etcd.io/) subscriptions to keep Pageservers up to date with the Safekeeper status.
|
||||
- Safekeeper: Implemented JSON Web Token (JWT) authentication in the Safekeeper HTTP API.
|
||||
- Compute: Updated the PostgreSQL version to 14.4.
|
||||
- Compute: Renamed the following custom configuration parameters:
|
||||
- `zenith.page_server_connstring` to `neon.pageserver_connstring`
|
||||
- `zenith.zenith_tenant` to `neon.tenant_id`
|
||||
- `zenith.zenith_timeline` to `neon.timeline_id`
|
||||
- `zenith.max_cluster_size` to `neon.max_cluster_size`
|
||||
- `wal_acceptors` to `safekeepers`
|
||||
- Control Plane: Renamed `zenith_admin` role to `cloud_admin`.
|
||||
- Pageserver: Updated the timeline size reported when `DROP DATABASE` is executed.
|
||||
- Pageserver: Switched to per-tenant attach/detach. Download operations of all timelines for one tenant are now grouped together so that branches can be used safely with attach/detach.
|
||||
- Pageserver: Decreased the number of threads by running gc and compaction in a blocking tokio thread pool.
|
||||
- Compute: Enabled the use of the `CREATE EXTENSION` statement for users that are not database owners.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Pageserver: Fixed the database size calculation to count Visibility Maps (VMs) and Free Space Maps (FSMs) in addition to the main fork of the relation.
|
||||
- Safekeeper: Fixed the walreceiver connection selection mechanism:
|
||||
- Reconnecting to a Safekeeper immediately after it fails is now avoided by limiting candidates to those with the fewest connection attempts.
|
||||
- Increased the `max_lsn_wal_lag` default setting to avoid constant reconnections during normal work.
|
||||
- Fixed `wal_connection_attempts` maintenance, preventing busy reconnection loops.
|
||||
@@ -118,9 +118,8 @@ pub enum TimelineState {
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub new_timeline_id: Option<TimelineId>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub new_timeline_id: TimelineId,
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub ancestor_timeline_id: Option<TimelineId>,
|
||||
@@ -131,12 +130,11 @@ pub struct TimelineCreateRequest {
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, Debug, Default)]
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct TenantCreateRequest {
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub new_tenant_id: Option<TenantId>,
|
||||
#[serde_as(as = "DisplayFromStr")]
|
||||
pub new_tenant_id: TenantId,
|
||||
#[serde(flatten)]
|
||||
pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
|
||||
}
|
||||
@@ -184,10 +182,10 @@ pub struct StatusResponse {
|
||||
}
|
||||
|
||||
impl TenantCreateRequest {
|
||||
pub fn new(new_tenant_id: Option<TenantId>) -> TenantCreateRequest {
|
||||
pub fn new(new_tenant_id: TenantId) -> TenantCreateRequest {
|
||||
TenantCreateRequest {
|
||||
new_tenant_id,
|
||||
..Default::default()
|
||||
config: TenantConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::auth::{Claims, JwtAuth};
|
||||
use crate::http::error;
|
||||
use crate::http::error::{api_error_handler, route_error_handler, ApiError};
|
||||
use anyhow::{anyhow, Context};
|
||||
use hyper::header::{HeaderName, AUTHORIZATION};
|
||||
use hyper::http::HeaderValue;
|
||||
@@ -16,8 +16,6 @@ use std::future::Future;
|
||||
use std::net::TcpListener;
|
||||
use std::str::FromStr;
|
||||
|
||||
use super::error::ApiError;
|
||||
|
||||
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
|
||||
register_int_counter!(
|
||||
"libmetrics_metric_handler_requests_total",
|
||||
@@ -35,8 +33,12 @@ struct RequestId(String);
|
||||
/// Adds a tracing info_span! instrumentation around the handler events,
|
||||
/// logs the request start and end events for non-GET requests and non-200 responses.
|
||||
///
|
||||
/// Usage: Replace `my_handler` with `|r| request_span(r, my_handler)`
|
||||
///
|
||||
/// Use this to distinguish between logs of different HTTP requests: every request handler wrapped
|
||||
/// in this type will get request info logged in the wrapping span, including the unique request ID.
|
||||
/// with this will get request info logged in the wrapping span, including the unique request ID.
|
||||
///
|
||||
/// This also handles errors, logging them and converting them to an HTTP error response.
|
||||
///
|
||||
/// There could be other ways to implement similar functionality:
|
||||
///
|
||||
@@ -54,60 +56,56 @@ struct RequestId(String);
|
||||
/// tries to achive with its `.instrument` used in the current approach.
|
||||
///
|
||||
/// If needed, a declarative macro to substitute the |r| ... closure boilerplate could be introduced.
|
||||
pub struct RequestSpan<E, R, H>(pub H)
|
||||
pub async fn request_span<R, H>(request: Request<Body>, handler: H) -> R::Output
|
||||
where
|
||||
E: Into<Box<dyn std::error::Error + Send + Sync>> + 'static,
|
||||
R: Future<Output = Result<Response<Body>, E>> + Send + 'static,
|
||||
H: Fn(Request<Body>) -> R + Send + Sync + 'static;
|
||||
|
||||
impl<E, R, H> RequestSpan<E, R, H>
|
||||
where
|
||||
E: Into<Box<dyn std::error::Error + Send + Sync>> + 'static,
|
||||
R: Future<Output = Result<Response<Body>, E>> + Send + 'static,
|
||||
H: Fn(Request<Body>) -> R + Send + Sync + 'static,
|
||||
R: Future<Output = Result<Response<Body>, ApiError>> + Send + 'static,
|
||||
H: FnOnce(Request<Body>) -> R + Send + Sync + 'static,
|
||||
{
|
||||
/// Creates a tracing span around inner request handler and executes the request handler in the contex of that span.
|
||||
/// Use as `|r| RequestSpan(my_handler).handle(r)` instead of `my_handler` as the request handler to get the span enabled.
|
||||
pub async fn handle(self, request: Request<Body>) -> Result<Response<Body>, E> {
|
||||
let request_id = request.context::<RequestId>().unwrap_or_default().0;
|
||||
let method = request.method();
|
||||
let path = request.uri().path();
|
||||
let request_span = info_span!("request", %method, %path, %request_id);
|
||||
let request_id = request.context::<RequestId>().unwrap_or_default().0;
|
||||
let method = request.method();
|
||||
let path = request.uri().path();
|
||||
let request_span = info_span!("request", %method, %path, %request_id);
|
||||
|
||||
let log_quietly = method == Method::GET;
|
||||
async move {
|
||||
let cancellation_guard = RequestCancelled::warn_when_dropped_without_responding();
|
||||
if log_quietly {
|
||||
debug!("Handling request");
|
||||
} else {
|
||||
info!("Handling request");
|
||||
}
|
||||
|
||||
// Note that we reuse `error::handler` here and not returning and error at all,
|
||||
// yet cannot use `!` directly in the method signature due to `routerify::RouterBuilder` limitation.
|
||||
// Usage of the error handler also means that we expect only the `ApiError` errors to be raised in this call.
|
||||
//
|
||||
// Panics are not handled separately, there's a `tracing_panic_hook` from another module to do that globally.
|
||||
let res = (self.0)(request).await;
|
||||
|
||||
cancellation_guard.disarm();
|
||||
|
||||
match res {
|
||||
Ok(response) => {
|
||||
let response_status = response.status();
|
||||
if log_quietly && response_status.is_success() {
|
||||
debug!("Request handled, status: {response_status}");
|
||||
} else {
|
||||
info!("Request handled, status: {response_status}");
|
||||
}
|
||||
Ok(response)
|
||||
}
|
||||
Err(e) => Ok(error::handler(e.into()).await),
|
||||
}
|
||||
let log_quietly = method == Method::GET;
|
||||
async move {
|
||||
let cancellation_guard = RequestCancelled::warn_when_dropped_without_responding();
|
||||
if log_quietly {
|
||||
debug!("Handling request");
|
||||
} else {
|
||||
info!("Handling request");
|
||||
}
|
||||
|
||||
// No special handling for panics here. There's a `tracing_panic_hook` from another
|
||||
// module to do that globally.
|
||||
let res = handler(request).await;
|
||||
|
||||
cancellation_guard.disarm();
|
||||
|
||||
// Log the result if needed.
|
||||
//
|
||||
// We also convert any errors into an Ok response with HTTP error code here.
|
||||
// `make_router` sets a last-resort error handler that would do the same, but
|
||||
// we prefer to do it here, before we exit the request span, so that the error
|
||||
// is still logged with the span.
|
||||
//
|
||||
// (Because we convert errors to Ok response, we never actually return an error,
|
||||
// and we could declare the function to return the never type (`!`). However,
|
||||
// using `routerify::RouterBuilder` requires a proper error type.)
|
||||
match res {
|
||||
Ok(response) => {
|
||||
let response_status = response.status();
|
||||
if log_quietly && response_status.is_success() {
|
||||
debug!("Request handled, status: {response_status}");
|
||||
} else {
|
||||
info!("Request handled, status: {response_status}");
|
||||
}
|
||||
Ok(response)
|
||||
}
|
||||
Err(err) => Ok(api_error_handler(err)),
|
||||
}
|
||||
.instrument(request_span)
|
||||
.await
|
||||
}
|
||||
.instrument(request_span)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Drop guard to WARN in case the request was dropped before completion.
|
||||
@@ -207,10 +205,8 @@ pub fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
|
||||
.middleware(Middleware::post_with_info(
|
||||
add_request_id_header_to_response,
|
||||
))
|
||||
.get("/metrics", |r| {
|
||||
RequestSpan(prometheus_metrics_handler).handle(r)
|
||||
})
|
||||
.err_handler(error::handler)
|
||||
.get("/metrics", |r| request_span(r, prometheus_metrics_handler))
|
||||
.err_handler(route_error_handler)
|
||||
}
|
||||
|
||||
pub fn attach_openapi_ui(
|
||||
@@ -220,12 +216,14 @@ pub fn attach_openapi_ui(
|
||||
ui_mount_path: &'static str,
|
||||
) -> RouterBuilder<hyper::Body, ApiError> {
|
||||
router_builder
|
||||
.get(spec_mount_path, move |r| {
|
||||
RequestSpan(move |_| async move { Ok(Response::builder().body(Body::from(spec)).unwrap()) })
|
||||
.handle(r)
|
||||
})
|
||||
.get(ui_mount_path, move |r| RequestSpan( move |_| async move {
|
||||
Ok(Response::builder().body(Body::from(format!(r#"
|
||||
.get(spec_mount_path,
|
||||
move |r| request_span(r, move |_| async move {
|
||||
Ok(Response::builder().body(Body::from(spec)).unwrap())
|
||||
})
|
||||
)
|
||||
.get(ui_mount_path,
|
||||
move |r| request_span(r, move |_| async move {
|
||||
Ok(Response::builder().body(Body::from(format!(r#"
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
@@ -255,7 +253,8 @@ pub fn attach_openapi_ui(
|
||||
</body>
|
||||
</html>
|
||||
"#, spec_mount_path))).unwrap())
|
||||
}).handle(r))
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
fn parse_token(header_value: &str) -> Result<&str, ApiError> {
|
||||
|
||||
@@ -83,13 +83,24 @@ impl HttpErrorBody {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn handler(err: routerify::RouteError) -> Response<Body> {
|
||||
let api_error = err
|
||||
.downcast::<ApiError>()
|
||||
.expect("handler should always return api error");
|
||||
pub async fn route_error_handler(err: routerify::RouteError) -> Response<Body> {
|
||||
match err.downcast::<ApiError>() {
|
||||
Ok(api_error) => api_error_handler(*api_error),
|
||||
Err(other_error) => {
|
||||
// We expect all the request handlers to return an ApiError, so this should
|
||||
// not be reached. But just in case.
|
||||
error!("Error processing HTTP request: {other_error:?}");
|
||||
HttpErrorBody::response_from_msg_and_status(
|
||||
other_error.to_string(),
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn api_error_handler(api_error: ApiError) -> Response<Body> {
|
||||
// Print a stack trace for Internal Server errors
|
||||
if let ApiError::InternalServerError(_) = api_error.as_ref() {
|
||||
if let ApiError::InternalServerError(_) = api_error {
|
||||
error!("Error processing HTTP request: {api_error:?}");
|
||||
} else {
|
||||
error!("Error processing HTTP request: {api_error:#}");
|
||||
|
||||
@@ -9,6 +9,7 @@ use clap::{Arg, ArgAction, Command};
|
||||
use fail::FailScenario;
|
||||
use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
|
||||
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
|
||||
use pageserver::task_mgr::WALRECEIVER_RUNTIME;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use tracing::*;
|
||||
|
||||
@@ -18,9 +19,7 @@ use pageserver::{
|
||||
context::{DownloadBehavior, RequestContext},
|
||||
http, page_cache, page_service, task_mgr,
|
||||
task_mgr::TaskKind,
|
||||
task_mgr::{
|
||||
BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
|
||||
},
|
||||
task_mgr::{BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME},
|
||||
tenant::mgr,
|
||||
virtual_file,
|
||||
};
|
||||
@@ -276,7 +275,18 @@ fn start_pageserver(
|
||||
let pageserver_listener = tcp_listener::bind(pg_addr)?;
|
||||
|
||||
// Launch broker client
|
||||
WALRECEIVER_RUNTIME.block_on(pageserver::broker_client::init_broker_client(conf))?;
|
||||
// The storage_broker::connect call needs to happen inside a tokio runtime thread.
|
||||
let broker_client = WALRECEIVER_RUNTIME
|
||||
.block_on(async {
|
||||
// Note: we do not attempt connecting here (but validate endpoints sanity).
|
||||
storage_broker::connect(conf.broker_endpoint.clone(), conf.broker_keepalive_interval)
|
||||
})
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"create broker client for uri={:?} keepalive_interval={:?}",
|
||||
&conf.broker_endpoint, conf.broker_keepalive_interval,
|
||||
)
|
||||
})?;
|
||||
|
||||
// Initialize authentication for incoming connections
|
||||
let http_auth;
|
||||
@@ -326,7 +336,11 @@ fn start_pageserver(
|
||||
let remote_storage = create_remote_storage_client(conf)?;
|
||||
|
||||
// Scan the local 'tenants/' directory and start loading the tenants
|
||||
BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(conf, remote_storage.clone()))?;
|
||||
BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(
|
||||
conf,
|
||||
broker_client.clone(),
|
||||
remote_storage.clone(),
|
||||
))?;
|
||||
|
||||
// shared state between the disk-usage backed eviction background task and the http endpoint
|
||||
// that allows triggering disk-usage based eviction manually. note that the http endpoint
|
||||
@@ -351,6 +365,7 @@ fn start_pageserver(
|
||||
conf,
|
||||
launch_ts,
|
||||
http_auth,
|
||||
broker_client.clone(),
|
||||
remote_storage,
|
||||
disk_usage_eviction_state,
|
||||
)?
|
||||
@@ -427,6 +442,7 @@ fn start_pageserver(
|
||||
async move {
|
||||
page_service::libpq_listener_main(
|
||||
conf,
|
||||
broker_client,
|
||||
pg_auth,
|
||||
pageserver_listener,
|
||||
conf.pg_auth_type,
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
//! The broker client instance of the pageserver, created during pageserver startup.
|
||||
//! Used by each timelines' [`walreceiver`].
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
|
||||
use anyhow::Context;
|
||||
use once_cell::sync::OnceCell;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use tracing::*;
|
||||
|
||||
static BROKER_CLIENT: OnceCell<BrokerClientChannel> = OnceCell::new();
|
||||
|
||||
///
|
||||
/// Initialize the broker client. This must be called once at page server startup.
|
||||
///
|
||||
pub async fn init_broker_client(conf: &'static PageServerConf) -> anyhow::Result<()> {
|
||||
let broker_endpoint = conf.broker_endpoint.clone();
|
||||
|
||||
// Note: we do not attempt connecting here (but validate endpoints sanity).
|
||||
let broker_client =
|
||||
storage_broker::connect(broker_endpoint.clone(), conf.broker_keepalive_interval).context(
|
||||
format!(
|
||||
"Failed to create broker client to {}",
|
||||
&conf.broker_endpoint
|
||||
),
|
||||
)?;
|
||||
|
||||
if BROKER_CLIENT.set(broker_client).is_err() {
|
||||
panic!("broker already initialized");
|
||||
}
|
||||
|
||||
info!(
|
||||
"Initialized broker client with endpoints: {}",
|
||||
broker_endpoint
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Get a handle to the broker client
|
||||
///
|
||||
pub fn get_broker_client() -> &'static BrokerClientChannel {
|
||||
BROKER_CLIENT.get().expect("broker client not initialized")
|
||||
}
|
||||
|
||||
pub fn is_broker_client_initialized() -> bool {
|
||||
BROKER_CLIENT.get().is_some()
|
||||
}
|
||||
@@ -88,6 +88,7 @@
|
||||
use crate::task_mgr::TaskKind;
|
||||
|
||||
// The main structure of this module, see module-level comment.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RequestContext {
|
||||
task_kind: TaskKind,
|
||||
download_behavior: DownloadBehavior,
|
||||
@@ -95,7 +96,7 @@ pub struct RequestContext {
|
||||
|
||||
/// Desired behavior if the operation requires an on-demand download
|
||||
/// to proceed.
|
||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
|
||||
pub enum DownloadBehavior {
|
||||
/// Download the layer file. It can take a while.
|
||||
Download,
|
||||
|
||||
@@ -678,6 +678,8 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
required:
|
||||
- new_timeline_id
|
||||
properties:
|
||||
new_timeline_id:
|
||||
type: string
|
||||
@@ -936,6 +938,8 @@ components:
|
||||
allOf:
|
||||
- $ref: '#/components/schemas/TenantConfig'
|
||||
- type: object
|
||||
required:
|
||||
- new_tenant_id
|
||||
properties:
|
||||
new_tenant_id:
|
||||
type: string
|
||||
|
||||
@@ -7,10 +7,11 @@ use hyper::{Body, Request, Response, Uri};
|
||||
use metrics::launch_timestamp::LaunchTimestamp;
|
||||
use pageserver_api::models::{DownloadRemoteLayersTaskSpawnRequest, TenantAttachRequest};
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use tenant_size_model::{SizeResult, StorageModel};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::http::endpoint::RequestSpan;
|
||||
use utils::http::endpoint::request_span;
|
||||
use utils::http::json::json_request_or_empty_body;
|
||||
use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};
|
||||
|
||||
@@ -24,7 +25,9 @@ use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL};
|
||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::config::TenantConfOpt;
|
||||
use crate::tenant::mgr::{TenantMapInsertError, TenantStateError};
|
||||
use crate::tenant::mgr::{
|
||||
GetTenantError, SetNewTenantConfigError, TenantMapInsertError, TenantStateError,
|
||||
};
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||
use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, Timeline};
|
||||
@@ -51,6 +54,7 @@ struct State {
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
allowlist_routes: Vec<Uri>,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||
}
|
||||
|
||||
@@ -59,6 +63,7 @@ impl State {
|
||||
conf: &'static PageServerConf,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||
) -> anyhow::Result<Self> {
|
||||
let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml"]
|
||||
@@ -70,6 +75,7 @@ impl State {
|
||||
auth,
|
||||
allowlist_routes,
|
||||
remote_storage,
|
||||
broker_client,
|
||||
disk_usage_eviction_state,
|
||||
})
|
||||
}
|
||||
@@ -140,6 +146,36 @@ impl From<TenantStateError> for ApiError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<GetTenantError> for ApiError {
|
||||
fn from(tse: GetTenantError) -> ApiError {
|
||||
match tse {
|
||||
GetTenantError::NotFound(tid) => ApiError::NotFound(anyhow!("tenant {}", tid)),
|
||||
e @ GetTenantError::NotActive(_) => {
|
||||
// Why is this not `ApiError::NotFound`?
|
||||
// Because we must be careful to never return 404 for a tenant if it does
|
||||
// in fact exist locally. If we did, the caller could draw the conclusion
|
||||
// that it can attach the tenant to another PS and we'd be in split-brain.
|
||||
//
|
||||
// (We can produce this variant only in `mgr::get_tenant(..., active=true)` calls).
|
||||
ApiError::InternalServerError(anyhow::Error::new(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SetNewTenantConfigError> for ApiError {
|
||||
fn from(e: SetNewTenantConfigError) -> ApiError {
|
||||
match e {
|
||||
SetNewTenantConfigError::GetTenant(tid) => {
|
||||
ApiError::NotFound(anyhow!("tenant {}", tid))
|
||||
}
|
||||
e @ SetNewTenantConfigError::Persist(_) => {
|
||||
ApiError::InternalServerError(anyhow::Error::new(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<crate::tenant::DeleteTimelineError> for ApiError {
|
||||
fn from(value: crate::tenant::DeleteTimelineError) -> Self {
|
||||
use crate::tenant::DeleteTimelineError::*;
|
||||
@@ -159,7 +195,7 @@ impl From<crate::tenant::mgr::DeleteTimelineError> for ApiError {
|
||||
match value {
|
||||
// Report Precondition failed so client can distinguish between
|
||||
// "tenant is missing" case from "timeline is missing"
|
||||
Tenant(TenantStateError::NotFound(..)) => {
|
||||
Tenant(GetTenantError::NotFound(..)) => {
|
||||
ApiError::PreconditionFailed("Requested tenant is missing")
|
||||
}
|
||||
Tenant(t) => ApiError::from(t),
|
||||
@@ -265,12 +301,12 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
let request_data: TimelineCreateRequest = json_request(&mut request).await?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let new_timeline_id = request_data
|
||||
.new_timeline_id
|
||||
.unwrap_or_else(TimelineId::generate);
|
||||
let new_timeline_id = request_data.new_timeline_id;
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error);
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
async {
|
||||
let tenant = mgr::get_tenant(tenant_id, true).await?;
|
||||
match tenant.create_timeline(
|
||||
@@ -278,6 +314,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
request_data.ancestor_timeline_id.map(TimelineId::from),
|
||||
request_data.ancestor_start_lsn,
|
||||
request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION),
|
||||
state.broker_client.clone(),
|
||||
&ctx,
|
||||
)
|
||||
.await {
|
||||
@@ -291,7 +328,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
Err(err) => Err(ApiError::InternalServerError(err)),
|
||||
}
|
||||
}
|
||||
.instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
|
||||
.instrument(info_span!("timeline_create", tenant = %tenant_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -408,6 +445,7 @@ async fn tenant_attach_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
state.conf,
|
||||
tenant_id,
|
||||
tenant_conf,
|
||||
state.broker_client.clone(),
|
||||
remote_storage.clone(),
|
||||
&ctx,
|
||||
)
|
||||
@@ -457,9 +495,15 @@ async fn tenant_load_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
|
||||
|
||||
let state = get_state(&request);
|
||||
mgr::load_tenant(state.conf, tenant_id, state.remote_storage.clone(), &ctx)
|
||||
.instrument(info_span!("load", tenant = %tenant_id))
|
||||
.await?;
|
||||
mgr::load_tenant(
|
||||
state.conf,
|
||||
tenant_id,
|
||||
state.broker_client.clone(),
|
||||
state.remote_storage.clone(),
|
||||
&ctx,
|
||||
)
|
||||
.instrument(info_span!("load", tenant = %tenant_id))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::ACCEPTED, ())
|
||||
}
|
||||
@@ -511,7 +555,7 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
}
|
||||
|
||||
let state = tenant.current_state();
|
||||
Ok(TenantInfo {
|
||||
Result::<_, ApiError>::Ok(TenantInfo {
|
||||
id: tenant_id,
|
||||
state: state.clone(),
|
||||
current_physical_size: Some(current_physical_size),
|
||||
@@ -519,8 +563,7 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
})
|
||||
}
|
||||
.instrument(info_span!("tenant_status_handler", tenant = %tenant_id))
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::OK, tenant_info)
|
||||
}
|
||||
@@ -719,6 +762,8 @@ pub fn html_response(status: StatusCode, data: String) -> Result<Response<Body>,
|
||||
}
|
||||
|
||||
async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
let target_tenant_id = request_data.new_tenant_id;
|
||||
check_permission(&request, None)?;
|
||||
|
||||
let _timer = STORAGE_TIME_GLOBAL
|
||||
@@ -726,17 +771,10 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
.expect("bug")
|
||||
.start_timer();
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
|
||||
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
|
||||
let tenant_conf =
|
||||
TenantConfOpt::try_from(&request_data.config).map_err(ApiError::BadRequest)?;
|
||||
|
||||
let target_tenant_id = request_data
|
||||
.new_tenant_id
|
||||
.map(TenantId::from)
|
||||
.unwrap_or_else(TenantId::generate);
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
@@ -744,6 +782,7 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
state.conf,
|
||||
tenant_conf,
|
||||
target_tenant_id,
|
||||
state.broker_client.clone(),
|
||||
state.remote_storage.clone(),
|
||||
&ctx,
|
||||
)
|
||||
@@ -1099,6 +1138,7 @@ pub fn make_router(
|
||||
conf: &'static PageServerConf,
|
||||
launch_ts: &'static LaunchTimestamp,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
broker_client: BrokerClientChannel,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||
) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
|
||||
@@ -1139,63 +1179,65 @@ pub fn make_router(
|
||||
#[cfg(not(feature = "testing"))]
|
||||
let handler = cfg_disabled;
|
||||
|
||||
move |r| RequestSpan(handler).handle(r)
|
||||
move |r| request_span(r, handler)
|
||||
}};
|
||||
}
|
||||
|
||||
Ok(router
|
||||
.data(Arc::new(
|
||||
State::new(conf, auth, remote_storage, disk_usage_eviction_state)
|
||||
.context("Failed to initialize router state")?,
|
||||
State::new(
|
||||
conf,
|
||||
auth,
|
||||
remote_storage,
|
||||
broker_client,
|
||||
disk_usage_eviction_state,
|
||||
)
|
||||
.context("Failed to initialize router state")?,
|
||||
))
|
||||
.get("/v1/status", |r| RequestSpan(status_handler).handle(r))
|
||||
.get("/v1/status", |r| request_span(r, status_handler))
|
||||
.put(
|
||||
"/v1/failpoints",
|
||||
testing_api!("manage failpoints", failpoints_handler),
|
||||
)
|
||||
.get("/v1/tenant", |r| RequestSpan(tenant_list_handler).handle(r))
|
||||
.post("/v1/tenant", |r| {
|
||||
RequestSpan(tenant_create_handler).handle(r)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id", |r| {
|
||||
RequestSpan(tenant_status).handle(r)
|
||||
})
|
||||
.get("/v1/tenant", |r| request_span(r, tenant_list_handler))
|
||||
.post("/v1/tenant", |r| request_span(r, tenant_create_handler))
|
||||
.get("/v1/tenant/:tenant_id", |r| request_span(r, tenant_status))
|
||||
.get("/v1/tenant/:tenant_id/synthetic_size", |r| {
|
||||
RequestSpan(tenant_size_handler).handle(r)
|
||||
request_span(r, tenant_size_handler)
|
||||
})
|
||||
.put("/v1/tenant/config", |r| {
|
||||
RequestSpan(update_tenant_config_handler).handle(r)
|
||||
request_span(r, update_tenant_config_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/config", |r| {
|
||||
RequestSpan(get_tenant_config_handler).handle(r)
|
||||
request_span(r, get_tenant_config_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline", |r| {
|
||||
RequestSpan(timeline_list_handler).handle(r)
|
||||
request_span(r, timeline_list_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/timeline", |r| {
|
||||
RequestSpan(timeline_create_handler).handle(r)
|
||||
request_span(r, timeline_create_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/attach", |r| {
|
||||
RequestSpan(tenant_attach_handler).handle(r)
|
||||
request_span(r, tenant_attach_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/detach", |r| {
|
||||
RequestSpan(tenant_detach_handler).handle(r)
|
||||
request_span(r, tenant_detach_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/load", |r| {
|
||||
RequestSpan(tenant_load_handler).handle(r)
|
||||
request_span(r, tenant_load_handler)
|
||||
})
|
||||
.post("/v1/tenant/:tenant_id/ignore", |r| {
|
||||
RequestSpan(tenant_ignore_handler).handle(r)
|
||||
request_span(r, tenant_ignore_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
|
||||
RequestSpan(timeline_detail_handler).handle(r)
|
||||
request_span(r, timeline_detail_handler)
|
||||
})
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp",
|
||||
|r| RequestSpan(get_lsn_by_timestamp_handler).handle(r),
|
||||
|r| request_span(r, get_lsn_by_timestamp_handler),
|
||||
)
|
||||
.put("/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc", |r| {
|
||||
RequestSpan(timeline_gc_handler).handle(r)
|
||||
request_span(r, timeline_gc_handler)
|
||||
})
|
||||
.put(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/compact",
|
||||
@@ -1207,34 +1249,34 @@ pub fn make_router(
|
||||
)
|
||||
.post(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
|
||||
|r| RequestSpan(timeline_download_remote_layers_handler_post).handle(r),
|
||||
|r| request_span(r, timeline_download_remote_layers_handler_post),
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
|
||||
|r| RequestSpan(timeline_download_remote_layers_handler_get).handle(r),
|
||||
|r| request_span(r, timeline_download_remote_layers_handler_get),
|
||||
)
|
||||
.delete("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
|
||||
RequestSpan(timeline_delete_handler).handle(r)
|
||||
request_span(r, timeline_delete_handler)
|
||||
})
|
||||
.get("/v1/tenant/:tenant_id/timeline/:timeline_id/layer", |r| {
|
||||
RequestSpan(layer_map_info_handler).handle(r)
|
||||
request_span(r, layer_map_info_handler)
|
||||
})
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
|
||||
|r| RequestSpan(layer_download_handler).handle(r),
|
||||
|r| request_span(r, layer_download_handler),
|
||||
)
|
||||
.delete(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
|
||||
|r| RequestSpan(evict_timeline_layer_handler).handle(r),
|
||||
|r| request_span(r, evict_timeline_layer_handler),
|
||||
)
|
||||
.put("/v1/disk_usage_eviction/run", |r| {
|
||||
RequestSpan(disk_usage_eviction_run).handle(r)
|
||||
request_span(r, disk_usage_eviction_run)
|
||||
})
|
||||
.put(
|
||||
"/v1/tenant/:tenant_id/break",
|
||||
testing_api!("set tenant state to broken", handle_tenant_break),
|
||||
)
|
||||
.get("/v1/panic", |r| RequestSpan(always_panic_handler).handle(r))
|
||||
.get("/v1/panic", |r| request_span(r, always_panic_handler))
|
||||
.post(
|
||||
"/v1/tracing/event",
|
||||
testing_api!("emit a tracing event", post_tracing_event_handler),
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
mod auth;
|
||||
pub mod basebackup;
|
||||
pub mod broker_client;
|
||||
pub mod config;
|
||||
pub mod consumption_metrics;
|
||||
pub mod context;
|
||||
@@ -36,7 +35,7 @@ use tracing::info;
|
||||
/// backwards-compatible changes to the metadata format.
|
||||
pub const STORAGE_FORMAT_VERSION: u16 = 3;
|
||||
|
||||
pub const DEFAULT_PG_VERSION: u32 = 14;
|
||||
pub const DEFAULT_PG_VERSION: u32 = 15;
|
||||
|
||||
// Magic constants used to identify different kinds of files
|
||||
pub const IMAGE_FILE_MAGIC: u16 = 0x5A60;
|
||||
|
||||
@@ -50,7 +50,9 @@ use crate::import_datadir::import_wal_from_tar;
|
||||
use crate::metrics::{LIVE_CONNECTIONS_COUNT, SMGR_QUERY_TIME};
|
||||
use crate::task_mgr;
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant;
|
||||
use crate::tenant::mgr;
|
||||
use crate::tenant::mgr::GetTenantError;
|
||||
use crate::tenant::{Tenant, Timeline};
|
||||
use crate::trace::Tracer;
|
||||
|
||||
@@ -172,6 +174,7 @@ async fn read_tar_eof(mut reader: (impl AsyncRead + Unpin)) -> anyhow::Result<()
|
||||
///
|
||||
pub async fn libpq_listener_main(
|
||||
conf: &'static PageServerConf,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
listener: TcpListener,
|
||||
auth_type: AuthType,
|
||||
@@ -213,7 +216,14 @@ pub async fn libpq_listener_main(
|
||||
None,
|
||||
"serving compute connection task",
|
||||
false,
|
||||
page_service_conn_main(conf, local_auth, socket, auth_type, connection_ctx),
|
||||
page_service_conn_main(
|
||||
conf,
|
||||
broker_client.clone(),
|
||||
local_auth,
|
||||
socket,
|
||||
auth_type,
|
||||
connection_ctx,
|
||||
),
|
||||
);
|
||||
}
|
||||
Err(err) => {
|
||||
@@ -230,6 +240,7 @@ pub async fn libpq_listener_main(
|
||||
|
||||
async fn page_service_conn_main(
|
||||
conf: &'static PageServerConf,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
socket: tokio::net::TcpStream,
|
||||
auth_type: AuthType,
|
||||
@@ -266,7 +277,7 @@ async fn page_service_conn_main(
|
||||
// and create a child per-query context when it invokes process_query.
|
||||
// But it's in a shared crate, so, we store connection_ctx inside PageServerHandler
|
||||
// and create the per-query context in process_query ourselves.
|
||||
let mut conn_handler = PageServerHandler::new(conf, auth, connection_ctx);
|
||||
let mut conn_handler = PageServerHandler::new(conf, broker_client, auth, connection_ctx);
|
||||
let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?;
|
||||
|
||||
match pgbackend
|
||||
@@ -324,6 +335,7 @@ impl PageRequestMetrics {
|
||||
|
||||
struct PageServerHandler {
|
||||
_conf: &'static PageServerConf,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
claims: Option<Claims>,
|
||||
|
||||
@@ -337,11 +349,13 @@ struct PageServerHandler {
|
||||
impl PageServerHandler {
|
||||
pub fn new(
|
||||
conf: &'static PageServerConf,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
connection_ctx: RequestContext,
|
||||
) -> Self {
|
||||
PageServerHandler {
|
||||
_conf: conf,
|
||||
broker_client,
|
||||
auth,
|
||||
claims: None,
|
||||
connection_ctx,
|
||||
@@ -494,7 +508,12 @@ impl PageServerHandler {
|
||||
|
||||
let mut copyin_reader = pin!(StreamReader::new(copyin_stream(pgb)));
|
||||
timeline
|
||||
.import_basebackup_from_tar(&mut copyin_reader, base_lsn, &ctx)
|
||||
.import_basebackup_from_tar(
|
||||
&mut copyin_reader,
|
||||
base_lsn,
|
||||
self.broker_client.clone(),
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Read the end of the tar archive.
|
||||
@@ -1131,7 +1150,9 @@ enum GetActiveTenantError {
|
||||
wait_time: Duration,
|
||||
},
|
||||
#[error(transparent)]
|
||||
Other(#[from] anyhow::Error),
|
||||
NotFound(GetTenantError),
|
||||
#[error(transparent)]
|
||||
WaitTenantActive(tenant::WaitToBecomeActiveError),
|
||||
}
|
||||
|
||||
impl From<GetActiveTenantError> for QueryError {
|
||||
@@ -1140,7 +1161,8 @@ impl From<GetActiveTenantError> for QueryError {
|
||||
GetActiveTenantError::WaitForActiveTimeout { .. } => QueryError::Disconnected(
|
||||
ConnectionError::Io(io::Error::new(io::ErrorKind::TimedOut, e.to_string())),
|
||||
),
|
||||
GetActiveTenantError::Other(e) => QueryError::Other(e),
|
||||
GetActiveTenantError::WaitTenantActive(e) => QueryError::Other(anyhow::Error::new(e)),
|
||||
GetActiveTenantError::NotFound(e) => QueryError::Other(anyhow::Error::new(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1156,13 +1178,16 @@ async fn get_active_tenant_with_timeout(
|
||||
) -> Result<Arc<Tenant>, GetActiveTenantError> {
|
||||
let tenant = match mgr::get_tenant(tenant_id, false).await {
|
||||
Ok(tenant) => tenant,
|
||||
Err(e) => return Err(GetActiveTenantError::Other(e.into())),
|
||||
Err(e @ GetTenantError::NotFound(_)) => return Err(GetActiveTenantError::NotFound(e)),
|
||||
Err(GetTenantError::NotActive(_)) => {
|
||||
unreachable!("we're calling get_tenant with active=false")
|
||||
}
|
||||
};
|
||||
let wait_time = Duration::from_secs(30);
|
||||
match tokio::time::timeout(wait_time, tenant.wait_to_become_active()).await {
|
||||
Ok(Ok(())) => Ok(tenant),
|
||||
// no .context(), the error message is good enough and some tests depend on it
|
||||
Ok(Err(wait_error)) => Err(GetActiveTenantError::Other(wait_error)),
|
||||
Ok(Err(e)) => Err(GetActiveTenantError::WaitTenantActive(e)),
|
||||
Err(_) => {
|
||||
let latest_state = tenant.current_state();
|
||||
if latest_state == TenantState::Active {
|
||||
@@ -1177,13 +1202,34 @@ async fn get_active_tenant_with_timeout(
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
enum GetActiveTimelineError {
|
||||
#[error(transparent)]
|
||||
Tenant(GetActiveTenantError),
|
||||
#[error(transparent)]
|
||||
Timeline(anyhow::Error),
|
||||
}
|
||||
|
||||
impl From<GetActiveTimelineError> for QueryError {
|
||||
fn from(e: GetActiveTimelineError) -> Self {
|
||||
match e {
|
||||
GetActiveTimelineError::Tenant(e) => e.into(),
|
||||
GetActiveTimelineError::Timeline(e) => QueryError::Other(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shorthand for getting a reference to a Timeline of an Active tenant.
|
||||
async fn get_active_tenant_timeline(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Arc<Timeline>, GetActiveTenantError> {
|
||||
let tenant = get_active_tenant_with_timeout(tenant_id, ctx).await?;
|
||||
let timeline = tenant.get_timeline(timeline_id, true)?;
|
||||
) -> Result<Arc<Timeline>, GetActiveTimelineError> {
|
||||
let tenant = get_active_tenant_with_timeout(tenant_id, ctx)
|
||||
.await
|
||||
.map_err(GetActiveTimelineError::Tenant)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
.map_err(GetActiveTimelineError::Timeline)?;
|
||||
Ok(timeline)
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ use futures::FutureExt;
|
||||
use pageserver_api::models::TimelineState;
|
||||
use remote_storage::DownloadError;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use tokio::sync::watch;
|
||||
use tokio::task::JoinSet;
|
||||
use tracing::*;
|
||||
@@ -238,6 +239,7 @@ impl UninitializedTimeline<'_> {
|
||||
self,
|
||||
copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
|
||||
base_lsn: Lsn,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<Timeline>> {
|
||||
let raw_timeline = self.raw_timeline()?;
|
||||
@@ -264,7 +266,7 @@ impl UninitializedTimeline<'_> {
|
||||
// updated it for the layers that we created during the import.
|
||||
let mut timelines = self.owning_tenant.timelines.lock().unwrap();
|
||||
let tl = self.initialize_with_lock(ctx, &mut timelines, false)?;
|
||||
tl.activate(ctx)?;
|
||||
tl.activate(broker_client, ctx);
|
||||
Ok(tl)
|
||||
}
|
||||
|
||||
@@ -450,6 +452,34 @@ struct RemoteStartupData {
|
||||
remote_metadata: TimelineMetadata,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum WaitToBecomeActiveError {
|
||||
WillNotBecomeActive {
|
||||
tenant_id: TenantId,
|
||||
state: TenantState,
|
||||
},
|
||||
TenantDropped {
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
}
|
||||
|
||||
impl std::fmt::Display for WaitToBecomeActiveError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
WaitToBecomeActiveError::WillNotBecomeActive { tenant_id, state } => {
|
||||
write!(
|
||||
f,
|
||||
"Tenant {} will not become active. Current state: {:?}",
|
||||
tenant_id, state
|
||||
)
|
||||
}
|
||||
WaitToBecomeActiveError::TenantDropped { tenant_id } => {
|
||||
write!(f, "Tenant {tenant_id} will not become active (dropped)")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Tenant {
|
||||
/// Yet another helper for timeline initialization.
|
||||
/// Contains the common part of `load_local_timeline` and `load_remote_timeline`.
|
||||
@@ -585,6 +615,7 @@ impl Tenant {
|
||||
pub(crate) fn spawn_attach(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<Tenant>> {
|
||||
@@ -616,7 +647,7 @@ impl Tenant {
|
||||
async move {
|
||||
let doit = async {
|
||||
tenant_clone.attach(&ctx).await?;
|
||||
tenant_clone.activate(&ctx)?;
|
||||
tenant_clone.activate(broker_client, &ctx)?;
|
||||
anyhow::Ok(())
|
||||
};
|
||||
match doit.await {
|
||||
@@ -854,6 +885,7 @@ impl Tenant {
|
||||
pub fn spawn_load(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
ctx: &RequestContext,
|
||||
) -> Arc<Tenant> {
|
||||
@@ -890,7 +922,7 @@ impl Tenant {
|
||||
async move {
|
||||
let doit = async {
|
||||
tenant_clone.load(&ctx).await?;
|
||||
tenant_clone.activate(&ctx)?;
|
||||
tenant_clone.activate(broker_client, &ctx)?;
|
||||
anyhow::Ok(())
|
||||
};
|
||||
match doit.await {
|
||||
@@ -1234,6 +1266,7 @@ impl Tenant {
|
||||
ancestor_timeline_id: Option<TimelineId>,
|
||||
mut ancestor_start_lsn: Option<Lsn>,
|
||||
pg_version: u32,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Option<Arc<Timeline>>> {
|
||||
anyhow::ensure!(
|
||||
@@ -1300,7 +1333,7 @@ impl Tenant {
|
||||
}
|
||||
};
|
||||
|
||||
loaded_timeline.activate(ctx).context("activate timeline")?;
|
||||
loaded_timeline.activate(broker_client, ctx);
|
||||
|
||||
if let Some(remote_client) = loaded_timeline.remote_client.as_ref() {
|
||||
// Wait for the upload of the 'index_part.json` file to finish, so that when we return
|
||||
@@ -1403,7 +1436,11 @@ impl Tenant {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Removes timeline-related in-memory data
|
||||
/// Shuts down a timeline's tasks, removes its in-memory structures, and deletes its
|
||||
/// data from disk.
|
||||
///
|
||||
/// This doesn't currently delete all data from S3, but sets a flag in its
|
||||
/// index_part.json file to mark it as deleted.
|
||||
pub async fn delete_timeline(
|
||||
&self,
|
||||
timeline_id: TimelineId,
|
||||
@@ -1413,7 +1450,11 @@ impl Tenant {
|
||||
|
||||
// Transition the timeline into TimelineState::Stopping.
|
||||
// This should prevent new operations from starting.
|
||||
let timeline = {
|
||||
//
|
||||
// Also grab the Timeline's delete_lock to prevent another deletion from starting.
|
||||
let timeline;
|
||||
let mut delete_lock_guard;
|
||||
{
|
||||
let mut timelines = self.timelines.lock().unwrap();
|
||||
|
||||
// Ensure that there are no child timelines **attached to that pageserver**,
|
||||
@@ -1431,24 +1472,43 @@ impl Tenant {
|
||||
Entry::Vacant(_) => return Err(DeleteTimelineError::NotFound),
|
||||
};
|
||||
|
||||
let timeline = Arc::clone(timeline_entry.get());
|
||||
timeline = Arc::clone(timeline_entry.get());
|
||||
|
||||
// Prevent two tasks from trying to delete the timeline at the same time.
|
||||
//
|
||||
// XXX: We should perhaps return an HTTP "202 Accepted" to signal that the caller
|
||||
// needs to poll until the operation has finished. But for now, we return an
|
||||
// error, because the control plane knows to retry errors.
|
||||
delete_lock_guard = timeline.delete_lock.try_lock().map_err(|_| {
|
||||
DeleteTimelineError::Other(anyhow::anyhow!(
|
||||
"timeline deletion is already in progress"
|
||||
))
|
||||
})?;
|
||||
|
||||
// If another task finished the deletion just before we acquired the lock,
|
||||
// return success.
|
||||
if *delete_lock_guard {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
timeline.set_state(TimelineState::Stopping);
|
||||
|
||||
drop(timelines);
|
||||
timeline
|
||||
};
|
||||
}
|
||||
|
||||
// Now that the Timeline is in Stopping state, request all the related tasks to
|
||||
// shut down.
|
||||
//
|
||||
// NB: If you call delete_timeline multiple times concurrently, they will
|
||||
// all go through the motions here. Make sure the code here is idempotent,
|
||||
// and don't error out if some of the shutdown tasks have already been
|
||||
// completed!
|
||||
// NB: If this fails half-way through, and is retried, the retry will go through
|
||||
// all the same steps again. Make sure the code here is idempotent, and don't
|
||||
// error out if some of the shutdown tasks have already been completed!
|
||||
|
||||
// Stop the walreceiver first.
|
||||
debug!("waiting for wal receiver to shutdown");
|
||||
timeline.walreceiver.stop().await;
|
||||
let maybe_started_walreceiver = { timeline.walreceiver.lock().unwrap().take() };
|
||||
if let Some(walreceiver) = maybe_started_walreceiver {
|
||||
walreceiver.stop().await;
|
||||
}
|
||||
debug!("wal receiver shutdown confirmed");
|
||||
|
||||
// Prevent new uploads from starting.
|
||||
@@ -1482,6 +1542,10 @@ impl Tenant {
|
||||
// If we (now, or already) marked it successfully as deleted, we can proceed
|
||||
Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
|
||||
// Bail out otherwise
|
||||
//
|
||||
// AlreadyInProgress shouldn't happen, because the 'delete_lock' prevents
|
||||
// two tasks from performing the deletion at the same time. The first task
|
||||
// that starts deletion should run it to completion.
|
||||
Err(e @ PersistIndexPartWithDeletedFlagError::AlreadyInProgress(_))
|
||||
| Err(e @ PersistIndexPartWithDeletedFlagError::Other(_)) => {
|
||||
return Err(DeleteTimelineError::Other(anyhow::anyhow!(e)));
|
||||
@@ -1492,14 +1556,12 @@ impl Tenant {
|
||||
{
|
||||
// Grab the layer_removal_cs lock, and actually perform the deletion.
|
||||
//
|
||||
// This lock prevents multiple concurrent delete_timeline calls from
|
||||
// stepping on each other's toes, while deleting the files. It also
|
||||
// prevents GC or compaction from running at the same time.
|
||||
// This lock prevents prevents GC or compaction from running at the same time.
|
||||
// The GC task doesn't register itself with the timeline it's operating on,
|
||||
// so it might still be running even though we called `shutdown_tasks`.
|
||||
//
|
||||
// Note that there are still other race conditions between
|
||||
// GC, compaction and timeline deletion. GC task doesn't
|
||||
// register itself properly with the timeline it's
|
||||
// operating on. See
|
||||
// GC, compaction and timeline deletion. See
|
||||
// https://github.com/neondatabase/neon/issues/2671
|
||||
//
|
||||
// No timeout here, GC & Compaction should be responsive to the
|
||||
@@ -1561,37 +1623,27 @@ impl Tenant {
|
||||
});
|
||||
|
||||
// Remove the timeline from the map.
|
||||
let mut timelines = self.timelines.lock().unwrap();
|
||||
let children_exist = timelines
|
||||
.iter()
|
||||
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline_id));
|
||||
// XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
|
||||
// We already deleted the layer files, so it's probably best to panic.
|
||||
// (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
|
||||
if children_exist {
|
||||
panic!("Timeline grew children while we removed layer files");
|
||||
{
|
||||
let mut timelines = self.timelines.lock().unwrap();
|
||||
|
||||
let children_exist = timelines
|
||||
.iter()
|
||||
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline_id));
|
||||
// XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
|
||||
// We already deleted the layer files, so it's probably best to panic.
|
||||
// (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
|
||||
if children_exist {
|
||||
panic!("Timeline grew children while we removed layer files");
|
||||
}
|
||||
|
||||
timelines.remove(&timeline_id).expect(
|
||||
"timeline that we were deleting was concurrently removed from 'timelines' map",
|
||||
);
|
||||
}
|
||||
let removed_timeline = timelines.remove(&timeline_id);
|
||||
if removed_timeline.is_none() {
|
||||
// This can legitimately happen if there's a concurrent call to this function.
|
||||
// T1 T2
|
||||
// lock
|
||||
// unlock
|
||||
// lock
|
||||
// unlock
|
||||
// remove files
|
||||
// lock
|
||||
// remove from map
|
||||
// unlock
|
||||
// return
|
||||
// remove files
|
||||
// lock
|
||||
// remove from map observes empty map
|
||||
// unlock
|
||||
// return
|
||||
debug!("concurrent call to this function won the race");
|
||||
}
|
||||
drop(timelines);
|
||||
|
||||
// All done! Mark the deletion as completed and release the delete_lock
|
||||
*delete_lock_guard = true;
|
||||
drop(delete_lock_guard);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1605,7 +1657,11 @@ impl Tenant {
|
||||
}
|
||||
|
||||
/// Changes tenant status to active, unless shutdown was already requested.
|
||||
fn activate(&self, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
fn activate(
|
||||
self: &Arc<Self>,
|
||||
broker_client: BrokerClientChannel,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
|
||||
let mut result = Ok(());
|
||||
@@ -1638,33 +1694,13 @@ impl Tenant {
|
||||
|
||||
// Spawn gc and compaction loops. The loops will shut themselves
|
||||
// down when they notice that the tenant is inactive.
|
||||
tasks::start_background_loops(self.tenant_id);
|
||||
tasks::start_background_loops(self);
|
||||
|
||||
let mut activated_timelines = 0;
|
||||
let mut timelines_broken_during_activation = 0;
|
||||
|
||||
for timeline in not_broken_timelines {
|
||||
match timeline
|
||||
.activate(ctx)
|
||||
.context("timeline activation for activating tenant")
|
||||
{
|
||||
Ok(()) => {
|
||||
activated_timelines += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Failed to activate timeline {}: {:#}",
|
||||
timeline.timeline_id, e
|
||||
);
|
||||
timeline.set_state(TimelineState::Broken);
|
||||
*current_state = TenantState::broken_from_reason(format!(
|
||||
"failed to activate timeline {}: {}",
|
||||
timeline.timeline_id, e
|
||||
));
|
||||
|
||||
timelines_broken_during_activation += 1;
|
||||
}
|
||||
}
|
||||
timeline.activate(broker_client.clone(), ctx);
|
||||
activated_timelines += 1;
|
||||
}
|
||||
|
||||
let elapsed = self.loading_started_at.elapsed();
|
||||
@@ -1676,7 +1712,6 @@ impl Tenant {
|
||||
since_creation_millis = elapsed.as_millis(),
|
||||
tenant_id = %self.tenant_id,
|
||||
activated_timelines,
|
||||
timelines_broken_during_activation,
|
||||
total_timelines,
|
||||
post_state = <&'static str>::from(&*current_state),
|
||||
"activation attempt finished"
|
||||
@@ -1753,25 +1788,30 @@ impl Tenant {
|
||||
self.state.subscribe()
|
||||
}
|
||||
|
||||
pub async fn wait_to_become_active(&self) -> anyhow::Result<()> {
|
||||
pub(crate) async fn wait_to_become_active(&self) -> Result<(), WaitToBecomeActiveError> {
|
||||
let mut receiver = self.state.subscribe();
|
||||
loop {
|
||||
let current_state = receiver.borrow_and_update().clone();
|
||||
match current_state {
|
||||
TenantState::Loading | TenantState::Attaching => {
|
||||
// in these states, there's a chance that we can reach ::Active
|
||||
receiver.changed().await?;
|
||||
receiver.changed().await.map_err(
|
||||
|_e: tokio::sync::watch::error::RecvError| {
|
||||
WaitToBecomeActiveError::TenantDropped {
|
||||
tenant_id: self.tenant_id,
|
||||
}
|
||||
},
|
||||
)?;
|
||||
}
|
||||
TenantState::Active { .. } => {
|
||||
return Ok(());
|
||||
}
|
||||
TenantState::Broken { .. } | TenantState::Stopping => {
|
||||
// There's no chance the tenant can transition back into ::Active
|
||||
anyhow::bail!(
|
||||
"Tenant {} will not become active. Current state: {:?}",
|
||||
self.tenant_id,
|
||||
¤t_state,
|
||||
);
|
||||
return Err(WaitToBecomeActiveError::WillNotBecomeActive {
|
||||
tenant_id: self.tenant_id,
|
||||
state: current_state,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,7 +56,6 @@ use std::collections::VecDeque;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
use utils::lsn::Lsn;
|
||||
use tracing::*;
|
||||
|
||||
use historic_layer_coverage::BufferedHistoricLayerCoverage;
|
||||
pub use historic_layer_coverage::Replacement;
|
||||
@@ -276,14 +275,11 @@ where
|
||||
///
|
||||
pub(self) fn insert_historic_noflush(&mut self, layer: Arc<L>) {
|
||||
// TODO: See #3869, resulting #4088, attempted fix and repro #4094
|
||||
let key = historic_layer_coverage::LayerKey::from(&*layer);
|
||||
if self.historic.contains(&key) {
|
||||
error!(
|
||||
"Attempt to insert duplicate layer {} in layer map",
|
||||
layer.short_id()
|
||||
);
|
||||
}
|
||||
self.historic.insert(key, Arc::clone(&layer));
|
||||
self.historic.insert(
|
||||
historic_layer_coverage::LayerKey::from(&*layer),
|
||||
Arc::clone(&layer),
|
||||
);
|
||||
|
||||
if Self::is_l0(&layer) {
|
||||
self.l0_delta_layers.push(layer);
|
||||
}
|
||||
|
||||
@@ -417,15 +417,7 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn contains(&self, layer_key: &LayerKey) -> bool {
|
||||
match self.buffer.get(layer_key) {
|
||||
Some(None) => false, // layer remove was buffered
|
||||
Some(_) => true, // layer insert was buffered
|
||||
None => self.layers.contains_key(layer_key), // no buffered ops for this layer
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, layer_key: LayerKey, value: Value) {
|
||||
pub fn insert(&mut self, layer_key: LayerKey, value: Value) {
|
||||
self.buffer.insert(layer_key, Some(value));
|
||||
}
|
||||
|
||||
|
||||
@@ -58,9 +58,10 @@ static TENANTS: Lazy<RwLock<TenantsMap>> = Lazy::new(|| RwLock::new(TenantsMap::
|
||||
/// Initialize repositories with locally available timelines.
|
||||
/// Timelines that are only partially available locally (remote storage has more data than this pageserver)
|
||||
/// are scheduled for download and added to the tenant once download is completed.
|
||||
#[instrument(skip(conf, remote_storage))]
|
||||
#[instrument(skip_all)]
|
||||
pub async fn init_tenant_mgr(
|
||||
conf: &'static PageServerConf,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
) -> anyhow::Result<()> {
|
||||
// Scan local filesystem for attached tenants
|
||||
@@ -116,6 +117,7 @@ pub async fn init_tenant_mgr(
|
||||
match schedule_local_tenant_processing(
|
||||
conf,
|
||||
&tenant_dir_path,
|
||||
broker_client.clone(),
|
||||
remote_storage.clone(),
|
||||
&ctx,
|
||||
) {
|
||||
@@ -150,6 +152,7 @@ pub async fn init_tenant_mgr(
|
||||
pub fn schedule_local_tenant_processing(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_path: &Path,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Arc<Tenant>> {
|
||||
@@ -186,7 +189,7 @@ pub fn schedule_local_tenant_processing(
|
||||
let tenant = if conf.tenant_attaching_mark_file_path(&tenant_id).exists() {
|
||||
info!("tenant {tenant_id} has attaching mark file, resuming its attach operation");
|
||||
if let Some(remote_storage) = remote_storage {
|
||||
match Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx) {
|
||||
match Tenant::spawn_attach(conf, tenant_id, broker_client, remote_storage, ctx) {
|
||||
Ok(tenant) => tenant,
|
||||
Err(e) => {
|
||||
error!("Failed to spawn_attach tenant {tenant_id}, reason: {e:#}");
|
||||
@@ -204,7 +207,7 @@ pub fn schedule_local_tenant_processing(
|
||||
} else {
|
||||
info!("tenant {tenant_id} is assumed to be loadable, starting load operation");
|
||||
// Start loading the tenant into memory. It will initially be in Loading state.
|
||||
Tenant::spawn_load(conf, tenant_id, remote_storage, ctx)
|
||||
Tenant::spawn_load(conf, tenant_id, broker_client, remote_storage, ctx)
|
||||
};
|
||||
Ok(tenant)
|
||||
}
|
||||
@@ -275,6 +278,7 @@ pub async fn create_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
tenant_id: TenantId,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Arc<Tenant>, TenantMapInsertError> {
|
||||
@@ -287,7 +291,7 @@ pub async fn create_tenant(
|
||||
// See https://github.com/neondatabase/neon/issues/4233
|
||||
|
||||
let created_tenant =
|
||||
schedule_local_tenant_processing(conf, &tenant_directory, remote_storage, ctx)?;
|
||||
schedule_local_tenant_processing(conf, &tenant_directory, broker_client, remote_storage, ctx)?;
|
||||
// TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here.
|
||||
// See https://github.com/neondatabase/neon/issues/4233
|
||||
|
||||
@@ -300,11 +304,19 @@ pub async fn create_tenant(
|
||||
}).await
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum SetNewTenantConfigError {
|
||||
#[error(transparent)]
|
||||
GetTenant(#[from] GetTenantError),
|
||||
#[error(transparent)]
|
||||
Persist(anyhow::Error),
|
||||
}
|
||||
|
||||
pub async fn set_new_tenant_config(
|
||||
conf: &'static PageServerConf,
|
||||
new_tenant_conf: TenantConfOpt,
|
||||
tenant_id: TenantId,
|
||||
) -> Result<(), TenantStateError> {
|
||||
) -> Result<(), SetNewTenantConfigError> {
|
||||
info!("configuring tenant {tenant_id}");
|
||||
let tenant = get_tenant(tenant_id, true).await?;
|
||||
|
||||
@@ -314,23 +326,32 @@ pub async fn set_new_tenant_config(
|
||||
&tenant_config_path,
|
||||
new_tenant_conf,
|
||||
false,
|
||||
)?;
|
||||
)
|
||||
.map_err(SetNewTenantConfigError::Persist)?;
|
||||
tenant.set_new_tenant_config(new_tenant_conf);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum GetTenantError {
|
||||
#[error("Tenant {0} not found")]
|
||||
NotFound(TenantId),
|
||||
#[error("Tenant {0} is not active")]
|
||||
NotActive(TenantId),
|
||||
}
|
||||
|
||||
/// Gets the tenant from the in-memory data, erroring if it's absent or is not fitting to the query.
|
||||
/// `active_only = true` allows to query only tenants that are ready for operations, erroring on other kinds of tenants.
|
||||
pub async fn get_tenant(
|
||||
tenant_id: TenantId,
|
||||
active_only: bool,
|
||||
) -> Result<Arc<Tenant>, TenantStateError> {
|
||||
) -> Result<Arc<Tenant>, GetTenantError> {
|
||||
let m = TENANTS.read().await;
|
||||
let tenant = m
|
||||
.get(&tenant_id)
|
||||
.ok_or(TenantStateError::NotFound(tenant_id))?;
|
||||
.ok_or(GetTenantError::NotFound(tenant_id))?;
|
||||
if active_only && !tenant.is_active() {
|
||||
Err(TenantStateError::NotActive(tenant_id))
|
||||
Err(GetTenantError::NotActive(tenant_id))
|
||||
} else {
|
||||
Ok(Arc::clone(tenant))
|
||||
}
|
||||
@@ -339,7 +360,7 @@ pub async fn get_tenant(
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum DeleteTimelineError {
|
||||
#[error("Tenant {0}")]
|
||||
Tenant(#[from] TenantStateError),
|
||||
Tenant(#[from] GetTenantError),
|
||||
|
||||
#[error("Timeline {0}")]
|
||||
Timeline(#[from] crate::tenant::DeleteTimelineError),
|
||||
@@ -404,6 +425,7 @@ pub async fn detach_tenant(
|
||||
pub async fn load_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), TenantMapInsertError> {
|
||||
@@ -415,7 +437,7 @@ pub async fn load_tenant(
|
||||
.with_context(|| format!("Failed to remove tenant ignore mark {tenant_ignore_mark:?} during tenant loading"))?;
|
||||
}
|
||||
|
||||
let new_tenant = schedule_local_tenant_processing(conf, &tenant_path, remote_storage, ctx)
|
||||
let new_tenant = schedule_local_tenant_processing(conf, &tenant_path, broker_client, remote_storage, ctx)
|
||||
.with_context(|| {
|
||||
format!("Failed to schedule tenant processing in path {tenant_path:?}")
|
||||
})?;
|
||||
@@ -472,6 +494,7 @@ pub async fn attach_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
tenant_conf: TenantConfOpt,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), TenantMapInsertError> {
|
||||
@@ -487,7 +510,7 @@ pub async fn attach_tenant(
|
||||
.context("check for attach marker file existence")?;
|
||||
anyhow::ensure!(marker_file_exists, "create_tenant_files should have created the attach marker file");
|
||||
|
||||
let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, Some(remote_storage), ctx)?;
|
||||
let attached_tenant = schedule_local_tenant_processing(conf, &tenant_dir, broker_client, Some(remote_storage), ctx)?;
|
||||
// TODO: tenant object & its background loops remain, untracked in tenant map, if we fail here.
|
||||
// See https://github.com/neondatabase/neon/issues/4233
|
||||
|
||||
|
||||
@@ -19,14 +19,8 @@ fn parallel_worker(paths: &[PathBuf], next_path_idx: &AtomicUsize) -> io::Result
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn par_fsync(paths: &[PathBuf]) -> io::Result<()> {
|
||||
const PARALLEL_PATH_THRESHOLD: usize = 1;
|
||||
if paths.len() <= PARALLEL_PATH_THRESHOLD {
|
||||
for path in paths {
|
||||
fsync_path(path)?;
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
fn fsync_in_thread_pool(paths: &[PathBuf]) -> io::Result<()> {
|
||||
// TODO: remove this function in favor of `par_fsync_async` once we asyncify everything.
|
||||
|
||||
/// Use at most this number of threads.
|
||||
/// Increasing this limit will
|
||||
@@ -36,11 +30,11 @@ pub fn par_fsync(paths: &[PathBuf]) -> io::Result<()> {
|
||||
let num_threads = paths.len().min(MAX_NUM_THREADS);
|
||||
let next_path_idx = AtomicUsize::new(0);
|
||||
|
||||
crossbeam_utils::thread::scope(|s| -> io::Result<()> {
|
||||
std::thread::scope(|s| -> io::Result<()> {
|
||||
let mut handles = vec![];
|
||||
// Spawn `num_threads - 1`, as the current thread is also a worker.
|
||||
for _ in 1..num_threads {
|
||||
handles.push(s.spawn(|_| parallel_worker(paths, &next_path_idx)));
|
||||
handles.push(s.spawn(|| parallel_worker(paths, &next_path_idx)));
|
||||
}
|
||||
|
||||
parallel_worker(paths, &next_path_idx)?;
|
||||
@@ -51,5 +45,41 @@ pub fn par_fsync(paths: &[PathBuf]) -> io::Result<()> {
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Parallel fsync all files. Can be used in non-async context as it is using rayon thread pool.
|
||||
pub fn par_fsync(paths: &[PathBuf]) -> io::Result<()> {
|
||||
if paths.len() == 1 {
|
||||
fsync_path(&paths[0])?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
fsync_in_thread_pool(paths)
|
||||
}
|
||||
|
||||
/// Parallel fsync asynchronously. If number of files are less than PARALLEL_PATH_THRESHOLD, fsync is done in the current
|
||||
/// execution thread. Otherwise, we will spawn_blocking and run it in tokio.
|
||||
pub async fn par_fsync_async(paths: &[PathBuf]) -> io::Result<()> {
|
||||
const MAX_CONCURRENT_FSYNC: usize = 64;
|
||||
let mut next = paths.iter().peekable();
|
||||
let mut js = tokio::task::JoinSet::new();
|
||||
loop {
|
||||
while js.len() < MAX_CONCURRENT_FSYNC && next.peek().is_some() {
|
||||
let next = next.next().expect("just peeked");
|
||||
let next = next.to_owned();
|
||||
js.spawn_blocking(move || fsync_path(&next));
|
||||
}
|
||||
|
||||
// now the joinset has been filled up, wait for next to complete
|
||||
if let Some(res) = js.join_next().await {
|
||||
res??;
|
||||
} else {
|
||||
// last item had already completed
|
||||
assert!(
|
||||
next.peek().is_none(),
|
||||
"joinset emptied, we shouldn't have more work"
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,13 +9,12 @@ use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::metrics::TENANT_TASK_EVENTS;
|
||||
use crate::task_mgr;
|
||||
use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
|
||||
use crate::tenant::mgr;
|
||||
use crate::tenant::{Tenant, TenantState};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::id::TenantId;
|
||||
|
||||
pub fn start_background_loops(tenant_id: TenantId) {
|
||||
pub fn start_background_loops(tenant: &Arc<Tenant>) {
|
||||
let tenant_id = tenant.tenant_id;
|
||||
task_mgr::spawn(
|
||||
BACKGROUND_RUNTIME.handle(),
|
||||
TaskKind::Compaction,
|
||||
@@ -23,11 +22,14 @@ pub fn start_background_loops(tenant_id: TenantId) {
|
||||
None,
|
||||
&format!("compactor for tenant {tenant_id}"),
|
||||
false,
|
||||
async move {
|
||||
compaction_loop(tenant_id)
|
||||
.instrument(info_span!("compaction_loop", tenant_id = %tenant_id))
|
||||
.await;
|
||||
Ok(())
|
||||
{
|
||||
let tenant = Arc::clone(tenant);
|
||||
async move {
|
||||
compaction_loop(tenant)
|
||||
.instrument(info_span!("compaction_loop", tenant_id = %tenant_id))
|
||||
.await;
|
||||
Ok(())
|
||||
}
|
||||
},
|
||||
);
|
||||
task_mgr::spawn(
|
||||
@@ -37,11 +39,14 @@ pub fn start_background_loops(tenant_id: TenantId) {
|
||||
None,
|
||||
&format!("garbage collector for tenant {tenant_id}"),
|
||||
false,
|
||||
async move {
|
||||
gc_loop(tenant_id)
|
||||
.instrument(info_span!("gc_loop", tenant_id = %tenant_id))
|
||||
.await;
|
||||
Ok(())
|
||||
{
|
||||
let tenant = Arc::clone(tenant);
|
||||
async move {
|
||||
gc_loop(tenant)
|
||||
.instrument(info_span!("gc_loop", tenant_id = %tenant_id))
|
||||
.await;
|
||||
Ok(())
|
||||
}
|
||||
},
|
||||
);
|
||||
}
|
||||
@@ -49,7 +54,7 @@ pub fn start_background_loops(tenant_id: TenantId) {
|
||||
///
|
||||
/// Compaction task's main loop
|
||||
///
|
||||
async fn compaction_loop(tenant_id: TenantId) {
|
||||
async fn compaction_loop(tenant: Arc<Tenant>) {
|
||||
let wait_duration = Duration::from_secs(2);
|
||||
info!("starting");
|
||||
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
|
||||
@@ -60,16 +65,16 @@ async fn compaction_loop(tenant_id: TenantId) {
|
||||
loop {
|
||||
trace!("waking up");
|
||||
|
||||
let tenant = tokio::select! {
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => {
|
||||
info!("received cancellation request");
|
||||
return;
|
||||
},
|
||||
tenant_wait_result = wait_for_active_tenant(tenant_id, wait_duration) => match tenant_wait_result {
|
||||
tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result {
|
||||
ControlFlow::Break(()) => return,
|
||||
ControlFlow::Continue(tenant) => tenant,
|
||||
ControlFlow::Continue(()) => (),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
let period = tenant.get_compaction_period();
|
||||
|
||||
@@ -119,7 +124,7 @@ async fn compaction_loop(tenant_id: TenantId) {
|
||||
///
|
||||
/// GC task's main loop
|
||||
///
|
||||
async fn gc_loop(tenant_id: TenantId) {
|
||||
async fn gc_loop(tenant: Arc<Tenant>) {
|
||||
let wait_duration = Duration::from_secs(2);
|
||||
info!("starting");
|
||||
TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
|
||||
@@ -127,21 +132,22 @@ async fn gc_loop(tenant_id: TenantId) {
|
||||
let cancel = task_mgr::shutdown_token();
|
||||
// GC might require downloading, to find the cutoff LSN that corresponds to the
|
||||
// cutoff specified as time.
|
||||
let ctx = RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
|
||||
let ctx =
|
||||
RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
|
||||
let mut first = true;
|
||||
loop {
|
||||
trace!("waking up");
|
||||
|
||||
let tenant = tokio::select! {
|
||||
tokio::select! {
|
||||
_ = cancel.cancelled() => {
|
||||
info!("received cancellation request");
|
||||
return;
|
||||
},
|
||||
tenant_wait_result = wait_for_active_tenant(tenant_id, wait_duration) => match tenant_wait_result {
|
||||
tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result {
|
||||
ControlFlow::Break(()) => return,
|
||||
ControlFlow::Continue(tenant) => tenant,
|
||||
ControlFlow::Continue(()) => (),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
let period = tenant.get_gc_period();
|
||||
|
||||
@@ -161,7 +167,9 @@ async fn gc_loop(tenant_id: TenantId) {
|
||||
Duration::from_secs(10)
|
||||
} else {
|
||||
// Run gc
|
||||
let res = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &ctx).await;
|
||||
let res = tenant
|
||||
.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &ctx)
|
||||
.await;
|
||||
if let Err(e) = res {
|
||||
error!("Gc failed, retrying in {:?}: {e:?}", wait_duration);
|
||||
wait_duration
|
||||
@@ -187,23 +195,10 @@ async fn gc_loop(tenant_id: TenantId) {
|
||||
trace!("GC loop stopped.");
|
||||
}
|
||||
|
||||
async fn wait_for_active_tenant(
|
||||
tenant_id: TenantId,
|
||||
wait: Duration,
|
||||
) -> ControlFlow<(), Arc<Tenant>> {
|
||||
let tenant = loop {
|
||||
match mgr::get_tenant(tenant_id, false).await {
|
||||
Ok(tenant) => break tenant,
|
||||
Err(e) => {
|
||||
error!("Failed to get a tenant {tenant_id}: {e:#}");
|
||||
tokio::time::sleep(wait).await;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
async fn wait_for_active_tenant(tenant: &Arc<Tenant>) -> ControlFlow<()> {
|
||||
// if the tenant has a proper status already, no need to wait for anything
|
||||
if tenant.current_state() == TenantState::Active {
|
||||
ControlFlow::Continue(tenant)
|
||||
ControlFlow::Continue(())
|
||||
} else {
|
||||
let mut tenant_state_updates = tenant.subscribe_for_state_updates();
|
||||
loop {
|
||||
@@ -213,7 +208,7 @@ async fn wait_for_active_tenant(
|
||||
match new_state {
|
||||
TenantState::Active => {
|
||||
debug!("Tenant state changed to active, continuing the task loop");
|
||||
return ControlFlow::Continue(tenant);
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
state => {
|
||||
debug!("Not running the task loop, tenant is not active: {state:?}");
|
||||
|
||||
@@ -31,7 +31,6 @@ use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering};
|
||||
use std::sync::{Arc, Mutex, MutexGuard, RwLock, Weak};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
|
||||
use crate::broker_client::{get_broker_client, is_broker_client_initialized};
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::tenant::remote_timeline_client::{self, index::LayerFileMetadata};
|
||||
use crate::tenant::storage_layer::{
|
||||
@@ -196,8 +195,9 @@ pub struct Timeline {
|
||||
/// Layer removal lock.
|
||||
/// A lock to ensure that no layer of the timeline is removed concurrently by other tasks.
|
||||
/// This lock is acquired in [`Timeline::gc`], [`Timeline::compact`],
|
||||
/// and [`Tenant::delete_timeline`].
|
||||
pub(super) layer_removal_cs: tokio::sync::Mutex<()>,
|
||||
/// and [`Tenant::delete_timeline`]. This is an `Arc<Mutex>` lock because we need an owned
|
||||
/// lock guard in functions that will be spawned to tokio I/O pool (which requires `'static`).
|
||||
pub(super) layer_removal_cs: Arc<tokio::sync::Mutex<()>>,
|
||||
|
||||
// Needed to ensure that we can't create a branch at a point that was already garbage collected
|
||||
pub latest_gc_cutoff_lsn: Rcu<Lsn>,
|
||||
@@ -227,7 +227,7 @@ pub struct Timeline {
|
||||
/// or None if WAL receiver has not received anything for this timeline
|
||||
/// yet.
|
||||
pub last_received_wal: Mutex<Option<WalReceiverInfo>>,
|
||||
pub walreceiver: WalReceiver,
|
||||
pub walreceiver: Mutex<Option<WalReceiver>>,
|
||||
|
||||
/// Relation size cache
|
||||
pub rel_size_cache: RwLock<HashMap<RelTag, (Lsn, BlockNumber)>>,
|
||||
@@ -236,6 +236,10 @@ pub struct Timeline {
|
||||
|
||||
state: watch::Sender<TimelineState>,
|
||||
|
||||
/// Prevent two tasks from deleting the timeline at the same time. If held, the
|
||||
/// timeline is being deleted. If 'true', the timeline has already been deleted.
|
||||
pub delete_lock: tokio::sync::Mutex<bool>,
|
||||
|
||||
eviction_task_timeline_state: tokio::sync::Mutex<EvictionTaskTimelineState>,
|
||||
}
|
||||
|
||||
@@ -622,17 +626,27 @@ impl Timeline {
|
||||
.await
|
||||
{
|
||||
Ok(()) => Ok(()),
|
||||
seqwait_error => {
|
||||
Err(e) => {
|
||||
// don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo
|
||||
drop(_timer);
|
||||
let walreceiver_status = self.walreceiver.status().await;
|
||||
seqwait_error.with_context(|| format!(
|
||||
"Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, {}",
|
||||
lsn,
|
||||
self.get_last_record_lsn(),
|
||||
self.get_disk_consistent_lsn(),
|
||||
walreceiver_status.map(|status| status.to_human_readable_string())
|
||||
.unwrap_or_else(|| "WalReceiver status: Not active".to_string()),
|
||||
))
|
||||
let walreceiver_status = {
|
||||
match &*self.walreceiver.lock().unwrap() {
|
||||
None => "stopping or stopped".to_string(),
|
||||
Some(walreceiver) => match walreceiver.status() {
|
||||
Some(status) => status.to_human_readable_string(),
|
||||
None => "Not active".to_string(),
|
||||
},
|
||||
}
|
||||
};
|
||||
Err(anyhow::Error::new(e).context({
|
||||
format!(
|
||||
"Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}",
|
||||
lsn,
|
||||
self.get_last_record_lsn(),
|
||||
self.get_disk_consistent_lsn(),
|
||||
walreceiver_status,
|
||||
)
|
||||
}))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -660,7 +674,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
/// Outermost timeline compaction operation; downloads needed layers.
|
||||
pub async fn compact(&self, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
pub async fn compact(self: &Arc<Self>, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
const ROUNDS: usize = 2;
|
||||
|
||||
let last_record_lsn = self.get_last_record_lsn();
|
||||
@@ -749,7 +763,7 @@ impl Timeline {
|
||||
}
|
||||
|
||||
/// Compaction which might need to be retried after downloading remote layers.
|
||||
async fn compact_inner(&self, ctx: &RequestContext) -> Result<(), CompactionError> {
|
||||
async fn compact_inner(self: &Arc<Self>, ctx: &RequestContext) -> Result<(), CompactionError> {
|
||||
//
|
||||
// High level strategy for compaction / image creation:
|
||||
//
|
||||
@@ -784,7 +798,7 @@ impl Timeline {
|
||||
// Below are functions compact_level0() and create_image_layers()
|
||||
// but they are a bit ad hoc and don't quite work like it's explained
|
||||
// above. Rewrite it.
|
||||
let layer_removal_cs = self.layer_removal_cs.lock().await;
|
||||
let layer_removal_cs = Arc::new(self.layer_removal_cs.clone().lock_owned().await);
|
||||
// Is the timeline being deleted?
|
||||
let state = *self.state.borrow();
|
||||
if state == TimelineState::Stopping {
|
||||
@@ -818,7 +832,7 @@ impl Timeline {
|
||||
|
||||
// 3. Compact
|
||||
let timer = self.metrics.compact_time_histo.start_timer();
|
||||
self.compact_level0(&layer_removal_cs, target_file_size, ctx)
|
||||
self.compact_level0(layer_removal_cs.clone(), target_file_size, ctx)
|
||||
.await?;
|
||||
timer.stop_and_record();
|
||||
}
|
||||
@@ -907,18 +921,10 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn activate(self: &Arc<Self>, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
if is_broker_client_initialized() {
|
||||
self.launch_wal_receiver(ctx, get_broker_client().clone())?;
|
||||
} else if cfg!(test) {
|
||||
info!("not launching WAL receiver because broker client hasn't been initialized");
|
||||
} else {
|
||||
anyhow::bail!("broker client not initialized");
|
||||
}
|
||||
|
||||
pub fn activate(self: &Arc<Self>, broker_client: BrokerClientChannel, ctx: &RequestContext) {
|
||||
self.launch_wal_receiver(ctx, broker_client);
|
||||
self.set_state(TimelineState::Active);
|
||||
self.launch_eviction_task();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn set_state(&self, new_state: TimelineState) {
|
||||
@@ -1327,15 +1333,7 @@ impl Timeline {
|
||||
let (layer_flush_done_tx, _) = tokio::sync::watch::channel((0, Ok(())));
|
||||
|
||||
let tenant_conf_guard = tenant_conf.read().unwrap();
|
||||
let wal_connect_timeout = tenant_conf_guard
|
||||
.walreceiver_connect_timeout
|
||||
.unwrap_or(conf.default_tenant_conf.walreceiver_connect_timeout);
|
||||
let lagging_wal_timeout = tenant_conf_guard
|
||||
.lagging_wal_timeout
|
||||
.unwrap_or(conf.default_tenant_conf.lagging_wal_timeout);
|
||||
let max_lsn_wal_lag = tenant_conf_guard
|
||||
.max_lsn_wal_lag
|
||||
.unwrap_or(conf.default_tenant_conf.max_lsn_wal_lag);
|
||||
|
||||
let evictions_low_residence_duration_metric_threshold =
|
||||
Self::get_evictions_low_residence_duration_metric_threshold(
|
||||
&tenant_conf_guard,
|
||||
@@ -1344,18 +1342,6 @@ impl Timeline {
|
||||
drop(tenant_conf_guard);
|
||||
|
||||
Arc::new_cyclic(|myself| {
|
||||
let walreceiver = WalReceiver::new(
|
||||
TenantTimelineId::new(tenant_id, timeline_id),
|
||||
Weak::clone(myself),
|
||||
WalReceiverConf {
|
||||
wal_connect_timeout,
|
||||
lagging_wal_timeout,
|
||||
max_lsn_wal_lag,
|
||||
auth_token: crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(),
|
||||
availability_zone: conf.availability_zone.clone(),
|
||||
},
|
||||
);
|
||||
|
||||
let mut result = Timeline {
|
||||
conf,
|
||||
tenant_conf,
|
||||
@@ -1367,7 +1353,7 @@ impl Timeline {
|
||||
wanted_image_layers: Mutex::new(None),
|
||||
|
||||
walredo_mgr,
|
||||
walreceiver,
|
||||
walreceiver: Mutex::new(None),
|
||||
|
||||
remote_client: remote_client.map(Arc::new),
|
||||
|
||||
@@ -1432,6 +1418,7 @@ impl Timeline {
|
||||
eviction_task_timeline_state: tokio::sync::Mutex::new(
|
||||
EvictionTaskTimelineState::default(),
|
||||
),
|
||||
delete_lock: tokio::sync::Mutex::new(false),
|
||||
};
|
||||
result.repartition_threshold = result.get_checkpoint_distance() / 10;
|
||||
result
|
||||
@@ -1487,17 +1474,49 @@ impl Timeline {
|
||||
*flush_loop_state = FlushLoopState::Running;
|
||||
}
|
||||
|
||||
pub(super) fn launch_wal_receiver(
|
||||
&self,
|
||||
/// Creates and starts the wal receiver.
|
||||
///
|
||||
/// This function is expected to be called at most once per Timeline's lifecycle
|
||||
/// when the timeline is activated.
|
||||
fn launch_wal_receiver(
|
||||
self: &Arc<Self>,
|
||||
ctx: &RequestContext,
|
||||
broker_client: BrokerClientChannel,
|
||||
) -> anyhow::Result<()> {
|
||||
) {
|
||||
info!(
|
||||
"launching WAL receiver for timeline {} of tenant {}",
|
||||
self.timeline_id, self.tenant_id
|
||||
);
|
||||
self.walreceiver.start(ctx, broker_client)?;
|
||||
Ok(())
|
||||
|
||||
let tenant_conf_guard = self.tenant_conf.read().unwrap();
|
||||
let wal_connect_timeout = tenant_conf_guard
|
||||
.walreceiver_connect_timeout
|
||||
.unwrap_or(self.conf.default_tenant_conf.walreceiver_connect_timeout);
|
||||
let lagging_wal_timeout = tenant_conf_guard
|
||||
.lagging_wal_timeout
|
||||
.unwrap_or(self.conf.default_tenant_conf.lagging_wal_timeout);
|
||||
let max_lsn_wal_lag = tenant_conf_guard
|
||||
.max_lsn_wal_lag
|
||||
.unwrap_or(self.conf.default_tenant_conf.max_lsn_wal_lag);
|
||||
drop(tenant_conf_guard);
|
||||
|
||||
let mut guard = self.walreceiver.lock().unwrap();
|
||||
assert!(
|
||||
guard.is_none(),
|
||||
"multiple launches / re-launches of WAL receiver are not supported"
|
||||
);
|
||||
*guard = Some(WalReceiver::start(
|
||||
Arc::clone(self),
|
||||
WalReceiverConf {
|
||||
wal_connect_timeout,
|
||||
lagging_wal_timeout,
|
||||
max_lsn_wal_lag,
|
||||
auth_token: crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(),
|
||||
availability_zone: self.conf.availability_zone.clone(),
|
||||
},
|
||||
broker_client,
|
||||
ctx,
|
||||
));
|
||||
}
|
||||
|
||||
///
|
||||
@@ -2155,7 +2174,7 @@ impl Timeline {
|
||||
fn delete_historic_layer(
|
||||
&self,
|
||||
// we cannot remove layers otherwise, since gc and compaction will race
|
||||
_layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>,
|
||||
_layer_removal_cs: Arc<tokio::sync::OwnedMutexGuard<()>>,
|
||||
layer: Arc<dyn PersistentLayer>,
|
||||
updates: &mut BatchedUpdates<'_, dyn PersistentLayer>,
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -2619,7 +2638,7 @@ impl Timeline {
|
||||
|
||||
/// Layer flusher task's main loop.
|
||||
async fn flush_loop(
|
||||
&self,
|
||||
self: &Arc<Self>,
|
||||
mut layer_flush_start_rx: tokio::sync::watch::Receiver<u64>,
|
||||
ctx: &RequestContext,
|
||||
) {
|
||||
@@ -2710,7 +2729,7 @@ impl Timeline {
|
||||
/// Flush one frozen in-memory layer to disk, as a new delta layer.
|
||||
#[instrument(skip(self, frozen_layer, ctx), fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id, layer=%frozen_layer.short_id()))]
|
||||
async fn flush_frozen_layer(
|
||||
&self,
|
||||
self: &Arc<Self>,
|
||||
frozen_layer: Arc<InMemoryLayer>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -2730,7 +2749,11 @@ impl Timeline {
|
||||
.await?
|
||||
} else {
|
||||
// normal case, write out a L0 delta layer file.
|
||||
let (delta_path, metadata) = self.create_delta_layer(&frozen_layer)?;
|
||||
let this = self.clone();
|
||||
let frozen_layer = frozen_layer.clone();
|
||||
let (delta_path, metadata) =
|
||||
tokio::task::spawn_blocking(move || this.create_delta_layer(&frozen_layer))
|
||||
.await??;
|
||||
HashMap::from([(delta_path, metadata)])
|
||||
};
|
||||
|
||||
@@ -2834,7 +2857,7 @@ impl Timeline {
|
||||
|
||||
// Write out the given frozen in-memory layer as a new L0 delta file
|
||||
fn create_delta_layer(
|
||||
&self,
|
||||
self: &Arc<Self>,
|
||||
frozen_layer: &InMemoryLayer,
|
||||
) -> anyhow::Result<(LayerFileName, LayerFileMetadata)> {
|
||||
// Write it out
|
||||
@@ -2850,10 +2873,13 @@ impl Timeline {
|
||||
// TODO: If we're running inside 'flush_frozen_layers' and there are multiple
|
||||
// files to flush, it might be better to first write them all, and then fsync
|
||||
// them all in parallel.
|
||||
par_fsync::par_fsync(&[
|
||||
new_delta_path.clone(),
|
||||
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
||||
])?;
|
||||
|
||||
// First sync the delta layer. We still use par_fsync here to keep everything consistent. Feel free to replace
|
||||
// this with a single fsync in future refactors.
|
||||
par_fsync::par_fsync(&[new_delta_path.clone()]).context("fsync of delta layer")?;
|
||||
// Then sync the parent directory.
|
||||
par_fsync::par_fsync(&[self.conf.timeline_path(&self.timeline_id, &self.tenant_id)])
|
||||
.context("fsync of timeline dir")?;
|
||||
|
||||
// Add it to the layer map
|
||||
let l = Arc::new(new_delta);
|
||||
@@ -3077,11 +3103,15 @@ impl Timeline {
|
||||
let all_paths = image_layers
|
||||
.iter()
|
||||
.map(|layer| layer.path())
|
||||
.chain(std::iter::once(
|
||||
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
||||
))
|
||||
.collect::<Vec<_>>();
|
||||
par_fsync::par_fsync(&all_paths).context("fsync of newly created layer files")?;
|
||||
|
||||
par_fsync::par_fsync_async(&all_paths)
|
||||
.await
|
||||
.context("fsync of newly created layer files")?;
|
||||
|
||||
par_fsync::par_fsync_async(&[self.conf.timeline_path(&self.timeline_id, &self.tenant_id)])
|
||||
.await
|
||||
.context("fsync of timeline dir")?;
|
||||
|
||||
let mut layer_paths_to_upload = HashMap::with_capacity(image_layers.len());
|
||||
|
||||
@@ -3146,9 +3176,9 @@ impl Timeline {
|
||||
/// This method takes the `_layer_removal_cs` guard to highlight it required downloads are
|
||||
/// returned as an error. If the `layer_removal_cs` boundary is changed not to be taken in the
|
||||
/// start of level0 files compaction, the on-demand download should be revisited as well.
|
||||
async fn compact_level0_phase1(
|
||||
fn compact_level0_phase1(
|
||||
&self,
|
||||
_layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>,
|
||||
_layer_removal_cs: Arc<tokio::sync::OwnedMutexGuard<()>>,
|
||||
target_file_size: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<CompactLevel0Phase1Result, CompactionError> {
|
||||
@@ -3461,22 +3491,18 @@ impl Timeline {
|
||||
if !new_layers.is_empty() {
|
||||
let mut layer_paths: Vec<PathBuf> = new_layers.iter().map(|l| l.path()).collect();
|
||||
|
||||
// also sync the directory
|
||||
layer_paths.push(self.conf.timeline_path(&self.timeline_id, &self.tenant_id));
|
||||
|
||||
// Fsync all the layer files and directory using multiple threads to
|
||||
// minimize latency.
|
||||
par_fsync::par_fsync(&layer_paths).context("fsync all new layers")?;
|
||||
|
||||
par_fsync::par_fsync(&[self.conf.timeline_path(&self.timeline_id, &self.tenant_id)])
|
||||
.context("fsync of timeline dir")?;
|
||||
|
||||
layer_paths.pop().unwrap();
|
||||
}
|
||||
|
||||
drop(all_keys_iter); // So that deltas_to_compact is no longer borrowed
|
||||
|
||||
fail_point!("compact-level0-phase1-finish", |_| {
|
||||
Err(anyhow::anyhow!("failpoint compact-level0-phase1-finish").into())
|
||||
});
|
||||
|
||||
Ok(CompactLevel0Phase1Result {
|
||||
new_layers,
|
||||
deltas_to_compact,
|
||||
@@ -3488,17 +3514,22 @@ impl Timeline {
|
||||
/// as Level 1 files.
|
||||
///
|
||||
async fn compact_level0(
|
||||
&self,
|
||||
layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>,
|
||||
self: &Arc<Self>,
|
||||
layer_removal_cs: Arc<tokio::sync::OwnedMutexGuard<()>>,
|
||||
target_file_size: u64,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), CompactionError> {
|
||||
let this = self.clone();
|
||||
let ctx_inner = ctx.clone();
|
||||
let layer_removal_cs_inner = layer_removal_cs.clone();
|
||||
let CompactLevel0Phase1Result {
|
||||
new_layers,
|
||||
deltas_to_compact,
|
||||
} = self
|
||||
.compact_level0_phase1(layer_removal_cs, target_file_size, ctx)
|
||||
.await?;
|
||||
} = tokio::task::spawn_blocking(move || {
|
||||
this.compact_level0_phase1(layer_removal_cs_inner, target_file_size, &ctx_inner)
|
||||
})
|
||||
.await
|
||||
.unwrap()?;
|
||||
|
||||
if new_layers.is_empty() && deltas_to_compact.is_empty() {
|
||||
// nothing to do
|
||||
@@ -3556,7 +3587,7 @@ impl Timeline {
|
||||
let mut layer_names_to_delete = Vec::with_capacity(deltas_to_compact.len());
|
||||
for l in deltas_to_compact {
|
||||
layer_names_to_delete.push(l.filename());
|
||||
self.delete_historic_layer(layer_removal_cs, l, &mut updates)?;
|
||||
self.delete_historic_layer(layer_removal_cs.clone(), l, &mut updates)?;
|
||||
}
|
||||
updates.flush();
|
||||
drop(layers);
|
||||
@@ -3676,7 +3707,7 @@ impl Timeline {
|
||||
|
||||
fail_point!("before-timeline-gc");
|
||||
|
||||
let layer_removal_cs = self.layer_removal_cs.lock().await;
|
||||
let layer_removal_cs = Arc::new(self.layer_removal_cs.clone().lock_owned().await);
|
||||
// Is the timeline being deleted?
|
||||
let state = *self.state.borrow();
|
||||
if state == TimelineState::Stopping {
|
||||
@@ -3696,7 +3727,7 @@ impl Timeline {
|
||||
|
||||
let res = self
|
||||
.gc_timeline(
|
||||
&layer_removal_cs,
|
||||
layer_removal_cs.clone(),
|
||||
horizon_cutoff,
|
||||
pitr_cutoff,
|
||||
retain_lsns,
|
||||
@@ -3715,7 +3746,7 @@ impl Timeline {
|
||||
|
||||
async fn gc_timeline(
|
||||
&self,
|
||||
layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>,
|
||||
layer_removal_cs: Arc<tokio::sync::OwnedMutexGuard<()>>,
|
||||
horizon_cutoff: Lsn,
|
||||
pitr_cutoff: Lsn,
|
||||
retain_lsns: Vec<Lsn>,
|
||||
@@ -3888,7 +3919,11 @@ impl Timeline {
|
||||
{
|
||||
for doomed_layer in layers_to_remove {
|
||||
layer_names_to_delete.push(doomed_layer.filename());
|
||||
self.delete_historic_layer(layer_removal_cs, doomed_layer, &mut updates)?; // FIXME: schedule succeeded deletions before returning?
|
||||
self.delete_historic_layer(
|
||||
layer_removal_cs.clone(),
|
||||
doomed_layer,
|
||||
&mut updates,
|
||||
)?; // FIXME: schedule succeeded deletions before returning?
|
||||
result.layers_removed += 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,16 +29,14 @@ use crate::tenant::timeline::walreceiver::connection_manager::{
|
||||
connection_manager_loop_step, ConnectionManagerState,
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use std::future::Future;
|
||||
use std::num::NonZeroU64;
|
||||
use std::ops::ControlFlow;
|
||||
use std::sync::atomic::{self, AtomicBool};
|
||||
use std::sync::{Arc, Weak};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use tokio::select;
|
||||
use tokio::sync::{watch, RwLock};
|
||||
use tokio::sync::watch;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
|
||||
@@ -62,46 +60,23 @@ pub struct WalReceiverConf {
|
||||
|
||||
pub struct WalReceiver {
|
||||
timeline: TenantTimelineId,
|
||||
timeline_ref: Weak<Timeline>,
|
||||
conf: WalReceiverConf,
|
||||
started: AtomicBool,
|
||||
manager_status: Arc<RwLock<Option<ConnectionManagerStatus>>>,
|
||||
manager_status: Arc<std::sync::RwLock<Option<ConnectionManagerStatus>>>,
|
||||
}
|
||||
|
||||
impl WalReceiver {
|
||||
pub fn new(
|
||||
timeline: TenantTimelineId,
|
||||
timeline_ref: Weak<Timeline>,
|
||||
conf: WalReceiverConf,
|
||||
) -> Self {
|
||||
Self {
|
||||
timeline,
|
||||
timeline_ref,
|
||||
conf,
|
||||
started: AtomicBool::new(false),
|
||||
manager_status: Arc::new(RwLock::new(None)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
timeline: Arc<Timeline>,
|
||||
conf: WalReceiverConf,
|
||||
mut broker_client: BrokerClientChannel,
|
||||
) -> anyhow::Result<()> {
|
||||
if self.started.load(atomic::Ordering::Acquire) {
|
||||
anyhow::bail!("Wal receiver is already started");
|
||||
}
|
||||
|
||||
let timeline = self.timeline_ref.upgrade().with_context(|| {
|
||||
format!("walreceiver start on a dropped timeline {}", self.timeline)
|
||||
})?;
|
||||
|
||||
ctx: &RequestContext,
|
||||
) -> Self {
|
||||
let tenant_id = timeline.tenant_id;
|
||||
let timeline_id = timeline.timeline_id;
|
||||
let walreceiver_ctx =
|
||||
ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);
|
||||
let wal_receiver_conf = self.conf.clone();
|
||||
let loop_status = Arc::clone(&self.manager_status);
|
||||
|
||||
let loop_status = Arc::new(std::sync::RwLock::new(None));
|
||||
let manager_status = Arc::clone(&loop_status);
|
||||
task_mgr::spawn(
|
||||
WALRECEIVER_RUNTIME.handle(),
|
||||
TaskKind::WalReceiverManager,
|
||||
@@ -113,7 +88,7 @@ impl WalReceiver {
|
||||
info!("WAL receiver manager started, connecting to broker");
|
||||
let mut connection_manager_state = ConnectionManagerState::new(
|
||||
timeline,
|
||||
wal_receiver_conf,
|
||||
conf,
|
||||
);
|
||||
loop {
|
||||
select! {
|
||||
@@ -137,29 +112,29 @@ impl WalReceiver {
|
||||
}
|
||||
|
||||
connection_manager_state.shutdown().await;
|
||||
*loop_status.write().await = None;
|
||||
*loop_status.write().unwrap() = None;
|
||||
Ok(())
|
||||
}
|
||||
.instrument(info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id))
|
||||
);
|
||||
|
||||
self.started.store(true, atomic::Ordering::Release);
|
||||
|
||||
Ok(())
|
||||
Self {
|
||||
timeline: TenantTimelineId::new(tenant_id, timeline_id),
|
||||
manager_status,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn stop(&self) {
|
||||
pub async fn stop(self) {
|
||||
task_mgr::shutdown_tasks(
|
||||
Some(TaskKind::WalReceiverManager),
|
||||
Some(self.timeline.tenant_id),
|
||||
Some(self.timeline.timeline_id),
|
||||
)
|
||||
.await;
|
||||
self.started.store(false, atomic::Ordering::Release);
|
||||
}
|
||||
|
||||
pub(super) async fn status(&self) -> Option<ConnectionManagerStatus> {
|
||||
self.manager_status.read().await.clone()
|
||||
pub(super) fn status(&self) -> Option<ConnectionManagerStatus> {
|
||||
self.manager_status.read().unwrap().clone()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -29,7 +29,6 @@ use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use storage_broker::Streaming;
|
||||
use tokio::select;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::*;
|
||||
|
||||
use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS};
|
||||
@@ -48,7 +47,7 @@ pub(super) async fn connection_manager_loop_step(
|
||||
broker_client: &mut BrokerClientChannel,
|
||||
connection_manager_state: &mut ConnectionManagerState,
|
||||
ctx: &RequestContext,
|
||||
manager_status: &RwLock<Option<ConnectionManagerStatus>>,
|
||||
manager_status: &std::sync::RwLock<Option<ConnectionManagerStatus>>,
|
||||
) -> ControlFlow<(), ()> {
|
||||
match connection_manager_state
|
||||
.timeline
|
||||
@@ -195,7 +194,7 @@ pub(super) async fn connection_manager_loop_step(
|
||||
.change_connection(new_candidate, ctx)
|
||||
.await
|
||||
}
|
||||
*manager_status.write().await = Some(connection_manager_state.manager_status());
|
||||
*manager_status.write().unwrap() = Some(connection_manager_state.manager_status());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -11,10 +11,12 @@ OBJS = \
|
||||
pagestore_smgr.o \
|
||||
relsize_cache.o \
|
||||
walproposer.o \
|
||||
walproposer_utils.o
|
||||
walproposer_utils.o \
|
||||
control_plane_connector.o
|
||||
|
||||
PG_CPPFLAGS = -I$(libpq_srcdir)
|
||||
SHLIB_LINK_INTERNAL = $(libpq)
|
||||
SHLIB_LINK = -lcurl
|
||||
|
||||
EXTENSION = neon
|
||||
DATA = neon--1.0.sql
|
||||
|
||||
830
pgxn/neon/control_plane_connector.c
Normal file
830
pgxn/neon/control_plane_connector.c
Normal file
@@ -0,0 +1,830 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* control_plane_connector.c
|
||||
* Captures updates to roles/databases using ProcessUtility_hook and
|
||||
* sends them to the control ProcessUtility_hook. The changes are sent
|
||||
* via HTTP to the URL specified by the GUC neon.console_url when the
|
||||
* transaction commits. Forwarding may be disabled temporarily by
|
||||
* setting neon.forward_ddl to false.
|
||||
*
|
||||
* Currently, the transaction may abort AFTER
|
||||
* changes have already been forwarded, and that case is not handled.
|
||||
* Subtransactions are handled using a stack of hash tables, which
|
||||
* accumulate changes. On subtransaction commit, the top of the stack
|
||||
* is merged with the table below it.
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* contrib/neon/control_plane_connector.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
#include "tcop/pquery.h"
|
||||
#include "tcop/utility.h"
|
||||
#include "access/xact.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/memutils.h"
|
||||
#include "commands/defrem.h"
|
||||
#include "miscadmin.h"
|
||||
#include "utils/acl.h"
|
||||
#include "fmgr.h"
|
||||
#include "utils/guc.h"
|
||||
#include "port.h"
|
||||
#include <curl/curl.h>
|
||||
#include "utils/jsonb.h"
|
||||
|
||||
static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL;
|
||||
|
||||
/* GUCs */
|
||||
static char *ConsoleURL = NULL;
|
||||
static bool ForwardDDL = true;
|
||||
|
||||
/* Curl structures for sending the HTTP requests */
|
||||
static CURL * CurlHandle;
|
||||
static struct curl_slist *ContentHeader = NULL;
|
||||
|
||||
/*
|
||||
* CURL docs say that this buffer must exist until we call curl_easy_cleanup
|
||||
* (which we never do), so we make this a static
|
||||
*/
|
||||
static char CurlErrorBuf[CURL_ERROR_SIZE];
|
||||
|
||||
typedef enum
|
||||
{
|
||||
Op_Set, /* An upsert: Either a creation or an alter */
|
||||
Op_Delete,
|
||||
} OpType;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char name[NAMEDATALEN];
|
||||
Oid owner;
|
||||
char old_name[NAMEDATALEN];
|
||||
OpType type;
|
||||
} DbEntry;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char name[NAMEDATALEN];
|
||||
char old_name[NAMEDATALEN];
|
||||
const char *password;
|
||||
OpType type;
|
||||
} RoleEntry;
|
||||
|
||||
/*
|
||||
* We keep one of these for each subtransaction in a stack. When a subtransaction
|
||||
* commits, we merge the top of the stack into the table below it. It is allocated in the
|
||||
* subtransaction's context.
|
||||
*/
|
||||
typedef struct DdlHashTable
|
||||
{
|
||||
struct DdlHashTable *prev_table;
|
||||
HTAB *db_table;
|
||||
HTAB *role_table;
|
||||
} DdlHashTable;
|
||||
|
||||
static DdlHashTable RootTable;
|
||||
static DdlHashTable * CurrentDdlTable = &RootTable;
|
||||
|
||||
static void
|
||||
PushKeyValue(JsonbParseState **state, char *key, char *value)
|
||||
{
|
||||
JsonbValue k,
|
||||
v;
|
||||
|
||||
k.type = jbvString;
|
||||
k.val.string.len = strlen(key);
|
||||
k.val.string.val = key;
|
||||
v.type = jbvString;
|
||||
v.val.string.len = strlen(value);
|
||||
v.val.string.val = value;
|
||||
pushJsonbValue(state, WJB_KEY, &k);
|
||||
pushJsonbValue(state, WJB_VALUE, &v);
|
||||
}
|
||||
|
||||
static char *
|
||||
ConstructDeltaMessage()
|
||||
{
|
||||
JsonbParseState *state = NULL;
|
||||
|
||||
pushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL);
|
||||
if (RootTable.db_table)
|
||||
{
|
||||
JsonbValue dbs;
|
||||
|
||||
dbs.type = jbvString;
|
||||
dbs.val.string.val = "dbs";
|
||||
dbs.val.string.len = strlen(dbs.val.string.val);
|
||||
pushJsonbValue(&state, WJB_KEY, &dbs);
|
||||
pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL);
|
||||
|
||||
HASH_SEQ_STATUS status;
|
||||
DbEntry *entry;
|
||||
|
||||
hash_seq_init(&status, RootTable.db_table);
|
||||
while ((entry = hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
pushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL);
|
||||
PushKeyValue(&state, "op", entry->type == Op_Set ? "set" : "del");
|
||||
PushKeyValue(&state, "name", entry->name);
|
||||
if (entry->owner != InvalidOid)
|
||||
{
|
||||
PushKeyValue(&state, "owner", GetUserNameFromId(entry->owner, false));
|
||||
}
|
||||
if (entry->old_name[0] != '\0')
|
||||
{
|
||||
PushKeyValue(&state, "old_name", entry->old_name);
|
||||
}
|
||||
pushJsonbValue(&state, WJB_END_OBJECT, NULL);
|
||||
}
|
||||
pushJsonbValue(&state, WJB_END_ARRAY, NULL);
|
||||
}
|
||||
|
||||
if (RootTable.role_table)
|
||||
{
|
||||
JsonbValue roles;
|
||||
|
||||
roles.type = jbvString;
|
||||
roles.val.string.val = "roles";
|
||||
roles.val.string.len = strlen(roles.val.string.val);
|
||||
pushJsonbValue(&state, WJB_KEY, &roles);
|
||||
pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL);
|
||||
|
||||
HASH_SEQ_STATUS status;
|
||||
RoleEntry *entry;
|
||||
|
||||
hash_seq_init(&status, RootTable.role_table);
|
||||
while ((entry = hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
pushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL);
|
||||
PushKeyValue(&state, "op", entry->type == Op_Set ? "set" : "del");
|
||||
PushKeyValue(&state, "name", entry->name);
|
||||
if (entry->password)
|
||||
{
|
||||
PushKeyValue(&state, "password", (char *) entry->password);
|
||||
}
|
||||
if (entry->old_name[0] != '\0')
|
||||
{
|
||||
PushKeyValue(&state, "old_name", entry->old_name);
|
||||
}
|
||||
pushJsonbValue(&state, WJB_END_OBJECT, NULL);
|
||||
}
|
||||
pushJsonbValue(&state, WJB_END_ARRAY, NULL);
|
||||
}
|
||||
JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL);
|
||||
Jsonb *jsonb = JsonbValueToJsonb(result);
|
||||
|
||||
return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ );
|
||||
}
|
||||
|
||||
#define ERROR_SIZE 1024
|
||||
|
||||
typedef struct
|
||||
{
|
||||
char str[ERROR_SIZE];
|
||||
size_t size;
|
||||
} ErrorString;
|
||||
|
||||
static size_t
|
||||
ErrorWriteCallback(char *ptr, size_t size, size_t nmemb, void *userdata)
|
||||
{
|
||||
/* Docs say size is always 1 */
|
||||
ErrorString *str = userdata;
|
||||
|
||||
size_t to_write = nmemb;
|
||||
|
||||
/* +1 for null terminator */
|
||||
if (str->size + nmemb + 1 >= ERROR_SIZE)
|
||||
to_write = ERROR_SIZE - str->size - 1;
|
||||
|
||||
/* Ignore everyrthing past the first ERROR_SIZE bytes */
|
||||
if (to_write == 0)
|
||||
return nmemb;
|
||||
memcpy(str->str + str->size, ptr, to_write);
|
||||
str->size += to_write;
|
||||
str->str[str->size] = '\0';
|
||||
return nmemb;
|
||||
}
|
||||
|
||||
static void
|
||||
SendDeltasToControlPlane()
|
||||
{
|
||||
if (!RootTable.db_table && !RootTable.role_table)
|
||||
return;
|
||||
if (!ConsoleURL)
|
||||
{
|
||||
elog(LOG, "ConsoleURL not set, skipping forwarding");
|
||||
return;
|
||||
}
|
||||
if (!ForwardDDL)
|
||||
return;
|
||||
|
||||
char *message = ConstructDeltaMessage();
|
||||
ErrorString str = {};
|
||||
|
||||
curl_easy_setopt(CurlHandle, CURLOPT_CUSTOMREQUEST, "PATCH");
|
||||
curl_easy_setopt(CurlHandle, CURLOPT_HTTPHEADER, ContentHeader);
|
||||
curl_easy_setopt(CurlHandle, CURLOPT_POSTFIELDS, message);
|
||||
curl_easy_setopt(CurlHandle, CURLOPT_URL, ConsoleURL);
|
||||
curl_easy_setopt(CurlHandle, CURLOPT_ERRORBUFFER, CurlErrorBuf);
|
||||
curl_easy_setopt(CurlHandle, CURLOPT_TIMEOUT, 3L /* seconds */ );
|
||||
curl_easy_setopt(CurlHandle, CURLOPT_WRITEDATA, &str);
|
||||
curl_easy_setopt(CurlHandle, CURLOPT_WRITEFUNCTION, ErrorWriteCallback);
|
||||
|
||||
const int num_retries = 5;
|
||||
int curl_status;
|
||||
|
||||
for (int i = 0; i < num_retries; i++)
|
||||
{
|
||||
if ((curl_status = curl_easy_perform(CurlHandle)) == 0)
|
||||
break;
|
||||
elog(LOG, "Curl request failed on attempt %d: %s", i, CurlErrorBuf);
|
||||
pg_usleep(1000 * 1000);
|
||||
}
|
||||
if (curl_status != 0)
|
||||
{
|
||||
elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf);
|
||||
}
|
||||
else
|
||||
{
|
||||
long response_code;
|
||||
|
||||
if (curl_easy_getinfo(CurlHandle, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_UNKNOWN_OPTION)
|
||||
{
|
||||
bool error_exists = str.size != 0;
|
||||
|
||||
if (response_code != 200)
|
||||
{
|
||||
if (error_exists)
|
||||
{
|
||||
elog(ERROR,
|
||||
"Received HTTP code %ld from control plane: %s",
|
||||
response_code,
|
||||
str.str);
|
||||
}
|
||||
else
|
||||
{
|
||||
elog(ERROR,
|
||||
"Received HTTP code %ld from control plane",
|
||||
response_code);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
InitDbTableIfNeeded()
|
||||
{
|
||||
if (!CurrentDdlTable->db_table)
|
||||
{
|
||||
HASHCTL db_ctl = {};
|
||||
|
||||
db_ctl.keysize = NAMEDATALEN;
|
||||
db_ctl.entrysize = sizeof(DbEntry);
|
||||
db_ctl.hcxt = CurTransactionContext;
|
||||
CurrentDdlTable->db_table = hash_create(
|
||||
"Dbs Created",
|
||||
4,
|
||||
&db_ctl,
|
||||
HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
InitRoleTableIfNeeded()
|
||||
{
|
||||
if (!CurrentDdlTable->role_table)
|
||||
{
|
||||
HASHCTL role_ctl = {};
|
||||
|
||||
role_ctl.keysize = NAMEDATALEN;
|
||||
role_ctl.entrysize = sizeof(RoleEntry);
|
||||
role_ctl.hcxt = CurTransactionContext;
|
||||
CurrentDdlTable->role_table = hash_create(
|
||||
"Roles Created",
|
||||
4,
|
||||
&role_ctl,
|
||||
HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
PushTable()
|
||||
{
|
||||
DdlHashTable *new_table = MemoryContextAlloc(CurTransactionContext, sizeof(DdlHashTable));
|
||||
|
||||
new_table->prev_table = CurrentDdlTable;
|
||||
new_table->role_table = NULL;
|
||||
new_table->db_table = NULL;
|
||||
CurrentDdlTable = new_table;
|
||||
}
|
||||
|
||||
static void
|
||||
MergeTable()
|
||||
{
|
||||
DdlHashTable *old_table = CurrentDdlTable;
|
||||
|
||||
CurrentDdlTable = old_table->prev_table;
|
||||
|
||||
if (old_table->db_table)
|
||||
{
|
||||
InitDbTableIfNeeded();
|
||||
DbEntry *entry;
|
||||
HASH_SEQ_STATUS status;
|
||||
|
||||
hash_seq_init(&status, old_table->db_table);
|
||||
while ((entry = hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
DbEntry *to_write = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
entry->name,
|
||||
HASH_ENTER,
|
||||
NULL);
|
||||
|
||||
to_write->type = entry->type;
|
||||
if (entry->owner != InvalidOid)
|
||||
to_write->owner = entry->owner;
|
||||
strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN);
|
||||
if (entry->old_name[0] != '\0')
|
||||
{
|
||||
bool found_old = false;
|
||||
DbEntry *old = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
entry->old_name,
|
||||
HASH_FIND,
|
||||
&found_old);
|
||||
|
||||
if (found_old)
|
||||
{
|
||||
if (old->old_name[0] != '\0')
|
||||
strlcpy(to_write->old_name, old->old_name, NAMEDATALEN);
|
||||
else
|
||||
strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN);
|
||||
hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
entry->old_name,
|
||||
HASH_REMOVE,
|
||||
NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
hash_destroy(old_table->db_table);
|
||||
}
|
||||
|
||||
if (old_table->role_table)
|
||||
{
|
||||
InitRoleTableIfNeeded();
|
||||
RoleEntry *entry;
|
||||
HASH_SEQ_STATUS status;
|
||||
|
||||
hash_seq_init(&status, old_table->role_table);
|
||||
while ((entry = hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
RoleEntry *to_write = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
entry->name,
|
||||
HASH_ENTER,
|
||||
NULL);
|
||||
|
||||
to_write->type = entry->type;
|
||||
if (entry->password)
|
||||
to_write->password = entry->password;
|
||||
strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN);
|
||||
if (entry->old_name[0] != '\0')
|
||||
{
|
||||
bool found_old = false;
|
||||
RoleEntry *old = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
entry->old_name,
|
||||
HASH_FIND,
|
||||
&found_old);
|
||||
|
||||
if (found_old)
|
||||
{
|
||||
if (old->old_name[0] != '\0')
|
||||
strlcpy(to_write->old_name, old->old_name, NAMEDATALEN);
|
||||
else
|
||||
strlcpy(to_write->old_name, entry->old_name, NAMEDATALEN);
|
||||
hash_search(CurrentDdlTable->role_table,
|
||||
entry->old_name,
|
||||
HASH_REMOVE,
|
||||
NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
hash_destroy(old_table->role_table);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
PopTable()
|
||||
{
|
||||
/*
|
||||
* Current table gets freed because it is allocated in aborted
|
||||
* subtransaction's memory context.
|
||||
*/
|
||||
CurrentDdlTable = CurrentDdlTable->prev_table;
|
||||
}
|
||||
|
||||
static void
|
||||
NeonSubXactCallback(
|
||||
SubXactEvent event,
|
||||
SubTransactionId mySubid,
|
||||
SubTransactionId parentSubid,
|
||||
void *arg)
|
||||
{
|
||||
switch (event)
|
||||
{
|
||||
case SUBXACT_EVENT_START_SUB:
|
||||
return PushTable();
|
||||
case SUBXACT_EVENT_COMMIT_SUB:
|
||||
return MergeTable();
|
||||
case SUBXACT_EVENT_ABORT_SUB:
|
||||
return PopTable();
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
NeonXactCallback(XactEvent event, void *arg)
|
||||
{
|
||||
if (event == XACT_EVENT_PRE_COMMIT || event == XACT_EVENT_PARALLEL_PRE_COMMIT)
|
||||
{
|
||||
SendDeltasToControlPlane();
|
||||
}
|
||||
RootTable.role_table = NULL;
|
||||
RootTable.db_table = NULL;
|
||||
Assert(CurrentDdlTable == &RootTable);
|
||||
}
|
||||
|
||||
static void
|
||||
HandleCreateDb(CreatedbStmt *stmt)
|
||||
{
|
||||
InitDbTableIfNeeded();
|
||||
DefElem *downer = NULL;
|
||||
ListCell *option;
|
||||
|
||||
foreach(option, stmt->options)
|
||||
{
|
||||
DefElem *defel = lfirst(option);
|
||||
|
||||
if (strcmp(defel->defname, "owner") == 0)
|
||||
downer = defel;
|
||||
}
|
||||
bool found = false;
|
||||
DbEntry *entry = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
stmt->dbname,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
|
||||
if (!found)
|
||||
memset(entry->old_name, 0, sizeof(entry->old_name));
|
||||
|
||||
entry->type = Op_Set;
|
||||
if (downer && downer->arg)
|
||||
entry->owner = get_role_oid(defGetString(downer), false);
|
||||
else
|
||||
entry->owner = GetUserId();
|
||||
}
|
||||
|
||||
static void
|
||||
HandleAlterOwner(AlterOwnerStmt *stmt)
|
||||
{
|
||||
if (stmt->objectType != OBJECT_DATABASE)
|
||||
return;
|
||||
InitDbTableIfNeeded();
|
||||
const char *name = strVal(stmt->object);
|
||||
bool found = false;
|
||||
DbEntry *entry = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
name,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
|
||||
if (!found)
|
||||
memset(entry->old_name, 0, sizeof(entry->old_name));
|
||||
|
||||
entry->owner = get_role_oid(get_rolespec_name(stmt->newowner), false);
|
||||
entry->type = Op_Set;
|
||||
}
|
||||
|
||||
static void
|
||||
HandleDbRename(RenameStmt *stmt)
|
||||
{
|
||||
Assert(stmt->renameType == OBJECT_DATABASE);
|
||||
InitDbTableIfNeeded();
|
||||
bool found = false;
|
||||
DbEntry *entry = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
stmt->subname,
|
||||
HASH_FIND,
|
||||
&found);
|
||||
DbEntry *entry_for_new_name = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
stmt->newname,
|
||||
HASH_ENTER,
|
||||
NULL);
|
||||
|
||||
entry_for_new_name->type = Op_Set;
|
||||
if (found)
|
||||
{
|
||||
if (entry->old_name[0] != '\0')
|
||||
strlcpy(entry_for_new_name->old_name, entry->old_name, NAMEDATALEN);
|
||||
else
|
||||
strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN);
|
||||
entry_for_new_name->owner = entry->owner;
|
||||
hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
stmt->subname,
|
||||
HASH_REMOVE,
|
||||
NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
strlcpy(entry_for_new_name->old_name, stmt->subname, NAMEDATALEN);
|
||||
entry_for_new_name->owner = InvalidOid;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
HandleDropDb(DropdbStmt *stmt)
|
||||
{
|
||||
InitDbTableIfNeeded();
|
||||
bool found = false;
|
||||
DbEntry *entry = hash_search(
|
||||
CurrentDdlTable->db_table,
|
||||
stmt->dbname,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
|
||||
entry->type = Op_Delete;
|
||||
entry->owner = InvalidOid;
|
||||
if (!found)
|
||||
memset(entry->old_name, 0, sizeof(entry->old_name));
|
||||
}
|
||||
|
||||
static void
|
||||
HandleCreateRole(CreateRoleStmt *stmt)
|
||||
{
|
||||
InitRoleTableIfNeeded();
|
||||
bool found = false;
|
||||
RoleEntry *entry = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
stmt->role,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
DefElem *dpass = NULL;
|
||||
ListCell *option;
|
||||
|
||||
foreach(option, stmt->options)
|
||||
{
|
||||
DefElem *defel = lfirst(option);
|
||||
|
||||
if (strcmp(defel->defname, "password") == 0)
|
||||
dpass = defel;
|
||||
}
|
||||
if (!found)
|
||||
memset(entry->old_name, 0, sizeof(entry->old_name));
|
||||
if (dpass && dpass->arg)
|
||||
entry->password = MemoryContextStrdup(CurTransactionContext, strVal(dpass->arg));
|
||||
else
|
||||
entry->password = NULL;
|
||||
entry->type = Op_Set;
|
||||
}
|
||||
|
||||
static void
|
||||
HandleAlterRole(AlterRoleStmt *stmt)
|
||||
{
|
||||
InitRoleTableIfNeeded();
|
||||
DefElem *dpass = NULL;
|
||||
ListCell *option;
|
||||
|
||||
foreach(option, stmt->options)
|
||||
{
|
||||
DefElem *defel = lfirst(option);
|
||||
|
||||
if (strcmp(defel->defname, "password") == 0)
|
||||
dpass = defel;
|
||||
}
|
||||
/* We only care about updates to the password */
|
||||
if (!dpass)
|
||||
return;
|
||||
bool found = false;
|
||||
RoleEntry *entry = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
stmt->role->rolename,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
|
||||
if (!found)
|
||||
memset(entry->old_name, 0, sizeof(entry->old_name));
|
||||
if (dpass->arg)
|
||||
entry->password = MemoryContextStrdup(CurTransactionContext, strVal(dpass->arg));
|
||||
else
|
||||
entry->password = NULL;
|
||||
entry->type = Op_Set;
|
||||
}
|
||||
|
||||
static void
|
||||
HandleRoleRename(RenameStmt *stmt)
|
||||
{
|
||||
InitRoleTableIfNeeded();
|
||||
Assert(stmt->renameType == OBJECT_ROLE);
|
||||
bool found = false;
|
||||
RoleEntry *entry = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
stmt->subname,
|
||||
HASH_FIND,
|
||||
&found);
|
||||
|
||||
RoleEntry *entry_for_new_name = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
stmt->newname,
|
||||
HASH_ENTER,
|
||||
NULL);
|
||||
|
||||
entry_for_new_name->type = Op_Set;
|
||||
if (found)
|
||||
{
|
||||
if (entry->old_name[0] != '\0')
|
||||
strlcpy(entry_for_new_name->old_name, entry->old_name, NAMEDATALEN);
|
||||
else
|
||||
strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN);
|
||||
entry_for_new_name->password = entry->password;
|
||||
hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
entry->name,
|
||||
HASH_REMOVE,
|
||||
NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
strlcpy(entry_for_new_name->old_name, stmt->subname, NAMEDATALEN);
|
||||
entry_for_new_name->password = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
HandleDropRole(DropRoleStmt *stmt)
|
||||
{
|
||||
InitRoleTableIfNeeded();
|
||||
ListCell *item;
|
||||
|
||||
foreach(item, stmt->roles)
|
||||
{
|
||||
RoleSpec *spec = lfirst(item);
|
||||
bool found = false;
|
||||
RoleEntry *entry = hash_search(
|
||||
CurrentDdlTable->role_table,
|
||||
spec->rolename,
|
||||
HASH_ENTER,
|
||||
&found);
|
||||
|
||||
entry->type = Op_Delete;
|
||||
entry->password = NULL;
|
||||
if (!found)
|
||||
memset(entry->old_name, 0, sizeof(entry));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
HandleRename(RenameStmt *stmt)
|
||||
{
|
||||
if (stmt->renameType == OBJECT_DATABASE)
|
||||
return HandleDbRename(stmt);
|
||||
else if (stmt->renameType == OBJECT_ROLE)
|
||||
return HandleRoleRename(stmt);
|
||||
}
|
||||
|
||||
static void
|
||||
NeonProcessUtility(
|
||||
PlannedStmt *pstmt,
|
||||
const char *queryString,
|
||||
bool readOnlyTree,
|
||||
ProcessUtilityContext context,
|
||||
ParamListInfo params,
|
||||
QueryEnvironment *queryEnv,
|
||||
DestReceiver *dest,
|
||||
QueryCompletion *qc)
|
||||
{
|
||||
Node *parseTree = pstmt->utilityStmt;
|
||||
|
||||
switch (nodeTag(parseTree))
|
||||
{
|
||||
case T_CreatedbStmt:
|
||||
HandleCreateDb(castNode(CreatedbStmt, parseTree));
|
||||
break;
|
||||
case T_AlterOwnerStmt:
|
||||
HandleAlterOwner(castNode(AlterOwnerStmt, parseTree));
|
||||
break;
|
||||
case T_RenameStmt:
|
||||
HandleRename(castNode(RenameStmt, parseTree));
|
||||
break;
|
||||
case T_DropdbStmt:
|
||||
HandleDropDb(castNode(DropdbStmt, parseTree));
|
||||
break;
|
||||
case T_CreateRoleStmt:
|
||||
HandleCreateRole(castNode(CreateRoleStmt, parseTree));
|
||||
break;
|
||||
case T_AlterRoleStmt:
|
||||
HandleAlterRole(castNode(AlterRoleStmt, parseTree));
|
||||
break;
|
||||
case T_DropRoleStmt:
|
||||
HandleDropRole(castNode(DropRoleStmt, parseTree));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (PreviousProcessUtilityHook)
|
||||
{
|
||||
PreviousProcessUtilityHook(
|
||||
pstmt,
|
||||
queryString,
|
||||
readOnlyTree,
|
||||
context,
|
||||
params,
|
||||
queryEnv,
|
||||
dest,
|
||||
qc);
|
||||
}
|
||||
else
|
||||
{
|
||||
standard_ProcessUtility(
|
||||
pstmt,
|
||||
queryString,
|
||||
readOnlyTree,
|
||||
context,
|
||||
params,
|
||||
queryEnv,
|
||||
dest,
|
||||
qc);
|
||||
}
|
||||
}
|
||||
|
||||
extern void
|
||||
InitControlPlaneConnector()
|
||||
{
|
||||
PreviousProcessUtilityHook = ProcessUtility_hook;
|
||||
ProcessUtility_hook = NeonProcessUtility;
|
||||
RegisterXactCallback(NeonXactCallback, NULL);
|
||||
RegisterSubXactCallback(NeonSubXactCallback, NULL);
|
||||
|
||||
DefineCustomStringVariable(
|
||||
"neon.console_url",
|
||||
"URL of the Neon Console, which will be forwarded changes to dbs and roles",
|
||||
NULL,
|
||||
&ConsoleURL,
|
||||
NULL,
|
||||
PGC_POSTMASTER,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
DefineCustomBoolVariable(
|
||||
"neon.forward_ddl",
|
||||
"Controls whether to forward DDL to the control plane",
|
||||
NULL,
|
||||
&ForwardDDL,
|
||||
true,
|
||||
PGC_SUSET,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
const char *jwt_token = getenv("NEON_CONTROL_PLANE_TOKEN");
|
||||
|
||||
if (!jwt_token)
|
||||
{
|
||||
elog(LOG, "Missing NEON_CONTROL_PLANE_TOKEN environment variable, forwarding will not be authenticated");
|
||||
}
|
||||
|
||||
if (curl_global_init(CURL_GLOBAL_DEFAULT))
|
||||
{
|
||||
elog(ERROR, "Failed to initialize curl");
|
||||
}
|
||||
if ((CurlHandle = curl_easy_init()) == NULL)
|
||||
{
|
||||
elog(ERROR, "Failed to initialize curl handle");
|
||||
}
|
||||
if ((ContentHeader = curl_slist_append(ContentHeader, "Content-Type: application/json")) == NULL)
|
||||
{
|
||||
elog(ERROR, "Failed to initialize content header");
|
||||
}
|
||||
|
||||
if (jwt_token)
|
||||
{
|
||||
char auth_header[8192];
|
||||
|
||||
snprintf(auth_header, sizeof(auth_header), "Authorization: Bearer %s", jwt_token);
|
||||
if ((ContentHeader = curl_slist_append(ContentHeader, auth_header)) == NULL)
|
||||
{
|
||||
elog(ERROR, "Failed to initialize authorization header");
|
||||
}
|
||||
}
|
||||
}
|
||||
6
pgxn/neon/control_plane_connector.h
Normal file
6
pgxn/neon/control_plane_connector.h
Normal file
@@ -0,0 +1,6 @@
|
||||
#ifndef CONTROL_PLANE_CONNECTOR_H
|
||||
#define CONTROL_PLANE_CONNECTOR_H
|
||||
|
||||
void InitControlPlaneConnector();
|
||||
|
||||
#endif
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "neon.h"
|
||||
#include "walproposer.h"
|
||||
#include "pagestore_client.h"
|
||||
#include "control_plane_connector.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
void _PG_init(void);
|
||||
@@ -34,7 +35,11 @@ _PG_init(void)
|
||||
{
|
||||
pg_init_libpagestore();
|
||||
pg_init_walproposer();
|
||||
InitControlPlaneConnector();
|
||||
|
||||
// Important: This must happen after other parts of the extension
|
||||
// are loaded, otherwise any settings to GUCs that were set before
|
||||
// the extension was loaded will be removed.
|
||||
EmitWarningsOnPlaceholders("neon");
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// The script parses Allure reports and posts a comment with a summary of the test results to the PR.
|
||||
// The script parses Allure reports and posts a comment with a summary of the test results to the PR or to the latest commit in the branch.
|
||||
//
|
||||
// The comment is updated on each run with the latest results.
|
||||
//
|
||||
@@ -7,7 +7,7 @@
|
||||
// - uses: actions/github-script@v6
|
||||
// with:
|
||||
// script: |
|
||||
// const script = require("./scripts/pr-comment-test-report.js")
|
||||
// const script = require("./scripts/comment-test-report.js")
|
||||
// await script({
|
||||
// github,
|
||||
// context,
|
||||
@@ -35,8 +35,12 @@ class DefaultMap extends Map {
|
||||
module.exports = async ({ github, context, fetch, report }) => {
|
||||
// Marker to find the comment in the subsequent runs
|
||||
const startMarker = `<!--AUTOMATIC COMMENT START #${context.payload.number}-->`
|
||||
// If we run the script in the PR or in the branch (main/release/...)
|
||||
const isPullRequest = !!context.payload.pull_request
|
||||
// Latest commit in PR or in the branch
|
||||
const commitSha = isPullRequest ? context.payload.pull_request.head.sha : context.sha
|
||||
// Let users know that the comment is updated automatically
|
||||
const autoupdateNotice = `<div align="right"><sub>The comment gets automatically updated with the latest test results<br>${context.payload.pull_request.head.sha} at ${new Date().toISOString()} :recycle:</sub></div>`
|
||||
const autoupdateNotice = `<div align="right"><sub>The comment gets automatically updated with the latest test results<br>${commitSha} at ${new Date().toISOString()} :recycle:</sub></div>`
|
||||
// GitHub bot id taken from (https://api.github.com/users/github-actions[bot])
|
||||
const githubActionsBotId = 41898282
|
||||
// Commend body itself
|
||||
@@ -166,22 +170,39 @@ module.exports = async ({ github, context, fetch, report }) => {
|
||||
|
||||
commentBody += autoupdateNotice
|
||||
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
issue_number: context.payload.number,
|
||||
let createCommentFn, listCommentsFn, updateCommentFn, issueNumberOrSha
|
||||
if (isPullRequest) {
|
||||
createCommentFn = github.rest.issues.createComment
|
||||
listCommentsFn = github.rest.issues.listComments
|
||||
updateCommentFn = github.rest.issues.updateComment
|
||||
issueNumberOrSha = {
|
||||
issue_number: context.payload.number,
|
||||
}
|
||||
} else {
|
||||
updateCommentFn = github.rest.repos.updateCommitComment
|
||||
listCommentsFn = github.rest.repos.listCommentsForCommit
|
||||
createCommentFn = github.rest.repos.createCommitComment
|
||||
issueNumberOrSha = {
|
||||
commit_sha: commitSha,
|
||||
}
|
||||
}
|
||||
|
||||
const { data: comments } = await listCommentsFn({
|
||||
...issueNumberOrSha,
|
||||
...ownerRepoParams,
|
||||
})
|
||||
|
||||
const comment = comments.find(comment => comment.user.id === githubActionsBotId && comment.body.startsWith(startMarker))
|
||||
if (comment) {
|
||||
await github.rest.issues.updateComment({
|
||||
await updateCommentFn({
|
||||
comment_id: comment.id,
|
||||
body: commentBody,
|
||||
...ownerRepoParams,
|
||||
})
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
issue_number: context.payload.number,
|
||||
await createCommentFn({
|
||||
body: commentBody,
|
||||
...issueNumberOrSha,
|
||||
...ownerRepoParams,
|
||||
})
|
||||
}
|
||||
@@ -535,8 +535,8 @@ def export_timeline(
|
||||
|
||||
|
||||
def main(args: argparse.Namespace):
|
||||
# any psql version will do here. use current DEFAULT_PG_VERSION = 14
|
||||
psql_path = str(Path(args.pg_distrib_dir) / "v14" / "bin" / "psql")
|
||||
# any psql version will do here. use current DEFAULT_PG_VERSION = 15
|
||||
psql_path = str(Path(args.pg_distrib_dir) / "v15" / "bin" / "psql")
|
||||
|
||||
old_pageserver_host = args.old_pageserver_host
|
||||
new_pageserver_host = args.new_pageserver_host
|
||||
|
||||
@@ -40,6 +40,9 @@ pub type BrokerClientChannel = BrokerServiceClient<Channel>;
|
||||
// Create connection object configured to run TLS if schema starts with https://
|
||||
// and plain text otherwise. Connection is lazy, only endpoint sanity is
|
||||
// validated here.
|
||||
//
|
||||
// NB: this function is not async, but still must be run on a tokio runtime thread
|
||||
// because that's a requirement of tonic_endpoint.connect_lazy()'s Channel::new call.
|
||||
pub fn connect<U>(endpoint: U, keepalive_interval: Duration) -> anyhow::Result<BrokerClientChannel>
|
||||
where
|
||||
U: std::convert::TryInto<Uri>,
|
||||
|
||||
@@ -1603,8 +1603,6 @@ class NeonPageserver(PgProtocol):
|
||||
# https://github.com/neondatabase/neon/issues/2442
|
||||
".*could not remove ephemeral file.*No such file or directory.*",
|
||||
# FIXME: These need investigation
|
||||
".*gc_loop.*Failed to get a tenant .* Tenant .* not found.*",
|
||||
".*compaction_loop.*Failed to get a tenant .* Tenant .* not found.*",
|
||||
".*manual_gc.*is_shutdown_requested\\(\\) called in an unexpected task or thread.*",
|
||||
".*tenant_list: timeline is not found in remote index while it is present in the tenants registry.*",
|
||||
".*Removing intermediate uninit mark file.*",
|
||||
@@ -1621,6 +1619,8 @@ class NeonPageserver(PgProtocol):
|
||||
".*task iteration took longer than the configured period.*",
|
||||
# this is until #3501
|
||||
".*Compaction failed, retrying in [^:]+: Cannot run compaction iteration on inactive tenant",
|
||||
# these can happen anytime we do compactions from background task and shutdown pageserver
|
||||
r".*ERROR.*ancestor timeline \S+ is being stopped",
|
||||
]
|
||||
|
||||
def start(
|
||||
|
||||
@@ -155,14 +155,14 @@ class PageserverHttpClient(requests.Session):
|
||||
return res_json
|
||||
|
||||
def tenant_create(
|
||||
self, new_tenant_id: Optional[TenantId] = None, conf: Optional[Dict[str, Any]] = None
|
||||
self, new_tenant_id: TenantId, conf: Optional[Dict[str, Any]] = None
|
||||
) -> TenantId:
|
||||
if conf is not None:
|
||||
assert "new_tenant_id" not in conf.keys()
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/tenant",
|
||||
json={
|
||||
"new_tenant_id": str(new_tenant_id) if new_tenant_id else None,
|
||||
"new_tenant_id": str(new_tenant_id),
|
||||
**(conf or {}),
|
||||
},
|
||||
)
|
||||
@@ -293,13 +293,13 @@ class PageserverHttpClient(requests.Session):
|
||||
self,
|
||||
pg_version: PgVersion,
|
||||
tenant_id: TenantId,
|
||||
new_timeline_id: Optional[TimelineId] = None,
|
||||
new_timeline_id: TimelineId,
|
||||
ancestor_timeline_id: Optional[TimelineId] = None,
|
||||
ancestor_start_lsn: Optional[Lsn] = None,
|
||||
**kwargs,
|
||||
) -> Dict[Any, Any]:
|
||||
body: Dict[str, Any] = {
|
||||
"new_timeline_id": str(new_timeline_id) if new_timeline_id else None,
|
||||
"new_timeline_id": str(new_timeline_id),
|
||||
"ancestor_start_lsn": str(ancestor_start_lsn) if ancestor_start_lsn else None,
|
||||
"ancestor_timeline_id": str(ancestor_timeline_id) if ancestor_timeline_id else None,
|
||||
}
|
||||
|
||||
@@ -27,6 +27,10 @@ class PgVersion(str, enum.Enum):
|
||||
def __repr__(self) -> str:
|
||||
return f"'{self.value}'"
|
||||
|
||||
# Make this explicit for Python 3.11 compatibility, which changes the behavior of enums
|
||||
def __str__(self) -> str:
|
||||
return self.value
|
||||
|
||||
# In GitHub workflows we use Postgres version with v-prefix (e.g. v14 instead of just 14),
|
||||
# sometime we need to do so in tests.
|
||||
@property
|
||||
@@ -78,11 +82,11 @@ def pytest_addoption(parser: Parser):
|
||||
@pytest.fixture(scope="session")
|
||||
def pg_version(request: FixtureRequest) -> Iterator[PgVersion]:
|
||||
if v := request.config.getoption("--pg-version"):
|
||||
version, source = v, "from --pg-version commad-line argument"
|
||||
version, source = v, "from --pg-version command-line argument"
|
||||
elif v := os.environ.get("DEFAULT_PG_VERSION"):
|
||||
version, source = PgVersion(v), "from DEFAULT_PG_VERSION environment variable"
|
||||
else:
|
||||
version, source = DEFAULT_VERSION, "default verson"
|
||||
version, source = DEFAULT_VERSION, "default version"
|
||||
|
||||
log.info(f"pg_version is {version} ({source})")
|
||||
yield version
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin
|
||||
|
||||
|
||||
# Test duplicate layer detection
|
||||
#
|
||||
# This test sets fail point at the end of first compaction phase:
|
||||
# after flushing new L1 layers but before deletion of L0 layes
|
||||
# It should cause generation of duplicate L1 layer by compaction after restart
|
||||
@pytest.mark.timeout(600)
|
||||
def test_duplicate_layers(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
# These warnings are expected, when the pageserver is restarted abruptly
|
||||
env.pageserver.allowed_errors.append(".*found future image layer.*")
|
||||
env.pageserver.allowed_errors.append(".*found future delta layer.*")
|
||||
#env.pageserver.allowed_errors.append(".*duplicate layer.*")
|
||||
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
# Use aggressive compaction and checkpoint settings
|
||||
tenant_id, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
"checkpoint_distance": f"{1024 ** 2}",
|
||||
"compaction_target_size": f"{1024 ** 2}",
|
||||
"compaction_period": "5 s",
|
||||
"compaction_threshold": "5",
|
||||
}
|
||||
)
|
||||
endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
|
||||
connstr = endpoint.connstr(options="-csynchronous_commit=off")
|
||||
pg_bin.run_capture(["pgbench", "-i", "-s10", connstr])
|
||||
|
||||
pageserver_http.configure_failpoints(("compact-level0-phase1-finish", "exit"))
|
||||
|
||||
with pytest.raises(Exception):
|
||||
pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr])
|
||||
time.sleep(6) # let compaction to be performed
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
time.sleep(6) # let compaction to be performed
|
||||
@@ -3,7 +3,7 @@ from contextlib import closing
|
||||
import pytest
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, PgProtocol
|
||||
from fixtures.pageserver.http import PageserverApiException
|
||||
from fixtures.types import TenantId
|
||||
from fixtures.types import TenantId, TimelineId
|
||||
|
||||
|
||||
def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
|
||||
@@ -25,21 +25,19 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
|
||||
ps.safe_psql("set FOO", password=tenant_token)
|
||||
ps.safe_psql("set FOO", password=pageserver_token)
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch(
|
||||
"test_pageserver_auth", tenant_id=env.initial_tenant
|
||||
)
|
||||
|
||||
# tenant can create branches
|
||||
tenant_http_client.timeline_create(
|
||||
pg_version=env.pg_version,
|
||||
tenant_id=env.initial_tenant,
|
||||
ancestor_timeline_id=new_timeline_id,
|
||||
new_timeline_id=TimelineId.generate(),
|
||||
ancestor_timeline_id=env.initial_timeline,
|
||||
)
|
||||
# console can create branches for tenant
|
||||
pageserver_http_client.timeline_create(
|
||||
pg_version=env.pg_version,
|
||||
tenant_id=env.initial_tenant,
|
||||
ancestor_timeline_id=new_timeline_id,
|
||||
new_timeline_id=TimelineId.generate(),
|
||||
ancestor_timeline_id=env.initial_timeline,
|
||||
)
|
||||
|
||||
# fail to create branch using token with different tenant_id
|
||||
@@ -49,18 +47,19 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
|
||||
invalid_tenant_http_client.timeline_create(
|
||||
pg_version=env.pg_version,
|
||||
tenant_id=env.initial_tenant,
|
||||
ancestor_timeline_id=new_timeline_id,
|
||||
new_timeline_id=TimelineId.generate(),
|
||||
ancestor_timeline_id=env.initial_timeline,
|
||||
)
|
||||
|
||||
# create tenant using management token
|
||||
pageserver_http_client.tenant_create()
|
||||
pageserver_http_client.tenant_create(TenantId.generate())
|
||||
|
||||
# fail to create tenant using tenant token
|
||||
with pytest.raises(
|
||||
PageserverApiException,
|
||||
match="Forbidden: Attempt to access management api with tenant scope. Permission denied",
|
||||
):
|
||||
tenant_http_client.tenant_create()
|
||||
tenant_http_client.tenant_create(TenantId.generate())
|
||||
|
||||
|
||||
def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
210
test_runner/regress/test_ddl_forwarding.py
Normal file
210
test_runner/regress/test_ddl_forwarding.py
Normal file
@@ -0,0 +1,210 @@
|
||||
from types import TracebackType
|
||||
from typing import Any, Dict, List, Optional, Tuple, Type
|
||||
|
||||
import psycopg2
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import VanillaPostgres
|
||||
from pytest_httpserver import HTTPServer
|
||||
from werkzeug.wrappers.request import Request
|
||||
from werkzeug.wrappers.response import Response
|
||||
|
||||
|
||||
def handle_db(dbs, roles, operation):
|
||||
if operation["op"] == "set":
|
||||
if "old_name" in operation and operation["old_name"] in dbs:
|
||||
dbs[operation["name"]] = dbs[operation["old_name"]]
|
||||
dbs.pop(operation["old_name"])
|
||||
if "owner" in operation:
|
||||
dbs[operation["name"]] = operation["owner"]
|
||||
elif operation["op"] == "del":
|
||||
dbs.pop(operation["name"])
|
||||
else:
|
||||
raise ValueError("Invalid op")
|
||||
|
||||
|
||||
def handle_role(dbs, roles, operation):
|
||||
if operation["op"] == "set":
|
||||
if "old_name" in operation and operation["old_name"] in roles:
|
||||
roles[operation["name"]] = roles[operation["old_name"]]
|
||||
roles.pop(operation["old_name"])
|
||||
for db, owner in dbs.items():
|
||||
if owner == operation["old_name"]:
|
||||
dbs[db] = operation["name"]
|
||||
if "password" in operation:
|
||||
roles[operation["name"]] = operation["password"]
|
||||
elif operation["op"] == "del":
|
||||
if "old_name" in operation:
|
||||
roles.pop(operation["old_name"])
|
||||
roles.pop(operation["name"])
|
||||
else:
|
||||
raise ValueError("Invalid op")
|
||||
|
||||
|
||||
fail = False
|
||||
|
||||
|
||||
def ddl_forward_handler(request: Request, dbs: Dict[str, str], roles: Dict[str, str]) -> Response:
|
||||
log.info(f"Received request with data {request.get_data(as_text=True)}")
|
||||
if fail:
|
||||
log.info("FAILING")
|
||||
return Response(status=500, response="Failed just cuz")
|
||||
if request.json is None:
|
||||
log.info("Received invalid JSON")
|
||||
return Response(status=400)
|
||||
json = request.json
|
||||
# Handle roles first
|
||||
if "roles" in json:
|
||||
for operation in json["roles"]:
|
||||
handle_role(dbs, roles, operation)
|
||||
if "dbs" in json:
|
||||
for operation in json["dbs"]:
|
||||
handle_db(dbs, roles, operation)
|
||||
return Response(status=200)
|
||||
|
||||
|
||||
class DdlForwardingContext:
|
||||
def __init__(self, httpserver: HTTPServer, vanilla_pg: VanillaPostgres, host: str, port: int):
|
||||
self.server = httpserver
|
||||
self.pg = vanilla_pg
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.dbs: Dict[str, str] = {}
|
||||
self.roles: Dict[str, str] = {}
|
||||
endpoint = "/management/api/v2/roles_and_databases"
|
||||
ddl_url = f"http://{host}:{port}{endpoint}"
|
||||
self.pg.configure(
|
||||
[
|
||||
f"neon.console_url={ddl_url}",
|
||||
"shared_preload_libraries = 'neon'",
|
||||
]
|
||||
)
|
||||
log.info(f"Listening on {ddl_url}")
|
||||
self.server.expect_request(endpoint, method="PATCH").respond_with_handler(
|
||||
lambda request: ddl_forward_handler(request, self.dbs, self.roles)
|
||||
)
|
||||
|
||||
def __enter__(self):
|
||||
self.pg.start()
|
||||
return self
|
||||
|
||||
def __exit__(
|
||||
self,
|
||||
exc_type: Optional[Type[BaseException]],
|
||||
exc: Optional[BaseException],
|
||||
tb: Optional[TracebackType],
|
||||
):
|
||||
self.pg.stop()
|
||||
|
||||
def send(self, query: str) -> List[Tuple[Any, ...]]:
|
||||
return self.pg.safe_psql(query)
|
||||
|
||||
def wait(self, timeout=3):
|
||||
self.server.wait(timeout=timeout)
|
||||
|
||||
def send_and_wait(self, query: str, timeout=3) -> List[Tuple[Any, ...]]:
|
||||
res = self.send(query)
|
||||
self.wait(timeout=timeout)
|
||||
return res
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def ddl(
|
||||
httpserver: HTTPServer, vanilla_pg: VanillaPostgres, httpserver_listen_address: tuple[str, int]
|
||||
):
|
||||
(host, port) = httpserver_listen_address
|
||||
with DdlForwardingContext(httpserver, vanilla_pg, host, port) as ddl:
|
||||
yield ddl
|
||||
|
||||
|
||||
def test_ddl_forwarding(ddl: DdlForwardingContext):
|
||||
curr_user = ddl.send("SELECT current_user")[0][0]
|
||||
log.info(f"Current user is {curr_user}")
|
||||
ddl.send_and_wait("CREATE DATABASE bork")
|
||||
assert ddl.dbs == {"bork": curr_user}
|
||||
ddl.send_and_wait("CREATE ROLE volk WITH PASSWORD 'nu_zayats'")
|
||||
ddl.send_and_wait("ALTER DATABASE bork RENAME TO nu_pogodi")
|
||||
assert ddl.dbs == {"nu_pogodi": curr_user}
|
||||
ddl.send_and_wait("ALTER DATABASE nu_pogodi OWNER TO volk")
|
||||
assert ddl.dbs == {"nu_pogodi": "volk"}
|
||||
ddl.send_and_wait("DROP DATABASE nu_pogodi")
|
||||
assert ddl.dbs == {}
|
||||
ddl.send_and_wait("DROP ROLE volk")
|
||||
assert ddl.roles == {}
|
||||
|
||||
ddl.send_and_wait("CREATE ROLE tarzan WITH PASSWORD 'of_the_apes'")
|
||||
assert ddl.roles == {"tarzan": "of_the_apes"}
|
||||
ddl.send_and_wait("DROP ROLE tarzan")
|
||||
assert ddl.roles == {}
|
||||
ddl.send_and_wait("CREATE ROLE tarzan WITH PASSWORD 'of_the_apes'")
|
||||
assert ddl.roles == {"tarzan": "of_the_apes"}
|
||||
ddl.send_and_wait("ALTER ROLE tarzan WITH PASSWORD 'jungle_man'")
|
||||
assert ddl.roles == {"tarzan": "jungle_man"}
|
||||
ddl.send_and_wait("ALTER ROLE tarzan RENAME TO mowgli")
|
||||
assert ddl.roles == {"mowgli": "jungle_man"}
|
||||
ddl.send_and_wait("DROP ROLE mowgli")
|
||||
assert ddl.roles == {}
|
||||
|
||||
conn = ddl.pg.connect()
|
||||
cur = conn.cursor()
|
||||
|
||||
cur.execute("BEGIN")
|
||||
cur.execute("CREATE ROLE bork WITH PASSWORD 'cork'")
|
||||
cur.execute("COMMIT")
|
||||
ddl.wait()
|
||||
assert ddl.roles == {"bork": "cork"}
|
||||
cur.execute("BEGIN")
|
||||
cur.execute("CREATE ROLE stork WITH PASSWORD 'pork'")
|
||||
cur.execute("ABORT")
|
||||
ddl.wait()
|
||||
assert ("stork", "pork") not in ddl.roles.items()
|
||||
cur.execute("BEGIN")
|
||||
cur.execute("ALTER ROLE bork WITH PASSWORD 'pork'")
|
||||
cur.execute("ALTER ROLE bork RENAME TO stork")
|
||||
cur.execute("COMMIT")
|
||||
ddl.wait()
|
||||
assert ddl.roles == {"stork": "pork"}
|
||||
cur.execute("BEGIN")
|
||||
cur.execute("CREATE ROLE dork WITH PASSWORD 'york'")
|
||||
cur.execute("SAVEPOINT point")
|
||||
cur.execute("ALTER ROLE dork WITH PASSWORD 'zork'")
|
||||
cur.execute("ALTER ROLE dork RENAME TO fork")
|
||||
cur.execute("ROLLBACK TO SAVEPOINT point")
|
||||
cur.execute("ALTER ROLE dork WITH PASSWORD 'fork'")
|
||||
cur.execute("ALTER ROLE dork RENAME TO zork")
|
||||
cur.execute("RELEASE SAVEPOINT point")
|
||||
cur.execute("COMMIT")
|
||||
ddl.wait()
|
||||
assert ddl.roles == {"stork": "pork", "zork": "fork"}
|
||||
|
||||
cur.execute("DROP ROLE stork")
|
||||
cur.execute("DROP ROLE zork")
|
||||
ddl.wait()
|
||||
assert ddl.roles == {}
|
||||
|
||||
cur.execute("CREATE ROLE bork WITH PASSWORD 'dork'")
|
||||
cur.execute("CREATE ROLE stork WITH PASSWORD 'cork'")
|
||||
cur.execute("BEGIN")
|
||||
cur.execute("DROP ROLE bork")
|
||||
cur.execute("ALTER ROLE stork RENAME TO bork")
|
||||
cur.execute("COMMIT")
|
||||
ddl.wait()
|
||||
assert ddl.roles == {"bork": "cork"}
|
||||
|
||||
cur.execute("DROP ROLE bork")
|
||||
ddl.wait()
|
||||
assert ddl.roles == {}
|
||||
|
||||
cur.execute("CREATE ROLE bork WITH PASSWORD 'dork'")
|
||||
cur.execute("CREATE DATABASE stork WITH OWNER=bork")
|
||||
cur.execute("ALTER ROLE bork RENAME TO cork")
|
||||
ddl.wait()
|
||||
assert ddl.dbs == {"stork": "cork"}
|
||||
|
||||
with pytest.raises(psycopg2.InternalError):
|
||||
global fail
|
||||
fail = True
|
||||
cur.execute("CREATE DATABASE failure WITH OWNER=cork")
|
||||
ddl.wait()
|
||||
|
||||
conn.close()
|
||||
@@ -228,7 +228,6 @@ def proxy_with_metric_collector(
|
||||
@pytest.mark.asyncio
|
||||
async def test_proxy_metric_collection(
|
||||
httpserver: HTTPServer,
|
||||
httpserver_listen_address,
|
||||
proxy_with_metric_collector: NeonProxy,
|
||||
vanilla_pg: VanillaPostgres,
|
||||
):
|
||||
|
||||
@@ -314,21 +314,22 @@ def test_pageserver_with_empty_tenants(
|
||||
|
||||
client = env.pageserver.http_client()
|
||||
|
||||
tenant_with_empty_timelines_dir = client.tenant_create()
|
||||
temp_timelines = client.timeline_list(tenant_with_empty_timelines_dir)
|
||||
tenant_with_empty_timelines = TenantId.generate()
|
||||
client.tenant_create(tenant_with_empty_timelines)
|
||||
temp_timelines = client.timeline_list(tenant_with_empty_timelines)
|
||||
for temp_timeline in temp_timelines:
|
||||
client.timeline_delete(
|
||||
tenant_with_empty_timelines_dir, TimelineId(temp_timeline["timeline_id"])
|
||||
tenant_with_empty_timelines, TimelineId(temp_timeline["timeline_id"])
|
||||
)
|
||||
files_in_timelines_dir = sum(
|
||||
1
|
||||
for _p in Path.iterdir(
|
||||
Path(env.repo_dir) / "tenants" / str(tenant_with_empty_timelines_dir) / "timelines"
|
||||
Path(env.repo_dir) / "tenants" / str(tenant_with_empty_timelines) / "timelines"
|
||||
)
|
||||
)
|
||||
assert (
|
||||
files_in_timelines_dir == 0
|
||||
), f"Tenant {tenant_with_empty_timelines_dir} should have an empty timelines/ directory"
|
||||
), f"Tenant {tenant_with_empty_timelines} should have an empty timelines/ directory"
|
||||
|
||||
# Trigger timeline re-initialization after pageserver restart
|
||||
env.endpoints.stop_all()
|
||||
@@ -356,15 +357,15 @@ def test_pageserver_with_empty_tenants(
|
||||
|
||||
assert env.pageserver.log_contains(".*Setting tenant as Broken state, reason:.*")
|
||||
|
||||
[loaded_tenant] = [t for t in tenants if t["id"] == str(tenant_with_empty_timelines_dir)]
|
||||
[loaded_tenant] = [t for t in tenants if t["id"] == str(tenant_with_empty_timelines)]
|
||||
assert (
|
||||
loaded_tenant["state"]["slug"] == "Active"
|
||||
), "Tenant {tenant_with_empty_timelines_dir} with empty timelines dir should be active and ready for timeline creation"
|
||||
), "Tenant {tenant_with_empty_timelines} with empty timelines dir should be active and ready for timeline creation"
|
||||
|
||||
loaded_tenant_status = client.tenant_status(tenant_with_empty_timelines_dir)
|
||||
loaded_tenant_status = client.tenant_status(tenant_with_empty_timelines)
|
||||
assert (
|
||||
loaded_tenant_status["state"]["slug"] == "Active"
|
||||
), f"Tenant {tenant_with_empty_timelines_dir} without timelines dir should be active"
|
||||
), f"Tenant {tenant_with_empty_timelines} without timelines dir should be active"
|
||||
|
||||
time.sleep(1) # to allow metrics propagation
|
||||
|
||||
@@ -374,7 +375,7 @@ def test_pageserver_with_empty_tenants(
|
||||
"state": "Broken",
|
||||
}
|
||||
active_tenants_metric_filter = {
|
||||
"tenant_id": str(tenant_with_empty_timelines_dir),
|
||||
"tenant_id": str(tenant_with_empty_timelines),
|
||||
"state": "Active",
|
||||
}
|
||||
|
||||
@@ -386,7 +387,7 @@ def test_pageserver_with_empty_tenants(
|
||||
|
||||
assert (
|
||||
tenant_active_count == 1
|
||||
), f"Tenant {tenant_with_empty_timelines_dir} should have metric as active"
|
||||
), f"Tenant {tenant_with_empty_timelines} should have metric as active"
|
||||
|
||||
tenant_broken_count = int(
|
||||
ps_metrics.query_one(
|
||||
|
||||
@@ -371,7 +371,7 @@ def test_concurrent_timeline_delete_if_first_stuck_at_index_upload(
|
||||
|
||||
# make the second call and assert behavior
|
||||
log.info("second call start")
|
||||
error_msg_re = "another task is already setting the deleted_flag, started at"
|
||||
error_msg_re = "timeline deletion is already in progress"
|
||||
with pytest.raises(PageserverApiException, match=error_msg_re) as second_call_err:
|
||||
ps_http.timeline_delete(env.initial_tenant, child_timeline_id)
|
||||
assert second_call_err.value.status_code == 500
|
||||
|
||||
@@ -27,7 +27,6 @@ futures-core = { version = "0.3" }
|
||||
futures-executor = { version = "0.3" }
|
||||
futures-sink = { version = "0.3" }
|
||||
futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
|
||||
hashbrown = { version = "0.12", features = ["raw"] }
|
||||
itertools = { version = "0.10" }
|
||||
libc = { version = "0.2", features = ["extra_traits"] }
|
||||
log = { version = "0.4", default-features = false, features = ["std"] }
|
||||
@@ -39,7 +38,7 @@ num-traits = { version = "0.2", features = ["i128"] }
|
||||
prost = { version = "0.11" }
|
||||
rand = { version = "0.8", features = ["small_rng"] }
|
||||
regex = { version = "1" }
|
||||
regex-syntax = { version = "0.6" }
|
||||
regex-syntax = { version = "0.7" }
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "multipart", "rustls-tls"] }
|
||||
ring = { version = "0.16", features = ["std"] }
|
||||
rustls = { version = "0.20", features = ["dangerous_configuration"] }
|
||||
@@ -62,7 +61,6 @@ url = { version = "2", features = ["serde"] }
|
||||
anyhow = { version = "1", features = ["backtrace"] }
|
||||
bytes = { version = "1", features = ["serde"] }
|
||||
either = { version = "1" }
|
||||
hashbrown = { version = "0.12", features = ["raw"] }
|
||||
itertools = { version = "0.10" }
|
||||
libc = { version = "0.2", features = ["extra_traits"] }
|
||||
log = { version = "0.4", default-features = false, features = ["std"] }
|
||||
@@ -70,7 +68,7 @@ memchr = { version = "2" }
|
||||
nom = { version = "7" }
|
||||
prost = { version = "0.11" }
|
||||
regex = { version = "1" }
|
||||
regex-syntax = { version = "0.6" }
|
||||
regex-syntax = { version = "0.7" }
|
||||
serde = { version = "1", features = ["alloc", "derive"] }
|
||||
syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] }
|
||||
syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "full", "visit-mut"] }
|
||||
|
||||
Reference in New Issue
Block a user