mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
switch pytests and neon_local to control_plane_hooks_api (#11195)
We want to switch away from and deprecate the `--compute-hook-url` param for the storcon in favour of `--control-plane-url` because it allows us to construct urls with `notify-safekeepers`. This PR switches the pytests and neon_local from a `control_plane_compute_hook_api` to a new param named `control_plane_hooks_api` which is supposed to point to the parent of the `notify-attach` URL. We still support reading the old url from disk to not be too disruptive with existing deployments, but we just ignore it. Also add docs for the `notify-safekeepers` upcall API. Follow-up of #11173 Part of https://github.com/neondatabase/neon/issues/11163
This commit is contained in:
@@ -979,7 +979,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
|
||||
neon_distrib_dir: None,
|
||||
default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
|
||||
storage_controller: None,
|
||||
control_plane_compute_hook_api: None,
|
||||
control_plane_hooks_api: None,
|
||||
generate_local_ssl_certs: false,
|
||||
}
|
||||
};
|
||||
|
||||
@@ -72,9 +72,9 @@ pub struct LocalEnv {
|
||||
// be propagated into each pageserver's configuration.
|
||||
pub control_plane_api: Url,
|
||||
|
||||
// Control plane upcall API for storage controller. If set, this will be propagated into the
|
||||
// Control plane upcall APIs for storage controller. If set, this will be propagated into the
|
||||
// storage controller's configuration.
|
||||
pub control_plane_compute_hook_api: Option<Url>,
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
|
||||
/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
|
||||
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
|
||||
@@ -104,6 +104,7 @@ pub struct OnDiskConfig {
|
||||
pub pageservers: Vec<PageServerConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Url>,
|
||||
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
|
||||
// Note: skip serializing because in compat tests old storage controller fails
|
||||
@@ -136,7 +137,7 @@ pub struct NeonLocalInitConf {
|
||||
pub pageservers: Vec<NeonLocalInitPageserverConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Option<Url>>,
|
||||
pub control_plane_hooks_api: Option<Url>,
|
||||
pub generate_local_ssl_certs: bool,
|
||||
}
|
||||
|
||||
@@ -573,7 +574,8 @@ impl LocalEnv {
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_compute_hook_api,
|
||||
control_plane_hooks_api,
|
||||
control_plane_compute_hook_api: _,
|
||||
branch_name_mappings,
|
||||
generate_local_ssl_certs,
|
||||
} = on_disk_config;
|
||||
@@ -588,7 +590,7 @@ impl LocalEnv {
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap(),
|
||||
control_plane_compute_hook_api,
|
||||
control_plane_hooks_api,
|
||||
branch_name_mappings,
|
||||
generate_local_ssl_certs,
|
||||
}
|
||||
@@ -695,7 +697,8 @@ impl LocalEnv {
|
||||
pageservers: vec![], // it's skip_serializing anyway
|
||||
safekeepers: self.safekeepers.clone(),
|
||||
control_plane_api: Some(self.control_plane_api.clone()),
|
||||
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
|
||||
control_plane_hooks_api: self.control_plane_hooks_api.clone(),
|
||||
control_plane_compute_hook_api: None,
|
||||
branch_name_mappings: self.branch_name_mappings.clone(),
|
||||
generate_local_ssl_certs: self.generate_local_ssl_certs,
|
||||
},
|
||||
@@ -779,8 +782,8 @@ impl LocalEnv {
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_compute_hook_api,
|
||||
generate_local_ssl_certs,
|
||||
control_plane_hooks_api,
|
||||
} = conf;
|
||||
|
||||
// Find postgres binaries.
|
||||
@@ -827,7 +830,7 @@ impl LocalEnv {
|
||||
pageservers: pageservers.iter().map(Into::into).collect(),
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap(),
|
||||
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
|
||||
control_plane_hooks_api,
|
||||
branch_name_mappings: Default::default(),
|
||||
generate_local_ssl_certs,
|
||||
};
|
||||
|
||||
@@ -558,10 +558,8 @@ impl StorageController {
|
||||
args.push(format!("--public-key=\"{public_key}\""));
|
||||
}
|
||||
|
||||
if let Some(control_plane_compute_hook_api) = &self.env.control_plane_compute_hook_api {
|
||||
args.push(format!(
|
||||
"--compute-hook-url={control_plane_compute_hook_api}"
|
||||
));
|
||||
if let Some(control_plane_hooks_api) = &self.env.control_plane_hooks_api {
|
||||
args.push(format!("--control-plane-url={control_plane_hooks_api}"));
|
||||
}
|
||||
|
||||
if let Some(split_threshold) = self.config.split_threshold.as_ref() {
|
||||
|
||||
@@ -101,15 +101,25 @@ changes such as a pageserver node becoming unavailable, or the tenant's shard co
|
||||
postgres clients to handle such changes, the storage controller calls an API hook when a tenant's pageserver
|
||||
location changes.
|
||||
|
||||
The hook is configured using the storage controller's `--control-plane-url` CLI option. If the hook requires
|
||||
JWT auth, the token may be provided with `--control-plane-jwt-token`. The hook will be invoked with a `PUT` request.
|
||||
The hook is configured using the storage controller's `--control-plane-url` CLI option, from which the hook URL is computed.
|
||||
|
||||
In the Neon cloud service, this hook is implemented by Neon's internal cloud control plane. In `neon_local` systems
|
||||
Currently, there is two hooks, each computed by appending the name to the provided control plane URL prefix:
|
||||
|
||||
- `notify-attach`, called whenever attachment for pageservers changes
|
||||
- `notify-safekeepers`, called whenever attachment for safekeepers changes
|
||||
|
||||
If the hooks require JWT auth, the token may be provided with `--control-plane-jwt-token`.
|
||||
The hooks will be invoked with a `PUT` request.
|
||||
|
||||
In the Neon cloud service, these hooks are implemented by Neon's internal cloud control plane. In `neon_local` systems,
|
||||
the storage controller integrates directly with neon_local to reconfigure local postgres processes instead of calling
|
||||
the compute hook.
|
||||
|
||||
When implementing an on-premise Neon deployment, you must implement a service that handles the compute hook. This is not complicated:
|
||||
the request body has format of the `ComputeHookNotifyRequest` structure, provided below for convenience.
|
||||
When implementing an on-premise Neon deployment, you must implement a service that handles the compute hooks. This is not complicated.
|
||||
|
||||
### `notify-attach` body
|
||||
|
||||
The `notify-attach` request body follows the format of the `ComputeHookNotifyRequest` structure, provided below for convenience.
|
||||
|
||||
```
|
||||
struct ComputeHookNotifyRequestShard {
|
||||
@@ -128,15 +138,15 @@ When a notification is received:
|
||||
|
||||
1. Modify postgres configuration for this tenant:
|
||||
|
||||
- set `neon.pageserver_connstr` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
|
||||
- set `neon.pageserver_connstring` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The
|
||||
shards identified by `NodeId` must be converted to the address+port of the node.
|
||||
- if stripe_size is not None, set `neon.stripe_size` to this value
|
||||
- if stripe_size is not None, set `neon.shard_stripe_size` to this value
|
||||
|
||||
2. Send SIGHUP to postgres to reload configuration
|
||||
3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
|
||||
will retry the notification until it succeeds..
|
||||
|
||||
### Example notification body
|
||||
Example body:
|
||||
|
||||
```
|
||||
{
|
||||
@@ -148,3 +158,34 @@ When a notification is received:
|
||||
],
|
||||
}
|
||||
```
|
||||
|
||||
### `notify-safekeepers` body
|
||||
|
||||
The `notify-safekeepers` request body forllows the format of the `SafekeepersNotifyRequest` structure, provided below for convenience.
|
||||
|
||||
```
|
||||
pub struct SafekeeperInfo {
|
||||
pub id: NodeId,
|
||||
pub hostname: String,
|
||||
}
|
||||
|
||||
pub struct SafekeepersNotifyRequest {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub generation: u32,
|
||||
pub safekeepers: Vec<SafekeeperInfo>,
|
||||
}
|
||||
```
|
||||
|
||||
When a notification is received:
|
||||
|
||||
1. Modify postgres configuration for this tenant:
|
||||
|
||||
- set `neon.safekeeper_connstrings` to an array of postgres connection strings to safekeepers according to the `safekeepers` list. The
|
||||
safekeepers identified by `NodeId` must be converted to the address+port of the respective safekeeper.
|
||||
The hostname is provided for debugging purposes, so we reserve changes to how we pass it.
|
||||
- set `neon.safekeepers_generation` to the provided `generation` value.
|
||||
|
||||
2. Send SIGHUP to postgres to reload configuration
|
||||
3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller
|
||||
will retry the notification until it succeeds..
|
||||
@@ -19,7 +19,7 @@ if TYPE_CHECKING:
|
||||
class ComputeReconfigure:
|
||||
def __init__(self, server: HTTPServer):
|
||||
self.server = server
|
||||
self.control_plane_compute_hook_api = f"http://{server.host}:{server.port}/notify-attach"
|
||||
self.control_plane_hooks_api = f"http://{server.host}:{server.port}/"
|
||||
self.workloads: dict[TenantId, Any] = {}
|
||||
self.on_notify: Callable[[Any], None] | None = None
|
||||
|
||||
|
||||
@@ -460,7 +460,7 @@ class NeonEnvBuilder:
|
||||
self.overlay_mounts_created_by_us: list[tuple[str, Path]] = []
|
||||
self.config_init_force: str | None = None
|
||||
self.top_output_dir = top_output_dir
|
||||
self.control_plane_compute_hook_api: str | None = None
|
||||
self.control_plane_hooks_api: str | None = None
|
||||
self.storage_controller_config: dict[Any, Any] | None = None
|
||||
|
||||
# Flag to enable https listener in pageserver, generate local ssl certs,
|
||||
@@ -1116,7 +1116,7 @@ class NeonEnv:
|
||||
self.control_plane_api: str = self.storage_controller.upcall_api_endpoint()
|
||||
|
||||
# For testing this with a fake HTTP server, enable passing through a URL from config
|
||||
self.control_plane_compute_hook_api = config.control_plane_compute_hook_api
|
||||
self.control_plane_hooks_api = config.control_plane_hooks_api
|
||||
|
||||
self.pageserver_virtual_file_io_engine = config.pageserver_virtual_file_io_engine
|
||||
self.pageserver_virtual_file_io_mode = config.pageserver_virtual_file_io_mode
|
||||
@@ -1137,8 +1137,8 @@ class NeonEnv:
|
||||
if self.control_plane_api is not None:
|
||||
cfg["control_plane_api"] = self.control_plane_api
|
||||
|
||||
if self.control_plane_compute_hook_api is not None:
|
||||
cfg["control_plane_compute_hook_api"] = self.control_plane_compute_hook_api
|
||||
if self.control_plane_hooks_api is not None:
|
||||
cfg["control_plane_hooks_api"] = self.control_plane_hooks_api
|
||||
|
||||
storage_controller_config = self.storage_controller_config
|
||||
|
||||
|
||||
@@ -83,9 +83,7 @@ def test_storage_controller_many_tenants(
|
||||
"max_offline": "30s",
|
||||
"max_warming_up": "300s",
|
||||
}
|
||||
neon_env_builder.control_plane_compute_hook_api = (
|
||||
compute_reconfigure_listener.control_plane_compute_hook_api
|
||||
)
|
||||
neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api
|
||||
|
||||
AZS = ["alpha", "bravo", "charlie"]
|
||||
|
||||
|
||||
@@ -23,8 +23,8 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder, make_httpserver):
|
||||
)
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
neon_env_builder.control_plane_compute_hook_api = (
|
||||
f"http://{make_httpserver.host}:{make_httpserver.port}/notify-attach"
|
||||
neon_env_builder.control_plane_hooks_api = (
|
||||
f"http://{make_httpserver.host}:{make_httpserver.port}/"
|
||||
)
|
||||
|
||||
def ignore_notify(request: Request):
|
||||
|
||||
@@ -87,8 +87,8 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver,
|
||||
neon_env_builder.enable_pageserver_remote_storage(
|
||||
remote_storage_kind=s3_storage(),
|
||||
)
|
||||
neon_env_builder.control_plane_compute_hook_api = (
|
||||
f"http://{make_httpserver.host}:{make_httpserver.port}/notify-attach"
|
||||
neon_env_builder.control_plane_hooks_api = (
|
||||
f"http://{make_httpserver.host}:{make_httpserver.port}/"
|
||||
)
|
||||
|
||||
def ignore_notify(request: Request):
|
||||
|
||||
@@ -794,7 +794,7 @@ def test_sharding_split_stripe_size(
|
||||
Check that modifying stripe size inline with a shard split works as expected
|
||||
"""
|
||||
(host, port) = httpserver_listen_address
|
||||
neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify"
|
||||
neon_env_builder.control_plane_hooks_api = f"http://{host}:{port}"
|
||||
neon_env_builder.num_pageservers = 1
|
||||
|
||||
# Set up fake HTTP notify endpoint: we will use this to validate that we receive
|
||||
@@ -806,7 +806,7 @@ def test_sharding_split_stripe_size(
|
||||
notifications.append(request.json)
|
||||
return Response(status=200)
|
||||
|
||||
httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler)
|
||||
httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(handler)
|
||||
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_shard_count=1, initial_tenant_shard_stripe_size=initial_stripe_size
|
||||
@@ -1312,9 +1312,7 @@ def test_sharding_split_failures(
|
||||
failure: Failure,
|
||||
):
|
||||
neon_env_builder.num_pageservers = 4
|
||||
neon_env_builder.control_plane_compute_hook_api = (
|
||||
compute_reconfigure_listener.control_plane_compute_hook_api
|
||||
)
|
||||
neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api
|
||||
initial_shard_count = 2
|
||||
split_shard_count = 4
|
||||
|
||||
|
||||
@@ -605,7 +605,7 @@ def test_storage_controller_compute_hook(
|
||||
# when migrating.
|
||||
neon_env_builder.num_pageservers = 2
|
||||
(host, port) = httpserver_listen_address
|
||||
neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify"
|
||||
neon_env_builder.control_plane_hooks_api = f"http://{host}:{port}"
|
||||
|
||||
# Set up fake HTTP notify endpoint
|
||||
notifications = []
|
||||
@@ -618,7 +618,7 @@ def test_storage_controller_compute_hook(
|
||||
notifications.append(request.json)
|
||||
return Response(status=status)
|
||||
|
||||
httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler)
|
||||
httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(handler)
|
||||
|
||||
# Start running
|
||||
env = neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"})
|
||||
@@ -724,7 +724,7 @@ def test_storage_controller_stuck_compute_hook(
|
||||
|
||||
neon_env_builder.num_pageservers = 2
|
||||
(host, port) = httpserver_listen_address
|
||||
neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify"
|
||||
neon_env_builder.control_plane_hooks_api = f"http://{host}:{port}"
|
||||
|
||||
handle_params = {"status": 200}
|
||||
|
||||
@@ -736,7 +736,7 @@ def test_storage_controller_stuck_compute_hook(
|
||||
notifications.append(request.json)
|
||||
return Response(status=status)
|
||||
|
||||
httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler)
|
||||
httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(handler)
|
||||
|
||||
# Start running
|
||||
env = neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"})
|
||||
@@ -871,7 +871,7 @@ def test_storage_controller_compute_hook_retry(
|
||||
|
||||
neon_env_builder.num_pageservers = 2
|
||||
(host, port) = httpserver_listen_address
|
||||
neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify"
|
||||
neon_env_builder.control_plane_hooks_api = f"http://{host}:{port}"
|
||||
|
||||
handle_params = {"status": 200}
|
||||
|
||||
@@ -883,7 +883,7 @@ def test_storage_controller_compute_hook_retry(
|
||||
notifications.append(request.json)
|
||||
return Response(status=status)
|
||||
|
||||
httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler)
|
||||
httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(handler)
|
||||
|
||||
# Start running
|
||||
env = neon_env_builder.init_configs()
|
||||
@@ -993,7 +993,7 @@ def test_storage_controller_compute_hook_revert(
|
||||
# when migrating.
|
||||
neon_env_builder.num_pageservers = 2
|
||||
(host, port) = httpserver_listen_address
|
||||
neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify"
|
||||
neon_env_builder.control_plane_hooks_api = f"http://{host}:{port}"
|
||||
|
||||
# Set up fake HTTP notify endpoint
|
||||
notifications = []
|
||||
@@ -1006,7 +1006,7 @@ def test_storage_controller_compute_hook_revert(
|
||||
notifications.append(request.json)
|
||||
return Response(status=status)
|
||||
|
||||
httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler)
|
||||
httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(handler)
|
||||
|
||||
# Start running
|
||||
env = neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"})
|
||||
@@ -1395,9 +1395,7 @@ def test_storage_controller_tenant_deletion(
|
||||
"""
|
||||
neon_env_builder.num_pageservers = 4
|
||||
neon_env_builder.enable_pageserver_remote_storage(s3_storage())
|
||||
neon_env_builder.control_plane_compute_hook_api = (
|
||||
compute_reconfigure_listener.control_plane_compute_hook_api
|
||||
)
|
||||
neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api
|
||||
|
||||
env = neon_env_builder.init_configs()
|
||||
env.start()
|
||||
|
||||
Reference in New Issue
Block a user