Compare commits

..

1 Commits

Author SHA1 Message Date
github-actions[bot]
3f5a7bd5ae Compute release 2025-02-21 2025-02-21 07:00:48 +00:00
6 changed files with 46 additions and 79 deletions

View File

@@ -722,14 +722,14 @@ jobs:
- pg: v17
debian: bookworm
env:
VM_BUILDER_VERSION: v0.42.1
VM_BUILDER_VERSION: v0.37.1
steps:
- uses: actions/checkout@v4
- name: Downloading vm-builder
run: |
curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder-amd64 -o vm-builder
curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
chmod +x vm-builder
- uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193
@@ -750,8 +750,7 @@ jobs:
-size=2G \
-spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \
-src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-target-arch linux/amd64
-dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
- name: Pushing vm-compute-node image
run: |

View File

@@ -395,22 +395,15 @@ RUN case "${PG_VERSION:?}" in \
cd plv8-src && \
if [[ "${PG_VERSION:?}" < "v17" ]]; then patch -p1 < /ext-src/plv8-3.1.10.patch; fi
# Step 1: Build the vendored V8 engine. It doesn't depend on PostgreSQL, so use
# 'build-deps' as the base. This enables caching and avoids unnecessary rebuilds.
# (The V8 engine takes a very long time to build)
FROM build-deps AS plv8-build
FROM pg-build AS plv8-build
ARG PG_VERSION
WORKDIR /ext-src/plv8-src
RUN apt update && \
apt install --no-install-recommends --no-install-suggests -y \
ninja-build python3-dev libncurses5 binutils clang \
&& apt clean && rm -rf /var/lib/apt/lists/*
COPY --from=plv8-src /ext-src/ /ext-src/
RUN make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) v8
# Step 2: Build the PostgreSQL-dependent parts
COPY --from=pg-build /usr/local/pgsql /usr/local/pgsql
ENV PATH="/usr/local/pgsql/bin:$PATH"
COPY --from=plv8-src /ext-src/ /ext-src/
WORKDIR /ext-src/plv8-src
RUN \
# generate and copy upgrade scripts
make generate_upgrades && \

View File

@@ -598,10 +598,7 @@ async fn handle_tenant_timeline_passthrough(
let _timer = latency.start_timer(labels.clone());
let client = mgmt_api::Client::new(
node.base_url(),
service.get_config().pageserver_jwt_token.as_deref(),
);
let client = mgmt_api::Client::new(node.base_url(), service.get_config().jwt_token.as_deref());
let resp = client.get_raw(path).await.map_err(|e|
// We return 503 here because if we can't successfully send a request to the pageserver,
// either we aren't available or the pageserver is unavailable.
@@ -1357,7 +1354,10 @@ async fn handle_safekeeper_scheduling_policy(
.set_safekeeper_scheduling_policy(id, body.scheduling_policy)
.await?;
json_response(StatusCode::OK, ())
Ok(Response::builder()
.status(StatusCode::NO_CONTENT)
.body(Body::empty())
.unwrap())
}
/// Common wrapper for request handlers that call into Service and will operate on tenants: they must only

View File

@@ -53,10 +53,6 @@ struct Cli {
#[arg(long)]
jwt_token: Option<String>,
/// Token for authenticating this service with the safekeepers it controls
#[arg(long)]
safekeeper_jwt_token: Option<String>,
/// Token for authenticating this service with the control plane, when calling
/// the compute notification endpoint
#[arg(long)]
@@ -157,8 +153,7 @@ impl Default for StrictMode {
struct Secrets {
database_url: String,
public_key: Option<JwtAuth>,
pageserver_jwt_token: Option<String>,
safekeeper_jwt_token: Option<String>,
jwt_token: Option<String>,
control_plane_jwt_token: Option<String>,
peer_jwt_token: Option<String>,
}
@@ -166,7 +161,6 @@ struct Secrets {
impl Secrets {
const DATABASE_URL_ENV: &'static str = "DATABASE_URL";
const PAGESERVER_JWT_TOKEN_ENV: &'static str = "PAGESERVER_JWT_TOKEN";
const SAFEKEEPER_JWT_TOKEN_ENV: &'static str = "SAFEKEEPER_JWT_TOKEN";
const CONTROL_PLANE_JWT_TOKEN_ENV: &'static str = "CONTROL_PLANE_JWT_TOKEN";
const PEER_JWT_TOKEN_ENV: &'static str = "PEER_JWT_TOKEN";
const PUBLIC_KEY_ENV: &'static str = "PUBLIC_KEY";
@@ -190,14 +184,7 @@ impl Secrets {
let this = Self {
database_url,
public_key,
pageserver_jwt_token: Self::load_secret(
&args.jwt_token,
Self::PAGESERVER_JWT_TOKEN_ENV,
),
safekeeper_jwt_token: Self::load_secret(
&args.safekeeper_jwt_token,
Self::SAFEKEEPER_JWT_TOKEN_ENV,
),
jwt_token: Self::load_secret(&args.jwt_token, Self::PAGESERVER_JWT_TOKEN_ENV),
control_plane_jwt_token: Self::load_secret(
&args.control_plane_jwt_token,
Self::CONTROL_PLANE_JWT_TOKEN_ENV,
@@ -277,17 +264,11 @@ async fn async_main() -> anyhow::Result<()> {
let secrets = Secrets::load(&args).await?;
// TODO: once we've rolled out the safekeeper JWT token everywhere, put it into the validation code below
tracing::info!(
"safekeeper_jwt_token set: {:?}",
secrets.safekeeper_jwt_token.is_some()
);
// Validate required secrets and arguments are provided in strict mode
match strict_mode {
StrictMode::Strict
if (secrets.public_key.is_none()
|| secrets.pageserver_jwt_token.is_none()
|| secrets.jwt_token.is_none()
|| secrets.control_plane_jwt_token.is_none()) =>
{
// Production systems should always have secrets configured: if public_key was not set
@@ -312,8 +293,7 @@ async fn async_main() -> anyhow::Result<()> {
}
let config = Config {
pageserver_jwt_token: secrets.pageserver_jwt_token,
safekeeper_jwt_token: secrets.safekeeper_jwt_token,
jwt_token: secrets.jwt_token,
control_plane_jwt_token: secrets.control_plane_jwt_token,
peer_jwt_token: secrets.peer_jwt_token,
compute_hook_url: args.compute_hook_url,

View File

@@ -296,7 +296,7 @@ impl Reconciler {
.location_config(tenant_shard_id, config.clone(), flush_ms, lazy)
.await
},
&self.service_config.pageserver_jwt_token,
&self.service_config.jwt_token,
1,
3,
timeout,
@@ -417,7 +417,7 @@ impl Reconciler {
let client = PageserverClient::new(
node.get_id(),
node.base_url(),
self.service_config.pageserver_jwt_token.as_deref(),
self.service_config.jwt_token.as_deref(),
);
client
@@ -440,7 +440,7 @@ impl Reconciler {
let client = PageserverClient::new(
node.get_id(),
node.base_url(),
self.service_config.pageserver_jwt_token.as_deref(),
self.service_config.jwt_token.as_deref(),
);
let timelines = client.timeline_list(&tenant_shard_id).await?;
@@ -478,7 +478,7 @@ impl Reconciler {
)
.await
},
&self.service_config.pageserver_jwt_token,
&self.service_config.jwt_token,
1,
3,
request_download_timeout * 2,
@@ -771,7 +771,7 @@ impl Reconciler {
let observed_conf = match attached_node
.with_client_retries(
|client| async move { client.get_location_config(tenant_shard_id).await },
&self.service_config.pageserver_jwt_token,
&self.service_config.jwt_token,
1,
1,
Duration::from_secs(5),
@@ -1099,7 +1099,7 @@ impl Reconciler {
match origin
.with_client_retries(
|client| async move { client.get_location_config(tenant_shard_id).await },
&self.service_config.pageserver_jwt_token,
&self.service_config.jwt_token,
1,
3,
Duration::from_secs(5),

View File

@@ -348,12 +348,7 @@ pub struct Config {
// All pageservers managed by one instance of this service must have
// the same public key. This JWT token will be used to authenticate
// this service to the pageservers it manages.
pub pageserver_jwt_token: Option<String>,
// All safekeepers managed by one instance of this service must have
// the same public key. This JWT token will be used to authenticate
// this service to the safekeepers it manages.
pub safekeeper_jwt_token: Option<String>,
pub jwt_token: Option<String>,
// This JWT token will be used to authenticate this service to the control plane.
pub control_plane_jwt_token: Option<String>,
@@ -887,7 +882,7 @@ impl Service {
let response = node
.with_client_retries(
|client| async move { client.list_location_config().await },
&self.config.pageserver_jwt_token,
&self.config.jwt_token,
1,
5,
timeout,
@@ -988,7 +983,7 @@ impl Service {
let client = PageserverClient::new(
node.get_id(),
node.base_url(),
self.config.pageserver_jwt_token.as_deref(),
self.config.jwt_token.as_deref(),
);
match client
.location_config(
@@ -1558,14 +1553,14 @@ impl Service {
let reconcilers_cancel = cancel.child_token();
let heartbeater_ps = Heartbeater::new(
config.pageserver_jwt_token.clone(),
config.jwt_token.clone(),
config.max_offline_interval,
config.max_warming_up_interval,
cancel.clone(),
);
let heartbeater_sk = Heartbeater::new(
config.safekeeper_jwt_token.clone(),
config.jwt_token.clone(),
config.max_offline_interval,
config.max_warming_up_interval,
cancel.clone(),
@@ -1912,7 +1907,7 @@ impl Service {
let configs = match node
.with_client_retries(
|client| async move { client.list_location_config().await },
&self.config.pageserver_jwt_token,
&self.config.jwt_token,
1,
5,
SHORT_RECONCILE_TIMEOUT,
@@ -1970,7 +1965,7 @@ impl Service {
.location_config(tenant_shard_id, config, None, false)
.await
},
&self.config.pageserver_jwt_token,
&self.config.jwt_token,
1,
5,
SHORT_RECONCILE_TIMEOUT,
@@ -3105,7 +3100,7 @@ impl Service {
let client = PageserverClient::new(
node.get_id(),
node.base_url(),
self.config.pageserver_jwt_token.as_deref(),
self.config.jwt_token.as_deref(),
);
tracing::info!("Doing time travel recovery for shard {tenant_shard_id}",);
@@ -3166,7 +3161,7 @@ impl Service {
let client = PageserverClient::new(
node.get_id(),
node.base_url(),
self.config.pageserver_jwt_token.as_deref(),
self.config.jwt_token.as_deref(),
);
futs.push(async move {
let result = client
@@ -3289,7 +3284,7 @@ impl Service {
.tenant_delete(TenantShardId::unsharded(tenant_id))
.await
},
&self.config.pageserver_jwt_token,
&self.config.jwt_token,
1,
3,
RECONCILE_TIMEOUT,
@@ -3508,7 +3503,7 @@ impl Service {
let timeline_info = create_one(
shard_zero_tid,
shard_zero_locations,
self.config.pageserver_jwt_token.clone(),
self.config.jwt_token.clone(),
create_req.clone(),
)
.await?;
@@ -3524,7 +3519,7 @@ impl Service {
// Create timeline on remaining shards with number >0
if !targets.0.is_empty() {
// If we had multiple shards, issue requests for the remainder now.
let jwt = &self.config.pageserver_jwt_token;
let jwt = &self.config.jwt_token;
self.tenant_for_shards(
targets
.0
@@ -3607,7 +3602,7 @@ impl Service {
tenant_shard_id,
timeline_id,
node,
self.config.pageserver_jwt_token.clone(),
self.config.jwt_token.clone(),
req.clone(),
))
})
@@ -3688,7 +3683,7 @@ impl Service {
tenant_shard_id,
timeline_id,
node,
self.config.pageserver_jwt_token.clone(),
self.config.jwt_token.clone(),
))
})
.await?;
@@ -3762,7 +3757,7 @@ impl Service {
tenant_shard_id,
timeline_id,
node,
self.config.pageserver_jwt_token.clone(),
self.config.jwt_token.clone(),
dir,
))
})
@@ -3877,7 +3872,7 @@ impl Service {
futs.push(async move {
node.with_client_retries(
|client| op(tenant_shard_id, client),
&self.config.pageserver_jwt_token,
&self.config.jwt_token,
warn_threshold,
max_retries,
timeout,
@@ -4126,7 +4121,7 @@ impl Service {
tenant_shard_id,
timeline_id,
node,
self.config.pageserver_jwt_token.clone(),
self.config.jwt_token.clone(),
))
})
.await?;
@@ -4148,7 +4143,7 @@ impl Service {
shard_zero_tid,
timeline_id,
shard_zero_locations.latest.node,
self.config.pageserver_jwt_token.clone(),
self.config.jwt_token.clone(),
)
.await?;
Ok(shard_zero_status)
@@ -4547,7 +4542,7 @@ impl Service {
client.location_config(child_id, config, None, false).await
},
&self.config.pageserver_jwt_token,
&self.config.jwt_token,
1,
10,
Duration::from_secs(5),
@@ -5147,7 +5142,7 @@ impl Service {
let client = PageserverClient::new(
node.get_id(),
node.base_url(),
self.config.pageserver_jwt_token.as_deref(),
self.config.jwt_token.as_deref(),
);
let response = client
.tenant_shard_split(
@@ -5473,7 +5468,7 @@ impl Service {
let client = PageserverClient::new(
node.get_id(),
node.base_url(),
self.config.pageserver_jwt_token.as_deref(),
self.config.jwt_token.as_deref(),
);
let scan_result = client
@@ -7099,7 +7094,7 @@ impl Service {
match attached_node
.with_client_retries(
|client| async move { client.tenant_heatmap_upload(tenant_shard_id).await },
&self.config.pageserver_jwt_token,
&self.config.jwt_token,
3,
10,
SHORT_RECONCILE_TIMEOUT,
@@ -7135,7 +7130,7 @@ impl Service {
)
.await
},
&self.config.pageserver_jwt_token,
&self.config.jwt_token,
3,
10,
SHORT_RECONCILE_TIMEOUT,
@@ -7190,7 +7185,7 @@ impl Service {
let request = request_ref.clone();
client.top_tenant_shards(request.clone()).await
},
&self.config.pageserver_jwt_token,
&self.config.jwt_token,
3,
3,
Duration::from_secs(5),
@@ -7363,7 +7358,7 @@ impl Service {
match node
.with_client_retries(
|client| async move { client.tenant_secondary_status(tenant_shard_id).await },
&self.config.pageserver_jwt_token,
&self.config.jwt_token,
1,
3,
Duration::from_millis(250),