Remove SSO_ACCOUNT_ID from scrubber docs and BucketConfig (#7774)

As of #6202 we support `AWS_PROFILE` as well, which is more convenient.
Change the docs to use it instead of `SSO_ACCOUNT_ID`. Also, remove
`SSO_ACCOUNT_ID` from BucketConfig as it is confusing to the code's
reader: it's not the "main" way of setting up authentication for the
scrubber any more.

It is a breaking change for the on-disk format as we persist `sso_account_id` to disk,
but it was quite inconsistent with the other methods, which are not persisted. Also,
I don't think we want to support the case where one version writes the json and
another version reads it.

Related: #7667
This commit is contained in:
Arpad Müller
2024-05-16 19:35:13 +02:00
committed by GitHub
parent ec069dc45e
commit 4c5afb7b10
2 changed files with 11 additions and 25 deletions

View File

@@ -9,11 +9,13 @@ and `safekeeper`, and does housekeeping such as cleaning up objects for tenants
#### S3
Do `aws sso login --profile dev` to get the SSO access to the bucket to clean, get the SSO_ACCOUNT_ID for your profile (`cat ~/.aws/config` may help).
Do `aws sso login --profile dev` to get the SSO access to the bucket to clean.
Also, set the following environment variables:
- `SSO_ACCOUNT_ID`: Credentials id to use for accessing S3 buckets
- `AWS_PROFILE`: Profile name to use for accessing S3 buckets (e.g. `dev`)
- `REGION`: The region where the bucket is located.
- `BUCKET`: Bucket name
- `BUCKET_PREFIX` (optional): Prefix inside the bucket
#### Console API
@@ -43,7 +45,7 @@ processing by the `purge-garbage` subcommand.
Example:
`env SSO_ACCOUNT_ID=123456 REGION=eu-west-1 BUCKET=my-dev-bucket CLOUD_ADMIN_API_TOKEN=${NEON_CLOUD_ADMIN_API_STAGING_KEY} CLOUD_ADMIN_API_URL=[url] cargo run --release -- find-garbage --node-kind=pageserver --depth=tenant --output-path=eu-west-1-garbage.json`
`env AWS_PROFILE=dev REGION=eu-west-1 BUCKET=my-dev-bucket CLOUD_ADMIN_API_TOKEN=${NEON_CLOUD_ADMIN_API_STAGING_KEY} CLOUD_ADMIN_API_URL=[url] cargo run --release -- find-garbage --node-kind=pageserver --depth=tenant --output-path=eu-west-1-garbage.json`
#### `purge-garbage`
@@ -59,7 +61,7 @@ to pass them on the command line
Example:
`env SSO_ACCOUNT_ID=123456 cargo run --release -- purge-garbage --node-kind=pageserver --depth=tenant --input-path=eu-west-1-garbage.json`
`env AWS_PROFILE=dev cargo run --release -- purge-garbage --node-kind=pageserver --depth=tenant --input-path=eu-west-1-garbage.json`
Add the `--delete` argument before `purge-garbage` to enable deletion. This is intentionally
not provided inline in the example above to avoid accidents. Without the `--delete` flag
@@ -72,7 +74,7 @@ Errors are logged to stderr and summary to stdout.
For pageserver:
```
env SSO_ACCOUNT_ID=123456 REGION=eu-west-1 BUCKET=my-dev-bucket CLOUD_ADMIN_API_TOKEN=${NEON_CLOUD_ADMIN_API_STAGING_KEY} CLOUD_ADMIN_API_URL=[url] cargo run --release -- scan-metadata --node-kind pageserver
env AWS_PROFILE=dev REGION=eu-west-1 BUCKET=my-dev-bucket CLOUD_ADMIN_API_TOKEN=${NEON_CLOUD_ADMIN_API_STAGING_KEY} CLOUD_ADMIN_API_URL=[url] cargo run --release -- scan-metadata --node-kind pageserver
Timelines: 31106
With errors: 3

View File

@@ -200,30 +200,15 @@ impl RootTarget {
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct BucketConfig {
pub region: String,
pub bucket: String,
pub prefix_in_bucket: Option<String>,
/// Use SSO if this is set, else rely on AWS_* environment vars
pub sso_account_id: Option<String>,
}
impl Display for BucketConfig {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}/{}/{}",
self.sso_account_id.as_deref().unwrap_or("<none>"),
self.region,
self.bucket
)
}
}
impl BucketConfig {
pub fn from_env() -> anyhow::Result<Self> {
let sso_account_id = env::var("SSO_ACCOUNT_ID").ok();
let region = env::var("REGION").context("'REGION' param retrieval")?;
let bucket = env::var("BUCKET").context("'BUCKET' param retrieval")?;
let prefix_in_bucket = env::var("BUCKET_PREFIX").ok();
@@ -232,7 +217,6 @@ impl BucketConfig {
region,
bucket,
prefix_in_bucket,
sso_account_id,
})
}
}
@@ -276,7 +260,7 @@ pub fn init_logging(file_name: &str) -> WorkerGuard {
guard
}
pub fn init_s3_client(account_id: Option<String>, bucket_region: Region) -> Client {
pub fn init_s3_client(bucket_region: Region) -> Client {
let credentials_provider = {
// uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
let chain = CredentialsProviderChain::first_try(
@@ -290,7 +274,7 @@ pub fn init_s3_client(account_id: Option<String>, bucket_region: Region) -> Clie
);
// Use SSO if we were given an account ID
match account_id {
match std::env::var("SSO_ACCOUNT_ID").ok() {
Some(sso_account) => chain.or_else(
"sso",
SsoCredentialsProvider::builder()
@@ -334,7 +318,7 @@ fn init_remote(
) -> anyhow::Result<(Arc<Client>, RootTarget)> {
let bucket_region = Region::new(bucket_config.region);
let delimiter = "/".to_string();
let s3_client = Arc::new(init_s3_client(bucket_config.sso_account_id, bucket_region));
let s3_client = Arc::new(init_s3_client(bucket_region));
let s3_root = match node_kind {
NodeKind::Pageserver => RootTarget::Pageserver(S3Target {