From aca221ac8bbcbef98f6e31b4eca889066ea9edee Mon Sep 17 00:00:00 2001 From: Sergey Melnikov Date: Wed, 16 Nov 2022 15:54:55 +0300 Subject: [PATCH 01/22] Switch old staging to new etcd (#2834) --- .github/ansible/staging.hosts.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ansible/staging.hosts.yaml b/.github/ansible/staging.hosts.yaml index 44d971455d..ae55f9223c 100644 --- a/.github/ansible/staging.hosts.yaml +++ b/.github/ansible/staging.hosts.yaml @@ -3,7 +3,7 @@ storage: bucket_name: zenith-staging-storage-us-east-1 bucket_region: us-east-1 console_mgmt_base_url: http://console-staging.local - etcd_endpoints: zenith-us-stage-etcd.local:2379 + etcd_endpoints: etcd-0.us-east-2.aws.neon.build:2379 pageserver_config_stub: pg_distrib_dir: /usr/local remote_storage: From 9ae4da4f31bbd871b2882c3f146860fdef511c39 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 16 Nov 2022 15:07:48 +0200 Subject: [PATCH 02/22] Silence test failure caused by race condition between GC and detach. Thanks to the race condition, GC sometimes fails with "no such file or directory" error, if the tenant is detached concurrently. That's a known issue, but it didn't cause test failures until we started to check for unexpected ERRORs in the log in commit 46d30bf054. We should fix the race condition, of course, but until we do, let's silence the failures. --- test_runner/regress/test_tenant_detach.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index f66bacc4f7..f049b9af20 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -21,6 +21,9 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): pageserver_http = env.pageserver.http_client() env.pageserver.allowed_errors.append(".*NotFound\\(Tenant .* not found in the local state") + # FIXME: we have a race condition between GC and detach. 
GC might fail with this + # error. Similar to https://github.com/neondatabase/neon/issues/2671 + env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*") # first check for non existing tenant tenant_id = TenantId.generate() From 5184685cedb3356918ef77f51682ef1a6c23d4d0 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Wed, 16 Nov 2022 17:10:36 +0200 Subject: [PATCH 03/22] Revert "Introduce aws-sdk-rust as rusoto S3 replacement (#2802)" (#2837) Despite tests working, on staging the library started to fail with the following error: ``` Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 2022-11-16T11:53:37.191211Z INFO init_tenant_mgr:local_tenant_timeline_files: Collected files for 16 tenants Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: thread 'main' panicked at 'A connector was not available. Either set a custom connector or enable the `rustls` and `native-tls` crate featu> Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: stack backtrace: Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 0: rust_begin_unwind Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/std/src/panicking.rs:584:5 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 1: core::panicking::panic_fmt Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/panicking.rs:142:14 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 2: core::panicking::panic_display Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/panicking.rs:72:5 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 3: core::panicking::panic_str Nov 16 11:53:37 
pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/panicking.rs:56:5 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 4: core::option::expect_failed Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/option.rs:1854:5 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 5: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 6: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 7: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 8: ::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 9: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 10: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 11: ::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 12: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 13: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 14: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 15: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/future.rs:124:9 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 16: as tower_service::Service Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at 
/home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/aws-smithy-http-tower-0.51.0/src/parse_response.rs:109:34 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 17: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/mod.rs:91:19 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 18: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/tracing-0.1.37/src/instrument.rs:272:9 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 19: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/future.rs:124:9 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 20: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/aws-smithy-client-0.51.0/src/timeout.rs:189:70 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 21: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/tower-0.4.13/src/retry/future.rs:77:41 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 22: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/aws-smithy-client-0.51.0/src/timeout.rs:189:70 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 23: 
aws_smithy_client::Client::call_raw::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/aws-smithy-client-0.51.0/src/lib.rs:227:56 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 24: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/mod.rs:91:19 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 25: aws_smithy_client::Client::call::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/aws-smithy-client-0.51.0/src/lib.rs:184:29 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 26: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/mod.rs:91:19 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 27: aws_sdk_s3::client::fluent_builders::GetObject::send::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/aws-sdk-s3-0.21.0/src/client.rs:7735:40 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 28: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/mod.rs:91:19 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 29: remote_storage::s3_bucket::S3Bucket::download_object::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at libs/remote_storage/src/s3_bucket.rs:205:20 Nov 16 11:53:37 
pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 30: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/mod.rs:91:19 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 31: ::download::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at libs/remote_storage/src/s3_bucket.rs:399:11 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 32: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/mod.rs:91:19 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 33: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/future.rs:124:9 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 34: remote_storage::GenericRemoteStorage::download_storage_object::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at libs/remote_storage/src/lib.rs:264:55 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 35: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/mod.rs:91:19 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 36: pageserver::storage_sync::download::download_index_part::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at pageserver/src/storage_sync/download.rs:148:57 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 37: as core::future::future::Future>::poll Nov 16 11:53:37 
pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/mod.rs:91:19 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 38: pageserver::storage_sync::download::download_index_parts::{{closure}}::{{closure}}::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at pageserver/src/storage_sync/download.rs:77:75 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 39: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/mod.rs:91:19 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 40: as futures_core::stream::Stream>::poll_next Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.24/src/stream/futures_unordered/mod.rs:514:17 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 41: futures_util::stream::stream::StreamExt::poll_next_unpin Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.24/src/stream/stream/mod.rs:1626:9 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 42: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.24/src/stream/stream/next.rs:32:9 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 43: pageserver::storage_sync::download::download_index_parts::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at pageserver/src/storage_sync/download.rs:80:69 Nov 16 11:53:37 
pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 44: as core::future::future::Future>::poll Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/future/mod.rs:91:19 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 45: tokio::park::thread::CachedParkThread::block_on::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.21.1/src/park/thread.rs:267:54 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 46: tokio::coop::with_budget::{{closure}} Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.21.1/src/coop.rs:102:9 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 47: std::thread::local::LocalKey::try_with Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/std/src/thread/local.rs:445:16 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 48: std::thread::local::LocalKey::with Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/std/src/thread/local.rs:421:9 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 49: tokio::coop::with_budget Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.21.1/src/coop.rs:95:5 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 50: tokio::coop::budget Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.21.1/src/coop.rs:72:5 Nov 16 11:53:37 
pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 51: tokio::park::thread::CachedParkThread::block_on Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.21.1/src/park/thread.rs:267:31 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 52: tokio::runtime::enter::Enter::block_on Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.21.1/src/runtime/enter.rs:152:13 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 53: tokio::runtime::scheduler::multi_thread::MultiThread::block_on Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.21.1/src/runtime/scheduler/multi_thread/mod.rs:79:9 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 54: tokio::runtime::Runtime::block_on Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /home/nonroot/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.21.1/src/runtime/mod.rs:492:44 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 55: pageserver::storage_sync::spawn_storage_sync_task Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at pageserver/src/storage_sync.rs:656:34 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 56: pageserver::tenant_mgr::init_tenant_mgr Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at pageserver/src/tenant_mgr.rs:88:13 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 57: pageserver::start_pageserver Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at pageserver/src/bin/pageserver.rs:269:9 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 58: 
pageserver::main Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at pageserver/src/bin/pageserver.rs:103:5 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: 59: core::ops::function::FnOnce::call_once Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: at /rustc/e092d0b6b43f2de967af0887873151bb1c0b18d3/library/core/src/ops/function.rs:248:5 Nov 16 11:53:37 pageserver-0.us-east-2.aws.neon.build pageserver[481974]: note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace. ``` Feels like better testing on the env is needed later, maybe more e2e tests have to be written (albeit we have download tests, so something else happens here, tls issues?) --- Cargo.lock | 676 ++++++++++++--------------- libs/remote_storage/Cargo.toml | 7 +- libs/remote_storage/src/s3_bucket.rs | 178 ++++--- workspace_hack/Cargo.toml | 6 +- 4 files changed, 389 insertions(+), 478 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a34a5b44f9..c112c05188 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -169,333 +169,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" -[[package]] -name = "aws-config" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56a636c44c77fa18bdba56126a34d30cfe5538fe88f7d34988fa731fee143ddd" -dependencies = [ - "aws-http", - "aws-sdk-sso", - "aws-sdk-sts", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-json", - "aws-smithy-types", - "aws-types", - "bytes", - "hex", - "http", - "hyper", - "ring", - "time 0.3.15", - "tokio", - "tower", - "tracing", - "zeroize", -] - -[[package]] -name = "aws-endpoint" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6ca8f374874f6459aaa88dc861d7f5d834ca1ff97668eae190e97266b5f6c3fb" -dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "aws-types", - "http", - "regex", - "tracing", -] - -[[package]] -name = "aws-http" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78d41e19e779b73463f5f0c21b3aacc995f4ba783ab13a7ae9f5dfb159a551b4" -dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "aws-types", - "bytes", - "http", - "http-body", - "lazy_static", - "percent-encoding", - "pin-project-lite", - "tracing", -] - -[[package]] -name = "aws-sdk-s3" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9f08665c8e03aca8cb092ef01e617436ebfa977fddc1240e1b062488ab5d48a" -dependencies = [ - "aws-endpoint", - "aws-http", - "aws-sig-auth", - "aws-sigv4", - "aws-smithy-async", - "aws-smithy-checksums", - "aws-smithy-client", - "aws-smithy-eventstream", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", - "aws-smithy-xml", - "aws-types", - "bytes", - "bytes-utils", - "http", - "http-body", - "tokio-stream", - "tower", - "tracing", -] - -[[package]] -name = "aws-sdk-sso" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86dcb1cb71aa8763b327542ead410424515cff0cde5b753eedd2917e09c63734" -dependencies = [ - "aws-endpoint", - "aws-http", - "aws-sig-auth", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-json", - "aws-smithy-types", - "aws-types", - "bytes", - "http", - "tokio-stream", - "tower", -] - -[[package]] -name = "aws-sdk-sts" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdfcf584297c666f6b472d5368a78de3bc714b6e0a53d7fbf76c3e347c292ab1" -dependencies = [ - "aws-endpoint", - "aws-http", - "aws-sig-auth", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", - 
"aws-smithy-query", - "aws-smithy-types", - "aws-smithy-xml", - "aws-types", - "bytes", - "http", - "tower", -] - -[[package]] -name = "aws-sig-auth" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12cbe7b2be9e185c1fbce27fc9c41c66b195b32d89aa099f98768d9544221308" -dependencies = [ - "aws-sigv4", - "aws-smithy-eventstream", - "aws-smithy-http", - "aws-types", - "http", - "tracing", -] - -[[package]] -name = "aws-sigv4" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ff4cff8c4a101962d593ba94e72cd83891aecd423f0c6e3146bff6fb92c9e3" -dependencies = [ - "aws-smithy-eventstream", - "aws-smithy-http", - "bytes", - "form_urlencoded", - "hex", - "http", - "once_cell", - "percent-encoding", - "regex", - "ring", - "time 0.3.15", - "tracing", -] - -[[package]] -name = "aws-smithy-async" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b3442b4c5d3fc39891a2e5e625735fba6b24694887d49c6518460fde98247a9" -dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", - "tokio-stream", -] - -[[package]] -name = "aws-smithy-checksums" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc227e36e346f45298288359f37123e1a92628d1cec6b11b5eb335553278bd9e" -dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "bytes", - "crc32c", - "crc32fast", - "hex", - "http", - "http-body", - "md-5", - "pin-project-lite", - "sha1", - "sha2", - "tracing", -] - -[[package]] -name = "aws-smithy-client" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff28d553714f8f54cd921227934fc13a536a1c03f106e56b362fd57e16d450ad" -dependencies = [ - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", - "bytes", - "fastrand", - "http", - "http-body", - "hyper", - "hyper-rustls", - "lazy_static", - "pin-project-lite", - 
"tokio", - "tower", - "tracing", -] - -[[package]] -name = "aws-smithy-eventstream" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7ea0df7161ce65b5c8ca6eb709a1a907376fa18226976e41c748ce02ccccf24" -dependencies = [ - "aws-smithy-types", - "bytes", - "crc32fast", -] - -[[package]] -name = "aws-smithy-http" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf58ed4fefa61dbf038e5421a521cbc2c448ef69deff0ab1d915d8a10eda5664" -dependencies = [ - "aws-smithy-eventstream", - "aws-smithy-types", - "bytes", - "bytes-utils", - "futures-core", - "http", - "http-body", - "hyper", - "once_cell", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "aws-smithy-http-tower" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20c96d7bd35e7cf96aca1134b2f81b1b59ffe493f7c6539c051791cbbf7a42d3" -dependencies = [ - "aws-smithy-http", - "bytes", - "http", - "http-body", - "pin-project-lite", - "tower", - "tracing", -] - -[[package]] -name = "aws-smithy-json" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8324ba98c8a94187723cc16c37aefa09504646ee65c3d2c3af495bab5ea701b" -dependencies = [ - "aws-smithy-types", -] - -[[package]] -name = "aws-smithy-query" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83834ed2ff69ea6f6657baf205267dc2c0abe940703503a3e5d60ce23be3d306" -dependencies = [ - "aws-smithy-types", - "urlencoding", -] - -[[package]] -name = "aws-smithy-types" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b02e06ea63498c43bc0217ea4d16605d4e58d85c12fc23f6572ff6d0a840c61" -dependencies = [ - "itoa", - "num-integer", - "ryu", - "time 0.3.15", -] - -[[package]] -name = "aws-smithy-xml" -version = "0.51.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "246e9f83dd1fdf5d347fa30ae4ad30a9d1d42ce4cd74a93d94afa874646f94cd" -dependencies = [ - "xmlparser", -] - -[[package]] -name = "aws-types" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05701d32da168b44f7ee63147781aed8723e792cc131cb9b18363b5393f17f70" -dependencies = [ - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-types", - "http", - "rustc_version 0.4.0", - "tracing", - "zeroize", -] - [[package]] name = "axum" version = "0.5.16" @@ -626,6 +299,15 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array", +] + [[package]] name = "block-buffer" version = "0.10.3" @@ -674,16 +356,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bytes-utils" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e47d3a8076e283f3acd27400535992edb3ba4b5bb72f8891ad8fbe7932a7d4b9" -dependencies = [ - "bytes", - "either", -] - [[package]] name = "cast" version = "0.3.0" @@ -1118,6 +790,16 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-mac" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1d1a86f49236c215f271d40892d5fc950490551400b02ef360692c29815c714" +dependencies = [ + "generic-array", + "subtle", +] + [[package]] name = "cxx" version = "1.0.79" @@ -1226,17 +908,47 @@ dependencies = [ "rusticata-macros", ] +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + [[package]] name = "digest" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adfbc57365a37acbd2ebf2b64d7e69bb766e2fea813521ed536f5d0520dcf86c" dependencies = [ - "block-buffer", + "block-buffer 0.10.3", "crypto-common", "subtle", ] +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "displaydoc" version = "0.2.3" @@ -1381,6 +1093,21 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.1.0" @@ -1645,13 +1372,23 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ebdb29d2ea9ed0083cd8cece49bbd968021bd99b0849edb4a9a7ee0fdf6a4e0" +[[package]] +name = "hmac" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2a2a2320eb7ec0ebe8da8f744d7812d9fc4cb4d09344ac01898dbcb6a20ae69b" +dependencies = [ + "crypto-mac", + "digest 0.9.0", +] + [[package]] name = "hmac" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" dependencies = [ - "digest", + "digest 0.10.5", ] [[package]] @@ -1742,9 +1479,7 @@ checksum = "d87c48c02e0dc5e3b849a2041db3029fd066650f8f717c07bf8ed78ccb895cac" dependencies = [ "http", "hyper", - "log", "rustls", - "rustls-native-certs", "tokio", "tokio-rustls", ] @@ -1761,6 +1496,19 @@ dependencies = [ "tokio-io-timeout", ] +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + [[package]] name = "iana-time-zone" version = "0.1.51" @@ -2001,13 +1749,24 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73cbba799671b762df5a175adf59ce145165747bb891505c43d09aefbbf38beb" +[[package]] +name = "md-5" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" +dependencies = [ + "block-buffer 0.9.0", + "digest 0.9.0", + "opaque-debug", +] + [[package]] name = "md-5" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6365506850d44bff6e2fbcb5176cf63650e48bd45ef2fe2665ae1570e0f4b9ca" dependencies = [ - "digest", + "digest 0.10.5", ] [[package]] @@ -2098,6 +1857,24 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +[[package]] +name = "native-tls" +version = "0.2.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd7e2f3618557f980e0b17e8856252eee3c97fa12c54dff0ca290fb6266ca4a9" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "nb" version = "0.1.3" @@ -2268,12 +2045,57 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + +[[package]] +name = "openssl" +version = "0.10.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12fc0523e3bd51a692c8850d075d74dc062ccf251c0110668cbd921917118a13" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "openssl-probe" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +[[package]] +name = "openssl-sys" +version = "0.9.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5230151e44c0f05157effb743e8d517472843121cf9243e8b81393edb5acd9ce" +dependencies = [ + "autocfg", + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "os_str_bytes" version = "6.3.0" @@ -2493,6 +2315,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" + [[package]] name = "plotters" version = "0.3.4" @@ -2543,12 +2371,12 @@ dependencies = [ "byteorder", "bytes", "fallible-iterator", - "hmac", + "hmac 0.12.1", "lazy_static", - "md-5", + "md-5 0.10.5", "memchr", "rand", - "sha2", + "sha2 0.10.6", "stringprep", ] @@ -2750,7 +2578,7 @@ dependencies = [ "git-version", "hashbrown", "hex", - "hmac", + "hmac 0.12.1", "hyper", "itertools", "md5", @@ -2769,7 +2597,7 @@ dependencies = [ "scopeguard", "serde", "serde_json", - "sha2", + "sha2 0.10.6", "socket2", "thiserror", "tokio", @@ -2888,6 +2716,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom", + "redox_syscall", + "thiserror", +] + [[package]] name = "regex" version = "1.6.0" @@ -2920,13 +2759,10 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "aws-config", - "aws-sdk-s3", - "aws-smithy-http", - "aws-types", - "hyper", "metrics", "once_cell", + "rusoto_core", + "rusoto_s3", "serde", "serde_json", "tempfile", @@ -3080,6 +2916,88 @@ dependencies = [ "syn", ] +[[package]] +name = "rusoto_core" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1db30db44ea73551326269adcf7a2169428a054f14faf9e1768f2163494f2fa2" +dependencies = [ + "async-trait", + "base64", + "bytes", + "crc32fast", + "futures", + "http", + "hyper", + "hyper-tls", + "lazy_static", + "log", + "rusoto_credential", + "rusoto_signature", + "rustc_version 0.4.0", + "serde", + "serde_json", + "tokio", + "xml-rs", +] + +[[package]] +name = "rusoto_credential" +version = 
"0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee0a6c13db5aad6047b6a44ef023dbbc21a056b6dab5be3b79ce4283d5c02d05" +dependencies = [ + "async-trait", + "chrono", + "dirs-next", + "futures", + "hyper", + "serde", + "serde_json", + "shlex", + "tokio", + "zeroize", +] + +[[package]] +name = "rusoto_s3" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aae4677183411f6b0b412d66194ef5403293917d66e70ab118f07cc24c5b14d" +dependencies = [ + "async-trait", + "bytes", + "futures", + "rusoto_core", + "xml-rs", +] + +[[package]] +name = "rusoto_signature" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ae95491c8b4847931e291b151127eccd6ff8ca13f33603eb3d0035ecb05272" +dependencies = [ + "base64", + "bytes", + "chrono", + "digest 0.9.0", + "futures", + "hex", + "hmac 0.11.0", + "http", + "hyper", + "log", + "md-5 0.9.1", + "percent-encoding", + "pin-project-lite", + "rusoto_credential", + "rustc_version 0.4.0", + "serde", + "sha2 0.9.9", + "tokio", +] + [[package]] name = "rustc-demangle" version = "0.1.21" @@ -3131,18 +3049,6 @@ dependencies = [ "webpki", ] -[[package]] -name = "rustls-native-certs" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0167bac7a9f490495f3c33013e7722b53cb087ecbe082fb0c6387c96f634ea50" -dependencies = [ - "openssl-probe", - "rustls-pemfile", - "schannel", - "security-framework", -] - [[package]] name = "rustls-pemfile" version = "1.0.1" @@ -3384,14 +3290,16 @@ dependencies = [ ] [[package]] -name = "sha1" -version = "0.10.5" +name = "sha2" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" +checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" dependencies = [ + "block-buffer 0.9.0", "cfg-if", "cpufeatures", - "digest", + "digest 
0.9.0", + "opaque-debug", ] [[package]] @@ -3402,7 +3310,7 @@ checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" dependencies = [ "cfg-if", "cpufeatures", - "digest", + "digest 0.10.5", ] [[package]] @@ -3791,6 +3699,16 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-postgres" version = "0.7.6" @@ -4128,12 +4046,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "urlencoding" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8db7427f936968176eaa7cdf81b7f98b980b18495ec28f1b5791ac3bfe3eea9" - [[package]] name = "utils" version = "0.1.0" @@ -4201,6 +4113,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77439c1b53d2303b20d9459b1ade71a83c716e3f9c34f3228c00e6f185d6c002" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.4" @@ -4466,6 +4384,7 @@ dependencies = [ "ahash", "anyhow", "bytes", + "chrono", "clap 4.0.15", "crossbeam-utils", "either", @@ -4494,7 +4413,6 @@ dependencies = [ "time 0.3.15", "tokio", "tokio-util", - "tower", "tracing", "tracing-core", ] @@ -4527,10 +4445,10 @@ dependencies = [ ] [[package]] -name = "xmlparser" -version = "0.13.5" +name = "xml-rs" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd" +checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" [[package]] name = "yasna" diff --git a/libs/remote_storage/Cargo.toml 
b/libs/remote_storage/Cargo.toml index 85a1104c46..f54d91905c 100644 --- a/libs/remote_storage/Cargo.toml +++ b/libs/remote_storage/Cargo.toml @@ -9,11 +9,8 @@ async-trait = "0.1" metrics = { version = "0.1", path = "../metrics" } utils = { version = "0.1", path = "../utils" } once_cell = "1.13.0" -aws-smithy-http = "0.51.0" -aws-types = "0.51.0" -aws-config = { version = "0.51.0", default-features = false } -aws-sdk-s3 = "0.21.0" -hyper = { version = "0.14", features = ["stream"] } +rusoto_core = "0.48" +rusoto_s3 = "0.48" serde = { version = "1.0", features = ["derive"] } serde_json = "1" tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] } diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs index 3869ef8557..74632430cd 100644 --- a/libs/remote_storage/src/s3_bucket.rs +++ b/libs/remote_storage/src/s3_bucket.rs @@ -5,32 +5,27 @@ //! their bucket prefixes are both specified and different. use std::path::{Path, PathBuf}; -use std::sync::Arc; use anyhow::Context; -use aws_config::{ - environment::credentials::EnvironmentVariableCredentialsProvider, - imds::credentials::ImdsCredentialsProvider, meta::credentials::CredentialsProviderChain, +use rusoto_core::{ + credential::{InstanceMetadataProvider, StaticProvider}, + HttpClient, Region, RusotoError, }; -use aws_sdk_s3::{ - config::Config, - error::{GetObjectError, GetObjectErrorKind}, - types::{ByteStream, SdkError}, - Client, Endpoint, Region, +use rusoto_s3::{ + DeleteObjectRequest, GetObjectError, GetObjectRequest, ListObjectsV2Request, PutObjectRequest, + S3Client, StreamingBody, S3, }; -use aws_smithy_http::body::SdkBody; -use aws_types::credentials::SharedCredentialsProvider; -use hyper::Body; use tokio::{io, sync::Semaphore}; use tokio_util::io::ReaderStream; use tracing::debug; -use super::StorageMetadata; use crate::{ strip_path_prefix, Download, DownloadError, RemoteObjectId, RemoteStorage, S3Config, REMOTE_STORAGE_PREFIX_SEPARATOR, }; +use 
super::StorageMetadata; + pub(super) mod metrics { use metrics::{register_int_counter_vec, IntCounterVec}; use once_cell::sync::Lazy; @@ -121,7 +116,7 @@ fn download_destination( /// AWS S3 storage. pub struct S3Bucket { workdir: PathBuf, - client: Client, + client: S3Client, bucket_name: String, prefix_in_bucket: Option, // Every request to S3 can be throttled or cancelled, if a certain number of requests per second is exceeded. @@ -130,13 +125,6 @@ pub struct S3Bucket { concurrency_limiter: Semaphore, } -#[derive(Default)] -struct GetObjectRequest { - bucket: String, - key: String, - range: Option, -} - impl S3Bucket { /// Creates the S3 storage, errors if incorrect AWS S3 configuration provided. pub fn new(aws_config: &S3Config, workdir: PathBuf) -> anyhow::Result { @@ -144,25 +132,43 @@ impl S3Bucket { "Creating s3 remote storage for S3 bucket {}", aws_config.bucket_name ); - let provider = CredentialsProviderChain::first_try( - "Environment", - EnvironmentVariableCredentialsProvider::new(), - ) - .or_else("IAM", ImdsCredentialsProvider::builder().build()); + let region = match aws_config.endpoint.clone() { + Some(custom_endpoint) => Region::Custom { + name: aws_config.bucket_region.clone(), + endpoint: custom_endpoint, + }, + None => aws_config + .bucket_region + .parse::() + .context("Failed to parse the s3 region from config")?, + }; + let request_dispatcher = HttpClient::new().context("Failed to create S3 http client")?; - let mut config_builder = Config::builder() - .region(Region::new(aws_config.bucket_region.clone())) - .credentials_provider(SharedCredentialsProvider::new(provider)); + let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").ok(); + let secret_access_key = std::env::var("AWS_SECRET_ACCESS_KEY").ok(); + // session token is used when authorizing through sso + // which is typically the case when testing locally on developer machine + let session_token = std::env::var("AWS_SESSION_TOKEN").ok(); - if let Some(custom_endpoint) = 
aws_config.endpoint.clone() { - let endpoint = Endpoint::immutable( - custom_endpoint - .parse() - .expect("Failed to parse S3 custom endpoint"), + let client = if access_key_id.is_none() && secret_access_key.is_none() { + debug!("Using IAM-based AWS access"); + S3Client::new_with(request_dispatcher, InstanceMetadataProvider::new(), region) + } else { + debug!( + "Using credentials-based AWS access. Session token is set: {}", + session_token.is_some() ); - config_builder.set_endpoint_resolver(Some(Arc::new(endpoint))); - } - let client = Client::from_conf(config_builder.build()); + S3Client::new_with( + request_dispatcher, + StaticProvider::new( + access_key_id.unwrap_or_default(), + secret_access_key.unwrap_or_default(), + session_token, + None, + ), + region, + ) + }; let prefix_in_bucket = aws_config.prefix_in_bucket.as_deref().map(|prefix| { let mut prefix = prefix; @@ -196,33 +202,20 @@ impl S3Bucket { metrics::inc_get_object(); - let get_object = self - .client - .get_object() - .bucket(request.bucket) - .key(request.key) - .set_range(request.range) - .send() - .await; - - match get_object { - Ok(object_output) => { - let metadata = object_output.metadata().cloned().map(StorageMetadata); - Ok(Download { - metadata, - download_stream: Box::pin(io::BufReader::new( - object_output.body.into_async_read(), - )), - }) - } - Err(SdkError::ServiceError { - err: - GetObjectError { - kind: GetObjectErrorKind::NoSuchKey(..), - .. - }, - .. 
- }) => Err(DownloadError::NotFound), + match self.client.get_object(request).await { + Ok(object_output) => match object_output.body { + None => { + metrics::inc_get_object_fail(); + Err(DownloadError::Other(anyhow::anyhow!( + "Got no body for the S3 object given" + ))) + } + Some(body) => Ok(Download { + metadata: object_output.metadata.map(StorageMetadata), + download_stream: Box::pin(io::BufReader::new(body.into_async_read())), + }), + }, + Err(RusotoError::Service(GetObjectError::NoSuchKey(_))) => Err(DownloadError::NotFound), Err(e) => { metrics::inc_get_object_fail(); Err(DownloadError::Other(anyhow::anyhow!( @@ -268,11 +261,12 @@ impl RemoteStorage for S3Bucket { let fetch_response = self .client - .list_objects_v2() - .bucket(self.bucket_name.clone()) - .set_prefix(self.prefix_in_bucket.clone()) - .set_continuation_token(continuation_token) - .send() + .list_objects_v2(ListObjectsV2Request { + bucket: self.bucket_name.clone(), + prefix: self.prefix_in_bucket.clone(), + continuation_token, + ..ListObjectsV2Request::default() + }) .await .map_err(|e| { metrics::inc_list_objects_fail(); @@ -328,12 +322,13 @@ impl RemoteStorage for S3Bucket { let fetch_response = self .client - .list_objects_v2() - .bucket(self.bucket_name.clone()) - .set_prefix(list_prefix.clone()) - .set_continuation_token(continuation_token) - .delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()) - .send() + .list_objects_v2(ListObjectsV2Request { + bucket: self.bucket_name.clone(), + prefix: list_prefix.clone(), + continuation_token, + delimiter: Some(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()), + ..ListObjectsV2Request::default() + }) .await .map_err(|e| { metrics::inc_list_objects_fail(); @@ -371,18 +366,17 @@ impl RemoteStorage for S3Bucket { .context("Concurrency limiter semaphore got closed during S3 upload")?; metrics::inc_put_object(); - - let body = Body::wrap_stream(ReaderStream::new(from)); - let bytes_stream = ByteStream::new(SdkBody::from(body)); - self.client - 
.put_object() - .bucket(self.bucket_name.clone()) - .key(to.0.to_owned()) - .set_metadata(metadata.map(|m| m.0)) - .content_length(from_size_bytes.try_into()?) - .body(bytes_stream) - .send() + .put_object(PutObjectRequest { + body: Some(StreamingBody::new_with_size( + ReaderStream::new(from), + from_size_bytes, + )), + bucket: self.bucket_name.clone(), + key: to.0.to_owned(), + metadata: metadata.map(|m| m.0), + ..PutObjectRequest::default() + }) .await .map_err(|e| { metrics::inc_put_object_fail(); @@ -418,6 +412,7 @@ impl RemoteStorage for S3Bucket { bucket: self.bucket_name.clone(), key: from.0.to_owned(), range, + ..GetObjectRequest::default() }) .await } @@ -432,10 +427,11 @@ impl RemoteStorage for S3Bucket { metrics::inc_delete_object(); self.client - .delete_object() - .bucket(self.bucket_name.clone()) - .key(remote_object_id.0.to_owned()) - .send() + .delete_object(DeleteObjectRequest { + bucket: self.bucket_name.clone(), + key: remote_object_id.0.to_owned(), + ..DeleteObjectRequest::default() + }) .await .map_err(|e| { metrics::inc_delete_object_fail(); @@ -604,7 +600,7 @@ mod tests { fn dummy_storage(workdir: PathBuf) -> S3Bucket { S3Bucket { workdir, - client: Client::new(&aws_config::SdkConfig::builder().build()), + client: S3Client::new("us-east-1".parse().unwrap()), bucket_name: "dummy-bucket".to_string(), prefix_in_bucket: Some("dummy_prefix/".to_string()), concurrency_limiter: Semaphore::new(1), diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 67f9b19446..2daa08c9b6 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -16,13 +16,14 @@ publish = false ahash = { version = "0.7", features = ["std"] } anyhow = { version = "1", features = ["backtrace", "std"] } bytes = { version = "1", features = ["serde", "std"] } +chrono = { version = "0.4", features = ["clock", "iana-time-zone", "js-sys", "oldtime", "serde", "std", "time", "wasm-bindgen", "wasmbind", "winapi"] } clap = { version = "4", features = ["color", 
"error-context", "help", "std", "string", "suggestions", "usage"] } crossbeam-utils = { version = "0.8", features = ["once_cell", "std"] } either = { version = "1", features = ["use_std"] } fail = { version = "0.5", default-features = false, features = ["failpoints"] } futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] } futures-task = { version = "0.3", default-features = false, features = ["alloc", "std"] } -futures-util = { version = "0.3", features = ["alloc", "async-await", "async-await-macro", "channel", "futures-channel", "futures-io", "futures-macro", "futures-sink", "io", "memchr", "sink", "slab", "std"] } +futures-util = { version = "0.3", default-features = false, features = ["alloc", "async-await", "async-await-macro", "channel", "futures-channel", "futures-io", "futures-macro", "futures-sink", "io", "memchr", "sink", "slab", "std"] } hashbrown = { version = "0.12", features = ["ahash", "inline-more", "raw"] } indexmap = { version = "1", default-features = false, features = ["std"] } libc = { version = "0.2", features = ["extra_traits", "std"] } @@ -30,7 +31,7 @@ log = { version = "0.4", default-features = false, features = ["serde", "std"] } memchr = { version = "2", features = ["std"] } nom = { version = "7", features = ["alloc", "std"] } num-bigint = { version = "0.4", features = ["std"] } -num-integer = { version = "0.1", features = ["i128", "std"] } +num-integer = { version = "0.1", default-features = false, features = ["i128", "std"] } num-traits = { version = "0.2", features = ["i128", "libm", "std"] } prost = { version = "0.10", features = ["prost-derive", "std"] } rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] } @@ -43,7 +44,6 @@ stable_deref_trait = { version = "1", features = ["alloc", "std"] } time = { version = "0.3", features = ["alloc", "formatting", "itoa", "macros", "parsing", "std", "time-macros"] } tokio = { version = 
"1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "process", "rt", "rt-multi-thread", "signal-hook-registry", "socket2", "sync", "time", "tokio-macros"] } tokio-util = { version = "0.7", features = ["codec", "io", "io-util", "tracing"] } -tower = { version = "0.4", features = ["__common", "balance", "buffer", "discover", "futures-core", "futures-util", "indexmap", "limit", "load", "log", "make", "pin-project", "pin-project-lite", "rand", "ready-cache", "retry", "slab", "timeout", "tokio", "tokio-util", "tracing", "util"] } tracing = { version = "0.1", features = ["attributes", "log", "std", "tracing-attributes"] } tracing-core = { version = "0.1", features = ["once_cell", "std"] } From 2b728bc69e7b1791676382e35886caf112590986 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 16 Nov 2022 15:14:36 +0000 Subject: [PATCH 04/22] test_forward_compatibility: fix path to pg_distrib_dir (#2826) Set correct `pg_distrib_dir` in `pageserver.toml` and in neon_local `config`. `test_forward_compatibility` shows flakiness during `neon_local pg start`, so hopefully, the patch will help. 
``` 2022-11-15 16:07:34.091 GMT [13338] LOG: starting with zenith basebackup at LSN 0/A6A9310, prev 0/0 2022-11-15 16:07:34.091 GMT [13338] FATAL: cannot start in read-write mode from this base backup 2022-11-15 16:07:34.091 GMT [13337] LOG: startup process (PID 13338) exited with exit code 1 ``` --- test_runner/regress/test_compatibility.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 9ad8cd393f..6b3324b7a7 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -2,7 +2,7 @@ import os import shutil import subprocess from pathlib import Path -from typing import Any +from typing import Any, Optional import pytest import toml # TODO: replace with tomllib for Python >= 3.11 @@ -160,6 +160,7 @@ def test_forward_compatibility( from_dir=compatibility_snapshot_dir, to_dir=test_output_dir / "compatibility_snapshot", port_distributor=port_distributor, + pg_distrib_dir=compatibility_postgres_distrib_dir, ) breaking_changes_allowed = ( @@ -189,7 +190,12 @@ def test_forward_compatibility( ), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage" -def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistributor): +def prepare_snapshot( + from_dir: Path, + to_dir: Path, + port_distributor: PortDistributor, + pg_distrib_dir: Optional[Path] = None, +): assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist" assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory" assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql" @@ -214,7 +220,7 @@ def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistrib # Update paths and ports in config files pageserver_toml = repo_dir / "pageserver.toml" pageserver_config = 
toml.load(pageserver_toml) - pageserver_config["remote_storage"]["local_path"] = repo_dir / "local_fs_remote_storage" + pageserver_config["remote_storage"]["local_path"] = str(repo_dir / "local_fs_remote_storage") pageserver_config["listen_http_addr"] = port_distributor.replace_with_new_port( pageserver_config["listen_http_addr"] ) @@ -225,6 +231,9 @@ def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistrib port_distributor.replace_with_new_port(ep) for ep in pageserver_config["broker_endpoints"] ] + if pg_distrib_dir: + pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir) + with pageserver_toml.open("w") as f: toml.dump(pageserver_config, f) @@ -244,7 +253,10 @@ def prepare_snapshot(from_dir: Path, to_dir: Path, port_distributor: PortDistrib sk["http_port"] = port_distributor.replace_with_new_port(sk["http_port"]) sk["pg_port"] = port_distributor.replace_with_new_port(sk["pg_port"]) - with (snapshot_config_toml).open("w") as f: + if pg_distrib_dir: + snapshot_config["pg_distrib_dir"] = str(pg_distrib_dir) + + with snapshot_config_toml.open("w") as f: toml.dump(snapshot_config, f) # Ensure that snapshot doesn't contain references to the original path From 150bddb9298e6b9bd3ac4a28d0d2eec029e53eea Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 16 Nov 2022 18:56:39 +0200 Subject: [PATCH 05/22] Clean up process start/stop handling * Poll more frequently when waiting for process start/stop. This speeds up startup and shutdown in tests. We did this already in commit 52ce1c9d53, which reduced the interval to 100 ms, but it was inadvertently increased back to 500 ms in commit d42700280f. Reduce it to 100 ms again, for both start and stop operations. * Harmonize the start and stop loops, printing the dots and notices the same way in both. I considered extracting the logic to a separate retry-function that takes a closure as argument that does the polling, but as long as we only have two copies, the code duplication isn't that bad. 
* Remove newline after "Starting pageserver" and "Starting etcd" messages, so that the progress-indicator dots that are printed once a second are printed on the same line. Before: Starting pageserver at '127.0.0.1:64000' in '.neon' ... pageserver started, pid: 2538937 After: Starting pageserver at '127.0.0.1:64000' in '.neon'... pageserver started, pid: 2538937 The "Starting safekeeper" message already got this right. * Update example output in README.md to match --- README.md | 23 ++++++------ control_plane/src/background_process.rs | 50 +++++++++++++++++-------- control_plane/src/etcd.rs | 2 +- control_plane/src/pageserver.rs | 2 +- 4 files changed, 47 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index e9c30668e0..01de24cdf6 100644 --- a/README.md +++ b/README.md @@ -125,24 +125,23 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r # Create repository in .neon with proper paths to binaries and data # Later that would be responsibility of a package install script > ./target/debug/neon_local init -Starting pageserver at '127.0.0.1:64000' in '.neon' - -Pageserver started -Successfully initialized timeline 7dd0907914ac399ff3be45fb252bfdb7 -Stopping pageserver gracefully...done! +Starting pageserver at '127.0.0.1:64000' in '.neon'. +pageserver started, pid: 2545906 +Successfully initialized timeline de200bd42b49cc1814412c7e592dd6e9 +Stopped pageserver 1 process with pid 2545906 # start pageserver and safekeeper > ./target/debug/neon_local start -Starting etcd broker using /usr/bin/etcd -Starting pageserver at '127.0.0.1:64000' in '.neon' - -Pageserver started -Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1' -Safekeeper started +Starting etcd broker using "/usr/bin/etcd" +etcd started, pid: 2545996 +Starting pageserver at '127.0.0.1:64000' in '.neon'. +pageserver started, pid: 2546005 +Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'. 
+safekeeper 1 started, pid: 2546041 # start postgres compute node > ./target/debug/neon_local pg start main -Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ... +Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ... Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432 Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres' diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index 2f8098b7c9..c558c09854 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -26,8 +26,18 @@ use nix::unistd::Pid; use utils::lock_file; -const RETRIES: u32 = 15; -const RETRY_TIMEOUT_MILLIS: u64 = 500; +// These constants control the loop used to poll for process start / stop. +// +// The loop waits for at most 10 seconds, polling every 100 ms. +// Once a second, it prints a dot ("."), to give the user an indication that +// it's waiting. If the process hasn't started/stopped after 5 seconds, +// it prints a notice that it's taking long, but keeps waiting. +// +const RETRY_UNTIL_SECS: u64 = 10; +const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS; +const RETRY_INTERVAL_MILLIS: u64 = 100; +const DOT_EVERY_RETRIES: u64 = 10; +const NOTICE_AFTER_RETRIES: u64 = 50; /// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates /// it itself. @@ -107,16 +117,16 @@ where return Ok(spawned_process); } Ok(false) => { - if retries < 5 { + if retries == NOTICE_AFTER_RETRIES { + // The process is taking a long time to start up. 
Keep waiting, but + // print a message + print!("\n{process_name} has not started yet, continuing to wait"); + } + if retries % DOT_EVERY_RETRIES == 0 { print!("."); io::stdout().flush().unwrap(); - } else { - if retries == 5 { - println!() // put a line break after dots for second message - } - println!("{process_name} has not started yet, retrying ({retries})..."); } - thread::sleep(Duration::from_millis(RETRY_TIMEOUT_MILLIS)); + thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS)); } Err(e) => { println!("{process_name} failed to start: {e:#}"); @@ -127,7 +137,8 @@ where } } } - anyhow::bail!("{process_name} could not start in {RETRIES} attempts"); + println!(); + anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds"); } /// Stops the process, using the pid file given. Returns Ok also if the process is already not running. @@ -158,7 +169,7 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any } // Wait until process is gone - for _ in 0..RETRIES { + for retries in 0..RETRIES { match process_has_stopped(pid) { Ok(true) => { println!("\n{process_name} stopped"); @@ -170,9 +181,16 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any return Ok(()); } Ok(false) => { - print!("."); - io::stdout().flush().unwrap(); - thread::sleep(Duration::from_secs(1)) + if retries == NOTICE_AFTER_RETRIES { + // The process is taking a long time to start up. 
Keep waiting, but + // print a message + print!("\n{process_name} has not stopped yet, continuing to wait"); + } + if retries % DOT_EVERY_RETRIES == 0 { + print!("."); + io::stdout().flush().unwrap(); + } + thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS)); } Err(e) => { println!("{process_name} with pid {pid} failed to stop: {e:#}"); @@ -180,8 +198,8 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any } } } - - anyhow::bail!("{process_name} with pid {pid} failed to stop in {RETRIES} attempts"); + println!(); + anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds"); } fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command { diff --git a/control_plane/src/etcd.rs b/control_plane/src/etcd.rs index 4c15914e24..60aa5da780 100644 --- a/control_plane/src/etcd.rs +++ b/control_plane/src/etcd.rs @@ -6,7 +6,7 @@ use crate::{background_process, local_env}; pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> { let etcd_broker = &env.etcd_broker; - println!( + print!( "Starting etcd broker using {:?}", etcd_broker.etcd_binary_path ); diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 18d6aee68d..aec6f5bc2c 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -237,7 +237,7 @@ impl PageServerNode { datadir: &Path, update_config: bool, ) -> anyhow::Result { - println!( + print!( "Starting pageserver at '{}' in '{}'", self.pg_connection_config.raw_address(), datadir.display() From 0a87d712945d08619b5fa51f4364f4deb102ec3e Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 16 Nov 2022 17:57:48 +0000 Subject: [PATCH 06/22] test_runner: make proxy mgmt port mandatory (#2839) Make `mgmt` port mandatory argument for `NeonProxy` (and set it for `static_proxy`) to avoid port collision when tests run in parallel. 
--- test_runner/fixtures/neon_fixtures.py | 5 ++++- test_runner/regress/test_proxy.py | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 051c140836..236330e16a 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -2073,9 +2073,9 @@ class NeonProxy(PgProtocol): self, proxy_port: int, http_port: int, + mgmt_port: int, neon_binpath: Path, auth_endpoint=None, - mgmt_port=None, ): super().__init__(dsn=auth_endpoint, port=proxy_port) self.host = "127.0.0.1" @@ -2099,6 +2099,7 @@ class NeonProxy(PgProtocol): str(self.neon_binpath / "proxy"), *["--http", f"{self.host}:{self.http_port}"], *["--proxy", f"{self.host}:{self.proxy_port}"], + *["--mgmt", f"{self.host}:{self.mgmt_port}"], *["--auth-backend", "postgres"], *["--auth-endpoint", self.auth_endpoint], ] @@ -2175,11 +2176,13 @@ def static_proxy( auth_endpoint = f"postgres://proxy:password@{host}:{port}/{dbname}" proxy_port = port_distributor.get_port() + mgmt_port = port_distributor.get_port() http_port = port_distributor.get_port() with NeonProxy( proxy_port=proxy_port, http_port=http_port, + mgmt_port=mgmt_port, neon_binpath=neon_binpath, auth_endpoint=auth_endpoint, ) as proxy: diff --git a/test_runner/regress/test_proxy.py b/test_runner/regress/test_proxy.py index b4647ebbe9..b8cfb21a5b 100644 --- a/test_runner/regress/test_proxy.py +++ b/test_runner/regress/test_proxy.py @@ -8,11 +8,11 @@ from fixtures.log_helper import log from fixtures.neon_fixtures import PSQL, NeonProxy, VanillaPostgres -def test_proxy_select_1(static_proxy): +def test_proxy_select_1(static_proxy: NeonProxy): static_proxy.safe_psql("select 1", options="project=generic-project-name") -def test_password_hack(static_proxy): +def test_password_hack(static_proxy: NeonProxy): user = "borat" password = "password" static_proxy.safe_psql( @@ -24,7 +24,7 @@ def test_password_hack(static_proxy): 
static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic) # Must also check that invalid magic won't be accepted. - with pytest.raises(psycopg2.errors.OperationalError): + with pytest.raises(psycopg2.OperationalError): magic = "broken" static_proxy.safe_psql("select 1", sslsni=0, user=user, password=magic) @@ -135,7 +135,7 @@ async def test_psql_session_id(vanilla_pg: VanillaPostgres, link_proxy: NeonProx # Pass extra options to the server. -def test_proxy_options(static_proxy): +def test_proxy_options(static_proxy: NeonProxy): with static_proxy.connect(options="project=irrelevant -cproxytest.option=value") as conn: with conn.cursor() as cur: cur.execute("SHOW proxytest.option") From 24d3ed09524b156e6726853aaa14902c695bf039 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 17 Nov 2022 01:24:37 +0200 Subject: [PATCH 07/22] Ignore another ERROR that's expected in test. Got a test failure in CI because of this. --- test_runner/regress/test_tenant_size.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 03e7129ff7..71778fa64c 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -263,6 +263,8 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder except PageserverApiException as e: # compaction is ok but just retry if this fails; related to #2442 if "cannot lock compaction critical section" in str(e): + # also ignore it in the log + env.pageserver.allowed_errors.append(".*cannot lock compaction critical section.*") time.sleep(1) continue raise From 4a60051b0d6c8fa7d4786b6dc0f6bf38cc082898 Mon Sep 17 00:00:00 2001 From: MMeent Date: Thu, 17 Nov 2022 15:31:34 +0100 Subject: [PATCH 08/22] Add codeowners section for /vendor/ (#2849) After this, consent of @neondatabase/compute is required to update the vendored PostgreSQL versions. 
--- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/CODEOWNERS b/CODEOWNERS index 4c8c8924d6..6b1273520d 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -8,3 +8,4 @@ /pgxn/ @neondatabase/compute /proxy/ @neondatabase/control-plane /safekeeper/ @neondatabase/safekeepers +/vendor/ @neondatabase/compute From 60ac2271962490cb36b713bc5b2085c80e908801 Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Thu, 17 Nov 2022 16:48:21 +0200 Subject: [PATCH 09/22] Use modern flex and bison in macOS compilations (#2847) --- Makefile | 24 +++++++++++++----------- README.md | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 6e8b659171..4b7d349b00 100644 --- a/Makefile +++ b/Makefile @@ -20,18 +20,18 @@ else $(error Bad build type '$(BUILD_TYPE)', see Makefile for options) endif -# Seccomp BPF is only available for Linux UNAME_S := $(shell uname -s) ifeq ($(UNAME_S),Linux) + # Seccomp BPF is only available for Linux PG_CONFIGURE_OPTS += --with-libseccomp -endif - -# macOS with brew-installed openssl requires explicit paths -# It can be configured with OPENSSL_PREFIX variable -UNAME_S := $(shell uname -s) -ifeq ($(UNAME_S),Darwin) - OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3) - PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib +else ifeq ($(UNAME_S),Darwin) + # macOS with brew-installed openssl requires explicit paths + # It can be configured with OPENSSL_PREFIX variable + OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3) + PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib + # macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure + # brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage + EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/: endif # Use -C option so that when PostgreSQL "make 
install" installs the @@ -73,7 +73,8 @@ $(POSTGRES_INSTALL_DIR)/build/v14/config.status: +@echo "Configuring Postgres v14 build" mkdir -p $(POSTGRES_INSTALL_DIR)/build/v14 (cd $(POSTGRES_INSTALL_DIR)/build/v14 && \ - $(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure CFLAGS='$(PG_CFLAGS)' \ + env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v14/configure \ + CFLAGS='$(PG_CFLAGS)' \ $(PG_CONFIGURE_OPTS) \ --prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v14 > configure.log) @@ -81,7 +82,8 @@ $(POSTGRES_INSTALL_DIR)/build/v15/config.status: +@echo "Configuring Postgres v15 build" mkdir -p $(POSTGRES_INSTALL_DIR)/build/v15 (cd $(POSTGRES_INSTALL_DIR)/build/v15 && \ - $(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure CFLAGS='$(PG_CFLAGS)' \ + env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-v15/configure \ + CFLAGS='$(PG_CFLAGS)' \ $(PG_CONFIGURE_OPTS) \ --prefix=$(abspath $(POSTGRES_INSTALL_DIR))/v15 > configure.log) diff --git a/README.md b/README.md index 01de24cdf6..770c24d11f 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh 1. Install XCode and dependencies ``` xcode-select --install -brew install protobuf etcd openssl +brew install protobuf etcd openssl flex bison ``` 2. 
[Install Rust](https://www.rust-lang.org/tools/install) From fd99e0fbc4174ce2963091c73ed93a01b087a63c Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Fri, 18 Nov 2022 09:10:32 +0200 Subject: [PATCH 10/22] Build pg_prewarm extension (#2794) --- Makefile | 4 ++++ vendor/postgres-v14 | 2 +- vendor/postgres-v15 | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4b7d349b00..4711dc1c7d 100644 --- a/Makefile +++ b/Makefile @@ -113,6 +113,8 @@ postgres-v14: postgres-v14-configure \ $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14 MAKELEVEL=0 install +@echo "Compiling libpq v14" $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/src/interfaces/libpq install + +@echo "Compiling pg_prewarm v14" + $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_prewarm install +@echo "Compiling pg_buffercache v14" $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v14/contrib/pg_buffercache install +@echo "Compiling pageinspect v14" @@ -125,6 +127,8 @@ postgres-v15: postgres-v15-configure \ $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15 MAKELEVEL=0 install +@echo "Compiling libpq v15" $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/src/interfaces/libpq install + +@echo "Compiling pg_prewarm v15" + $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_prewarm install +@echo "Compiling pg_buffercache v15" $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/v15/contrib/pg_buffercache install +@echo "Compiling pageinspect v15" diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index cd0693e2be..360ff1c637 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit cd0693e2be224bedfa0b61f9c5e2ff4cd88eec2c +Subproject commit 360ff1c637a57d351a7a5a391d8e8afd8fde8c3a diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 1bf5e3f53c..d31b3f7c6d 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 1bf5e3f53cbb2f5b569ed2da6d014c245841c24e +Subproject commit d31b3f7c6d108e52c8bb11e812ce4e266501ea3d From
dcb79ef08f77446564605c74a1018f7e97a4255e Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 17 Nov 2022 22:30:04 +0200 Subject: [PATCH 11/22] Silence yet another test failure from race condition between GC and delete. Another similar case to commit 9ae4da4f31. --- test_runner/regress/test_tenant_size.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 71778fa64c..d9aed351a5 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -166,6 +166,10 @@ def test_get_tenant_size_with_multiple_branches(neon_env_builder: NeonEnvBuilder env = neon_env_builder.init_start() + # FIXME: we have a race condition between GC and delete timeline. GC might fail with this + # error. Similar to https://github.com/neondatabase/neon/issues/2671 + env.pageserver.allowed_errors.append(".*InternalServerError\\(No such file or directory.*") + tenant_id = env.initial_tenant main_branch_name, main_timeline_id = env.neon_cli.list_timelines(tenant_id)[0] From 328ec1ce2469ee964f81e2b9b87fc61c290a70cd Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 18 Nov 2022 10:18:42 +0200 Subject: [PATCH 12/22] Print a more full error message, with stack trace, on GC failure. In a CI run, I got a test failure because of this error in the log, from the test_get_tenant_size_with_multiple_branches test: ERROR gc_loop{tenant_id=f1630516d4b526139836ced93be0c878}: Gc failed, retrying in 2s: No such file or directory (os error 2) There are known race conditions between GC and timeline deletion, which surely caused that error. But if we didn't know the cause, it would be pretty hard to debug without a stack trace. 
--- pageserver/src/tenant_tasks.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pageserver/src/tenant_tasks.rs b/pageserver/src/tenant_tasks.rs index 5a9c5aa3a5..d8a4d5521c 100644 --- a/pageserver/src/tenant_tasks.rs +++ b/pageserver/src/tenant_tasks.rs @@ -71,7 +71,7 @@ async fn compaction_loop(tenant_id: TenantId) { let mut sleep_duration = tenant.get_compaction_period(); if let Err(e) = tenant.compaction_iteration() { sleep_duration = wait_duration; - error!("Compaction failed, retrying in {:?}: {e:#}", sleep_duration); + error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration); } // Sleep @@ -120,7 +120,7 @@ async fn gc_loop(tenant_id: TenantId) { if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), false).await { sleep_duration = wait_duration; - error!("Gc failed, retrying in {:?}: {e:#}", sleep_duration); + error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration); } } From b9152f1ef45ccf64454b51442eaf6f2161e2c306 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Fri, 18 Nov 2022 15:04:58 +0200 Subject: [PATCH 13/22] Correctly terminate prefetch in case of pageserver restart (#2850) refer #2819 This patch requires deep knowledge of prefetch internals. So @MMeent please review it or suggest better solution. --- pgxn/neon/libpagestore.c | 13 +++--- pgxn/neon/pagestore_client.h | 5 +++ pgxn/neon/pagestore_smgr.c | 76 ++++++++++++++++++++++-------------- 3 files changed, 58 insertions(+), 36 deletions(-) diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index d8e9d8b52c..d7507e69df 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -32,11 +32,6 @@ #define PageStoreTrace DEBUG5 -#define NEON_TAG "[NEON_SMGR] " -#define neon_log(tag, fmt, ...) 
ereport(tag, \ - (errmsg(NEON_TAG fmt, ##__VA_ARGS__), \ - errhidestmt(true), errhidecontext(true))) - bool connected = false; PGconn *pageserver_conn = NULL; @@ -239,6 +234,9 @@ pageserver_receive(void) StringInfoData resp_buff; NeonResponse *resp; + if (!connected) + return NULL; + PG_TRY(); { /* read response */ @@ -248,7 +246,10 @@ pageserver_receive(void) if (resp_buff.len < 0) { if (resp_buff.len == -1) - neon_log(ERROR, "end of COPY"); + { + pageserver_disconnect(); + return NULL; + } else if (resp_buff.len == -2) neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn)); } diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h index 9b8081065c..170a0cb72d 100644 --- a/pgxn/neon/pagestore_client.h +++ b/pgxn/neon/pagestore_client.h @@ -49,6 +49,11 @@ typedef struct #define messageTag(m) (((const NeonMessage *)(m))->tag) +#define NEON_TAG "[NEON_SMGR] " +#define neon_log(tag, fmt, ...) ereport(tag, \ + (errmsg(NEON_TAG fmt, ##__VA_ARGS__), \ + errhidestmt(true), errhidecontext(true))) + /* * supertype of all the Neon*Request structs below * diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index d6fa7c46c9..d9b45e3933 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -251,9 +251,9 @@ XLogRecPtr prefetch_lsn = 0; static void consume_prefetch_responses(void); static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn); -static void prefetch_read(PrefetchRequest *slot); +static bool prefetch_read(PrefetchRequest *slot); static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn); -static void prefetch_wait_for(uint64 ring_index); +static bool prefetch_wait_for(uint64 ring_index); static void prefetch_cleanup(void); static inline void prefetch_set_unused(uint64 ring_index); @@ -393,7 +393,7 @@ prefetch_cleanup(void) * NOTE: this function may indirectly update MyPState->pfs_hash; which * invalidates any 
active pointers into the hash table. */ -static void +static bool prefetch_wait_for(uint64 ring_index) { PrefetchRequest *entry; @@ -412,8 +412,10 @@ prefetch_wait_for(uint64 ring_index) entry = GetPrfSlot(MyPState->ring_receive); Assert(entry->status == PRFS_REQUESTED); - prefetch_read(entry); + if (!prefetch_read(entry)) + return false; } + return true; } /* @@ -425,7 +427,7 @@ prefetch_wait_for(uint64 ring_index) * NOTE: this function may indirectly update MyPState->pfs_hash; which * invalidates any active pointers into the hash table. */ -static void +static bool prefetch_read(PrefetchRequest *slot) { NeonResponse *response; @@ -438,15 +440,22 @@ prefetch_read(PrefetchRequest *slot) old = MemoryContextSwitchTo(MyPState->errctx); response = (NeonResponse *) page_server->receive(); MemoryContextSwitchTo(old); - - /* update prefetch state */ - MyPState->n_responses_buffered += 1; - MyPState->n_requests_inflight -= 1; - MyPState->ring_receive += 1; + if (response) + { + /* update prefetch state */ + MyPState->n_responses_buffered += 1; + MyPState->n_requests_inflight -= 1; + MyPState->ring_receive += 1; - /* update slot state */ - slot->status = PRFS_RECEIVED; - slot->response = response; + /* update slot state */ + slot->status = PRFS_RECEIVED; + slot->response = response; + return true; + } + else + { + return false; + } } /* @@ -746,11 +755,16 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls static NeonResponse * page_server_request(void const *req) { - page_server->send((NeonRequest *) req); - page_server->flush(); - MyPState->ring_flush = MyPState->ring_unused; - consume_prefetch_responses(); - return page_server->receive(); + NeonResponse* resp; + do { + page_server->send((NeonRequest *) req); + page_server->flush(); + MyPState->ring_flush = MyPState->ring_unused; + consume_prefetch_responses(); + resp = page_server->receive(); + } while (resp == NULL); + return resp; + } @@ -1755,22 +1769,24 @@ neon_read_at_lsn(RelFileNode 
rnode, ForkNumber forkNum, BlockNumber blkno, } } - if (entry == NULL) + do { - n_prefetch_misses += 1; + if (entry == NULL) + { + n_prefetch_misses += 1; - ring_index = prefetch_register_buffer(buftag, &request_latest, - &request_lsn); - slot = GetPrfSlot(ring_index); - } + ring_index = prefetch_register_buffer(buftag, &request_latest, + &request_lsn); + slot = GetPrfSlot(ring_index); + } - Assert(slot->my_ring_index == ring_index); - Assert(MyPState->ring_last <= ring_index && - MyPState->ring_unused > ring_index); - Assert(slot->status != PRFS_UNUSED); - Assert(GetPrfSlot(ring_index) == slot); + Assert(slot->my_ring_index == ring_index); + Assert(MyPState->ring_last <= ring_index && + MyPState->ring_unused > ring_index); + Assert(slot->status != PRFS_UNUSED); + Assert(GetPrfSlot(ring_index) == slot); - prefetch_wait_for(ring_index); + } while (!prefetch_wait_for(ring_index)); Assert(slot->status == PRFS_RECEIVED); From 2655bdbb2e184cc0357c16f1ed8032ffb670e8db Mon Sep 17 00:00:00 2001 From: bojanserafimov Date: Fri, 18 Nov 2022 09:05:13 -0500 Subject: [PATCH 14/22] Add remote seqscans test (#2840) --- test_runner/performance/test_seqscans.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/test_runner/performance/test_seqscans.py b/test_runner/performance/test_seqscans.py index c681c50ff5..26258355c0 100644 --- a/test_runner/performance/test_seqscans.py +++ b/test_runner/performance/test_seqscans.py @@ -6,6 +6,7 @@ import pytest from fixtures.benchmark_fixture import MetricReport from fixtures.compare_fixtures import PgCompare from fixtures.log_helper import log +from pytest_lazyfixture import lazy_fixture # type: ignore @pytest.mark.parametrize( @@ -20,9 +21,16 @@ from fixtures.log_helper import log pytest.param(10000000, 1, 4), ], ) -def test_seqscans(neon_with_baseline: PgCompare, rows: int, iters: int, workers: int): - env = neon_with_baseline - +@pytest.mark.parametrize( + "env", + [ + # Run on all envs + 
pytest.param(lazy_fixture("neon_compare"), id="neon"), + pytest.param(lazy_fixture("vanilla_compare"), id="vanilla"), + pytest.param(lazy_fixture("remote_compare"), id="remote", marks=pytest.mark.remote_cluster), + ], +) +def test_seqscans(env: PgCompare, rows: int, iters: int, workers: int): with closing(env.pg.connect()) as conn: with conn.cursor() as cur: cur.execute("create table t (i integer);") From d783889a1fc58f00ceb5d2bf0dcfe5f7ca3f4406 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 17 Nov 2022 12:32:11 -0500 Subject: [PATCH 15/22] timeline: explicit tracking of flush loop state: NotStarted, Running, Exited This allows us to error out in the case where we request flush but the flush loop is not running. Before, we would only track whether it was started, but not when it exited. Better to use an enum with 3 states than a 2-state bool because then the error message can answer the question whether we ever started the flush loop or not. --- pageserver/src/tenant/timeline.rs | 49 +++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 0b2f7876db..63ba4c10e6 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -61,6 +61,13 @@ use crate::{ storage_sync::{self, index::LayerFileMetadata}, }; +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +enum FlushLoopState { + NotStarted, + Running, + Exited, +} + pub struct Timeline { conf: &'static PageServerConf, tenant_conf: Arc>, @@ -122,7 +129,7 @@ pub struct Timeline { write_lock: Mutex<()>, /// Used to avoid multiple `flush_loop` tasks running - flush_loop_started: Mutex, + flush_loop_state: Mutex, /// layer_flush_start_tx can be used to wake up the layer-flushing task. /// The value is a counter, incremented every time a new flush cycle is requested. 
@@ -755,7 +762,7 @@ impl Timeline { upload_layers: AtomicBool::new(upload_layers), - flush_loop_started: Mutex::new(false), + flush_loop_state: Mutex::new(FlushLoopState::NotStarted), layer_flush_start_tx, layer_flush_done_tx, @@ -794,13 +801,23 @@ impl Timeline { } pub(super) fn maybe_spawn_flush_loop(self: &Arc) { - let mut flush_loop_started = self.flush_loop_started.lock().unwrap(); - if *flush_loop_started { - info!( - "skipping attempt to start flush_loop twice {}/{}", - self.tenant_id, self.timeline_id - ); - return; + let mut flush_loop_state = self.flush_loop_state.lock().unwrap(); + match *flush_loop_state { + FlushLoopState::NotStarted => (), + FlushLoopState::Running => { + info!( + "skipping attempt to start flush_loop twice {}/{}", + self.tenant_id, self.timeline_id + ); + return; + } + FlushLoopState::Exited => { + warn!( + "ignoring attempt to restart exited flush_loop {}/{}", + self.tenant_id, self.timeline_id + ); + return; + } } let layer_flush_start_rx = self.layer_flush_start_tx.subscribe(); @@ -813,11 +830,16 @@ impl Timeline { Some(self.timeline_id), "layer flush task", false, - async move { self_clone.flush_loop(layer_flush_start_rx).await; Ok(()) } + async move { + self_clone.flush_loop(layer_flush_start_rx).await; + let mut flush_loop_state = self_clone.flush_loop_state.lock().unwrap(); + assert_eq!(*flush_loop_state, FlushLoopState::Running); + *flush_loop_state = FlushLoopState::Exited; + Ok(()) } .instrument(info_span!(parent: None, "layer flush task", tenant = %self.tenant_id, timeline = %self.timeline_id)) ); - *flush_loop_started = true; + *flush_loop_state = FlushLoopState::Running; } pub(super) fn launch_wal_receiver(self: &Arc) { @@ -1365,8 +1387,9 @@ impl Timeline { // finished, instead of some other flush that was started earlier. 
let mut my_flush_request = 0; - if !&*self.flush_loop_started.lock().unwrap() { - anyhow::bail!("cannot flush frozen layers when flush_loop is not running") + let flush_loop_state = { *self.flush_loop_state.lock().unwrap() }; + if flush_loop_state != FlushLoopState::Running { + anyhow::bail!("cannot flush frozen layers when flush_loop is not running, state is {flush_loop_state:?}") } self.layer_flush_start_tx.send_modify(|counter| { From f564dff0e320a859ab9eb8b40caa9a70599c829a Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 17 Nov 2022 10:53:06 -0500 Subject: [PATCH 16/22] make test_tenant_detach_smoke fail reproducibly Add failpoint that triggers the race condition. Skip test until we'll land the fix from https://github.com/neondatabase/neon/pull/2851 with https://github.com/neondatabase/neon/pull/2785 --- libs/utils/src/lib.rs | 19 +++++++++++ pageserver/src/tenant.rs | 4 +++ test_runner/regress/test_tenant_detach.py | 40 ++++++++++++++--------- 3 files changed, 48 insertions(+), 15 deletions(-) diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index 11ee7ac7eb..7b8f1623f5 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -48,6 +48,25 @@ pub mod nonblock; // Default signal handling pub mod signals; +/// use with fail::cfg("$name", "return(2000)") +#[macro_export] +macro_rules! 
failpoint_sleep_millis_async { + ($name:literal) => {{ + let should_sleep: Option = (|| { + fail::fail_point!($name, |v: Option<_>| { + let millis = v.unwrap().parse::().unwrap(); + Some(Duration::from_millis(millis)) + }); + None + })(); + if let Some(d) = should_sleep { + tracing::info!("failpoint {:?}: sleeping for {:?}", $name, d); + tokio::time::sleep(d).await; + tracing::info!("failpoint {:?}: sleep done", $name); + } + }}; +} + /// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages /// /// we have several cases: diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 11845ebb48..c5fcfda756 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1010,6 +1010,10 @@ impl Tenant { let gc_timelines = self.refresh_gc_info_internal(target_timeline_id, horizon, pitr)?; + utils::failpoint_sleep_millis_async!("gc_iteration_internal_after_getting_gc_timelines"); + + info!("starting on {} timelines", gc_timelines.len()); + // Perform GC for each timeline. // // Note that we don't hold the GC lock here because we don't want diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py index f049b9af20..de85e0322c 100644 --- a/test_runner/regress/test_tenant_detach.py +++ b/test_runner/regress/test_tenant_detach.py @@ -1,3 +1,4 @@ +import time from threading import Thread import pytest @@ -11,11 +12,21 @@ def do_gc_target( ): """Hack to unblock main, see https://github.com/neondatabase/neon/issues/2211""" try: + log.info("sending gc http request") pageserver_http.timeline_gc(tenant_id, timeline_id, 0) except Exception as e: log.error("do_gc failed: %s", e) + finally: + log.info("gc http thread returning") +@pytest.mark.skip( + reason=""" +Commit 'make test_tenant_detach_smoke fail reproducibly' adds failpoint to make this test fail reproducibly. 
+Fix in https://github.com/neondatabase/neon/pull/2851 will come as part of +https://github.com/neondatabase/neon/pull/2785 . +""" +) def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() pageserver_http = env.pageserver.http_client() @@ -51,7 +62,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): ] ) - # gc should not try to even start + # gc should not try to even start on a timeline that doesn't exist with pytest.raises( expected_exception=PageserverApiException, match="gc target timeline does not exist" ): @@ -61,25 +72,24 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder): # the error will be printed to the log too env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*") - # try to concurrently run gc and detach + # Detach while running manual GC. + # It should wait for manual GC to finish (right now it doesn't that's why this test fails sometimes) + pageserver_http.configure_failpoints( + ("gc_iteration_internal_after_getting_gc_timelines", "return(2000)") + ) gc_thread = Thread(target=lambda: do_gc_target(pageserver_http, tenant_id, timeline_id)) gc_thread.start() + time.sleep(1) + # By now the gc task is spawned but in sleep for another second due to the failpoint. 
- last_error = None - for i in range(3): - try: - pageserver_http.tenant_detach(tenant_id) - except Exception as e: - last_error = e - log.error(f"try {i} error detaching tenant: {e}") - continue - else: - break - # else is called if the loop finished without reaching "break" - else: - pytest.fail(f"could not detach tenant: {last_error}") + log.info("detaching tenant") + pageserver_http.tenant_detach(tenant_id) + log.info("tenant detached without error") + log.info("wait for gc thread to return") gc_thread.join(timeout=10) + assert not gc_thread.is_alive() + log.info("gc thread returned") # check that nothing is left on disk for deleted tenant assert not (env.repo_dir / "tenants" / str(tenant_id)).exists() From e28eda79391f0c4c293a1bde2324484bab384a57 Mon Sep 17 00:00:00 2001 From: Egor Suvorov Date: Sat, 19 Nov 2022 01:30:41 +0300 Subject: [PATCH 17/22] sourcetree/docs: mention hakari generate (#2864) --- docs/sourcetree.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/sourcetree.md b/docs/sourcetree.md index 4b4efcecd7..4ea83dd068 100644 --- a/docs/sourcetree.md +++ b/docs/sourcetree.md @@ -83,6 +83,16 @@ A subject for future modularization. `/libs/metrics`: Helpers for exposing Prometheus metrics from the server. +### Adding dependencies +When you add a Cargo dependency, you should update hakari manifest by running commands below and committing the updated `Cargo.lock` and `workspace_hack/`. There may be no changes, that's fine. + +```bash +cargo hakari generate +cargo hakari manage-deps +``` + +If you don't have hakari installed (`error: no such subcommand: hakari`), install it by running `cargo install cargo-hakari`. + ## Using Python Note that Debian/Ubuntu Python packages are stale, as it commonly happens, so manual installation of dependencies is not recommended. 
From a50a7e8ac083285222fa5923e25991eaeee62885 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 18 Nov 2022 16:56:27 +0200 Subject: [PATCH 18/22] Try to silence test_gc_cutoff flakiness. Commit d013a2b227 changed the test, so that it fails if pgbench runs to completion without triggering the failpoint. That has now happened several times in the CI. That's not expected, so this needs some investigation, but as a quick fix just make the pgbench runs longer so that we're closer to the situation before commit d013a2b227. See https://github.com/neondatabase/neon/issues/2856 --- test_runner/regress/test_gc_cutoff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_runner/regress/test_gc_cutoff.py b/test_runner/regress/test_gc_cutoff.py index f760c993f4..9c9b9d26c7 100644 --- a/test_runner/regress/test_gc_cutoff.py +++ b/test_runner/regress/test_gc_cutoff.py @@ -38,7 +38,7 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): for _ in range(5): with pytest.raises(Exception): - pg_bin.run_capture(["pgbench", "-N", "-c5", "-T100", "-Mprepared", connstr]) + pg_bin.run_capture(["pgbench", "-N", "-c5", "-T200", "-Mprepared", connstr]) env.pageserver.stop() env.pageserver.start() pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit")) From 3f393276228319c7b8556a053fd0d830d588f6b4 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 19 Nov 2022 01:20:09 +0200 Subject: [PATCH 19/22] Silence a few compiler warnings I saw these from the build of the compute docker image in the CI (compute-node-image-v15): pagestore_smgr.c: In function 'neon_prefetch': pagestore_smgr.c:1654:2: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement] 1654 | BufferTag tag = (BufferTag) { | ^~~~~~~~~ walproposer.c:197:1: warning: no previous prototype for 'WalProposerSync' [-Wmissing-prototypes] 197 | WalProposerSync(int argc, char *argv[]) | ^~~~~~~~~~~~~~~ libpagestore.c: In function 
'pageserver_connect': libpagestore.c:100:9: warning: variable 'wc' set but not used [-Wunused-but-set-variable] 100 | int wc; | ^~ libpagestore.c: In function 'call_PQgetCopyData': libpagestore.c:144:9: warning: variable 'wc' set but not used [-Wunused-but-set-variable] 144 | int wc; | ^~ Harmless warnings, but let's be tidy. In the passing, I added some "extern" to a few function declarations that were missing them, and marked WalProposerSync as "static". Those changes are also purely cosmetic. --- pgxn/neon/libpagestore.c | 6 ++---- pgxn/neon/pagestore_smgr.c | 5 +++-- pgxn/neon/walproposer.c | 3 ++- pgxn/neon/walproposer.h | 20 ++++++++++---------- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index d7507e69df..df92a1e2f4 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -92,11 +92,10 @@ pageserver_connect() while (PQisBusy(pageserver_conn)) { - int wc; WaitEvent event; /* Sleep until there's something to do */ - wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION); + (void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION); ResetLatch(MyLatch); CHECK_FOR_INTERRUPTS(); @@ -136,11 +135,10 @@ retry: if (ret == 0) { - int wc; WaitEvent event; /* Sleep until there's something to do */ - wc = WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION); + (void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION); ResetLatch(MyLatch); CHECK_FOR_INTERRUPTS(); diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index d9b45e3933..21067fb56f 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -1649,7 +1649,8 @@ neon_close(SMgrRelation reln, ForkNumber forknum) bool neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) { - uint64 ring_index PG_USED_FOR_ASSERTS_ONLY; + BufferTag tag; + uint64 ring_index PG_USED_FOR_ASSERTS_ONLY; switch 
(reln->smgr_relpersistence) { @@ -1665,7 +1666,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence); } - BufferTag tag = (BufferTag) { + tag = (BufferTag) { .rnode = reln->smgr_rnode.node, .forkNum = forknum, .blockNum = blocknum diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index c24142dca1..8323811b0d 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -119,6 +119,7 @@ static TimestampTz last_reconnect_attempt; static WalproposerShmemState * walprop_shared; /* Prototypes for private functions */ +static void WalProposerRegister(void); static void WalProposerInit(XLogRecPtr flushRecPtr, uint64 systemId); static void WalProposerStart(void); static void WalProposerLoop(void); @@ -455,7 +456,7 @@ WalProposerPoll(void) /* * Register a background worker proposing WAL to wal acceptors. */ -void +static void WalProposerRegister(void) { BackgroundWorker bgw; diff --git a/pgxn/neon/walproposer.h b/pgxn/neon/walproposer.h index 3c4f080353..362b194b32 100644 --- a/pgxn/neon/walproposer.h +++ b/pgxn/neon/walproposer.h @@ -377,18 +377,18 @@ typedef struct Safekeeper AppendResponse appendResponse; /* feedback for master */ } Safekeeper; -extern PGDLLIMPORT void WalProposerMain(Datum main_arg); -void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos); -void WalProposerPoll(void); -void WalProposerRegister(void); -void ParseReplicationFeedbackMessage(StringInfo reply_message, - ReplicationFeedback * rf); +extern void WalProposerSync(int argc, char *argv[]); +extern void WalProposerMain(Datum main_arg); +extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos); +extern void WalProposerPoll(void); +extern void ParseReplicationFeedbackMessage(StringInfo reply_message, + ReplicationFeedback *rf); extern void StartProposerReplication(StartReplicationCmd *cmd); -Size WalproposerShmemSize(void); -bool WalproposerShmemInit(void); 
-void replication_feedback_set(ReplicationFeedback * rf); -void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn); +extern Size WalproposerShmemSize(void); +extern bool WalproposerShmemInit(void); +extern void replication_feedback_set(ReplicationFeedback *rf); +extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn); /* libpqwalproposer hooks & helper type */ From ed40a045c09898759708a22c453b17ca42a1ac94 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 19 Nov 2022 12:06:33 +0200 Subject: [PATCH 20/22] Add more logging to track down test_gc_cutoff failure. see https://github.com/neondatabase/neon/issues/2856 --- test_runner/regress/test_gc_cutoff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_runner/regress/test_gc_cutoff.py b/test_runner/regress/test_gc_cutoff.py index 9c9b9d26c7..5899102586 100644 --- a/test_runner/regress/test_gc_cutoff.py +++ b/test_runner/regress/test_gc_cutoff.py @@ -38,7 +38,7 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): for _ in range(5): with pytest.raises(Exception): - pg_bin.run_capture(["pgbench", "-N", "-c5", "-T200", "-Mprepared", connstr]) + pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T200", "-Mprepared", connstr]) env.pageserver.stop() env.pageserver.start() pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit")) From 684329d4d259fc947124522c2a2a7eb34bd1c226 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 19 Nov 2022 19:28:56 +0200 Subject: [PATCH 21/22] Another attempt at silencing test_gc_cutoff failures. Increase the pgbench runtimes even further. The theory is that when there are many other tests running at the same time, one pgbench run could take a long time until it generates enough layers for GC to kick in. 
--- test_runner/regress/test_gc_cutoff.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_runner/regress/test_gc_cutoff.py b/test_runner/regress/test_gc_cutoff.py index 5899102586..1b98a414da 100644 --- a/test_runner/regress/test_gc_cutoff.py +++ b/test_runner/regress/test_gc_cutoff.py @@ -8,6 +8,7 @@ from fixtures.neon_fixtures import NeonEnvBuilder, PgBin # normally restarts after it. Also, there should be GC ERRORs in the log, # but the fixture checks the log for any unexpected ERRORs after every # test anyway, so it doesn't need any special attention here. +@pytest.mark.timeout(600) def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): env = neon_env_builder.init_start() @@ -38,7 +39,7 @@ def test_gc_cutoff(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): for _ in range(5): with pytest.raises(Exception): - pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T200", "-Mprepared", connstr]) + pg_bin.run_capture(["pgbench", "-P1", "-N", "-c5", "-T500", "-Mprepared", connstr]) env.pageserver.stop() env.pageserver.start() pageserver_http.configure_failpoints(("after-timeline-gc-removed-layers", "exit")) From cb9b26776ef6c15d75ca37013b718b2f598edcdf Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Sat, 19 Nov 2022 23:39:42 +0000 Subject: [PATCH 22/22] Fix test_seqscans on remote cluster (#2869) A remote project is reused between tests, so we need to ensure that we don't have a table with the same name already created. 
--- test_runner/performance/test_seqscans.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test_runner/performance/test_seqscans.py b/test_runner/performance/test_seqscans.py index 26258355c0..1755c70324 100644 --- a/test_runner/performance/test_seqscans.py +++ b/test_runner/performance/test_seqscans.py @@ -33,6 +33,7 @@ from pytest_lazyfixture import lazy_fixture # type: ignore def test_seqscans(env: PgCompare, rows: int, iters: int, workers: int): with closing(env.pg.connect()) as conn: with conn.cursor() as cur: + cur.execute("drop table if exists t;") cur.execute("create table t (i integer);") cur.execute(f"insert into t values (generate_series(1,{rows}));")