From a4ea1e53ae6fd4c529237e36a6a441bef8d5cb0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 11 Feb 2025 10:40:22 +0100 Subject: [PATCH] Apply Azure SDK patch to periodically load workload identity file (#10415) The SDK bug https://github.com/Azure/azure-sdk-for-rust/issues/1739 was originally worked around via #10378, but now upstream has provided a fix in [this](https://github.com/Azure/azure-sdk-for-rust/pull/1997) PR, which we've been asked to test. So this is what this PR is doing: revert #10378 (to make sure we fail if the bug isn't fixed by the SDK PR), and apply the SDK PR to our fork. Currently pointing to my local branch to check CI. I'd like to merge the [SDK fork PR](https://github.com/neondatabase/azure-sdk-for-rust/pull/2) before merging this to main. --- Cargo.lock | 10 ++--- proxy/src/context/parquet.rs | 73 +++++++++++++----------------------- 2 files changed, 32 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e73f1f9cdb..3f06a74c5e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -786,7 +786,7 @@ dependencies = [ [[package]] name = "azure_core" version = "0.21.0" -source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#66e77bdd87bf87e773acf3b0c84b532c1124367d" +source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#985db729824be324ed11527e45de722250028d9e" dependencies = [ "async-trait", "base64 0.22.1", @@ -815,7 +815,7 @@ dependencies = [ [[package]] name = "azure_identity" version = "0.21.0" -source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#66e77bdd87bf87e773acf3b0c84b532c1124367d" +source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#985db729824be324ed11527e45de722250028d9e" dependencies = [ "async-lock", "async-trait", @@ -834,7 +834,7 @@ dependencies = [ [[package]] name = "azure_storage" version = "0.21.0" -source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#66e77bdd87bf87e773acf3b0c84b532c1124367d" +source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#985db729824be324ed11527e45de722250028d9e" dependencies = [ "RustyXML", "async-lock", @@ -852,7 +852,7 @@ dependencies = [ [[package]] name = "azure_storage_blobs" version = "0.21.0" -source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#66e77bdd87bf87e773acf3b0c84b532c1124367d" +source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#985db729824be324ed11527e45de722250028d9e" dependencies = [ "RustyXML", "azure_core", @@ -872,7 +872,7 @@ dependencies = [ [[package]] name = "azure_svc_blobstorage" version = "0.21.0" -source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#66e77bdd87bf87e773acf3b0c84b532c1124367d" +source = "git+https://github.com/neondatabase/azure-sdk-for-rust.git?branch=neon#985db729824be324ed11527e45de722250028d9e" dependencies = [ "azure_core", "bytes", diff --git a/proxy/src/context/parquet.rs b/proxy/src/context/parquet.rs index 4f1dd39d92..0537ae6a62 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -187,6 +187,10 @@ pub async fn worker( let rx = futures::stream::poll_fn(move |cx| rx.poll_recv(cx)); let rx = rx.map(RequestData::from); + let storage = GenericRemoteStorage::from_config(&remote_storage_config) + .await + .context("remote storage init")?; + let properties = WriterProperties::builder() .set_data_page_size_limit(config.parquet_upload_page_size) .set_compression(config.parquet_upload_compression); @@ -220,18 +224,18 @@ pub async fn worker( let rx_disconnect = futures::stream::poll_fn(move |cx| rx_disconnect.poll_recv(cx)); let rx_disconnect = rx_disconnect.map(RequestData::from); + let storage_disconnect = + GenericRemoteStorage::from_config(&disconnect_events_storage_config) + .await + .context("remote storage for disconnect events init")?; let parquet_config_disconnect = parquet_config.clone(); tokio::try_join!( - worker_inner(remote_storage_config, rx, parquet_config), - worker_inner( - disconnect_events_storage_config, - rx_disconnect, - parquet_config_disconnect - ) + worker_inner(storage, rx, parquet_config), + worker_inner(storage_disconnect, rx_disconnect, parquet_config_disconnect) ) .map(|_| ()) } else { - worker_inner(remote_storage_config, rx, parquet_config).await + worker_inner(storage, rx, parquet_config).await } } @@ -247,32 +251,18 @@ struct ParquetConfig { test_remote_failures: u64, } -impl ParquetConfig { - async fn storage( - &self, - storage_config: &RemoteStorageConfig, - ) -> anyhow::Result { - let storage = GenericRemoteStorage::from_config(storage_config) - .await - .context("remote storage init")?; - - #[cfg(any(test, feature = "testing"))] - if self.test_remote_failures > 0 { - return Ok(GenericRemoteStorage::unreliable_wrapper( - storage, - self.test_remote_failures, - )); - } - - Ok(storage) - } -} - async fn worker_inner( - storage_config: RemoteStorageConfig, + storage: GenericRemoteStorage, rx: impl Stream, config: ParquetConfig, ) -> anyhow::Result<()> { + #[cfg(any(test, feature = "testing"))] + let storage = if config.test_remote_failures > 0 { + GenericRemoteStorage::unreliable_wrapper(storage, config.test_remote_failures) + } else { + storage + }; + let mut rx = std::pin::pin!(rx); let mut rows = Vec::with_capacity(config.rows_per_group); @@ -295,7 +285,7 @@ async fn worker_inner( } if len > config.file_size || force { last_upload = time::Instant::now(); - let file = upload_parquet(w, len, &storage_config, &config).await?; + let file = upload_parquet(w, len, &storage).await?; w = SerializedFileWriter::new(file, schema.clone(), config.propeties.clone())?; len = 0; } @@ -308,7 +298,7 @@ async fn worker_inner( } if !w.flushed_row_groups().is_empty() { - let _rtchk: Writer = upload_parquet(w, len, &storage_config, &config).await?; + let _rtchk: Writer = upload_parquet(w, len, &storage).await?; } Ok(()) @@ -350,8 +340,7 @@ where async fn upload_parquet( mut w: SerializedFileWriter>, len: i64, - storage_config: &RemoteStorageConfig, - config: &ParquetConfig, + storage: &GenericRemoteStorage, ) -> anyhow::Result> { let len_uncompressed = w .flushed_row_groups() @@ -388,15 +377,6 @@ async fn upload_parquet( size, compression, "uploading request parquet file" ); - // A bug in azure-sdk means that the identity-token-file that expires after - // 1 hour is not refreshed. This identity-token is used to fetch the actual azure storage - // tokens that last for 24 hours. After this 24 hour period, azure-sdk tries to refresh - // the storage token, but the identity token has now expired. - // - // - // To work around this, we recreate the storage every time. - let storage = config.storage(storage_config).await?; - let year = now.year(); let month = now.month(); let day = now.day(); @@ -451,8 +431,8 @@ mod tests { use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use remote_storage::{ - RemoteStorageConfig, RemoteStorageKind, S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, - DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, + GenericRemoteStorage, RemoteStorageConfig, RemoteStorageKind, S3Config, + DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, }; use tokio::sync::mpsc; use tokio::time; @@ -579,11 +559,12 @@ mod tests { timeout: std::time::Duration::from_secs(120), small_timeout: std::time::Duration::from_secs(30), }; - - worker_inner(remote_storage_config, rx, config) + let storage = GenericRemoteStorage::from_config(&remote_storage_config) .await .unwrap(); + worker_inner(storage, rx, config).await.unwrap(); + let mut files = WalkDir::new(tmpdir.as_std_path()) .into_iter() .filter_map(|entry| entry.ok())