From b09a8517059a74499c1fdfe37f9c2842a7cbb229 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Mon, 6 Nov 2023 16:16:55 +0100 Subject: [PATCH] Make azure blob storage not do extra metadata requests (#5777) Load the metadata from the returned `GetBlobResponse` and avoid downloading it via a separate request. As it turns out, the SDK does return the metadata: https://github.com/Azure/azure-sdk-for-rust/issues/1439 . This PR will reduce the number of requests to Azure caused by downloads. Fixes #5571 --- libs/remote_storage/src/azure_blob.rs | 43 +++++++-------------------- 1 file changed, 10 insertions(+), 33 deletions(-) diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs index f0521d27c5..ae08e9b171 100644 --- a/libs/remote_storage/src/azure_blob.rs +++ b/libs/remote_storage/src/azure_blob.rs @@ -1,21 +1,18 @@ //! Azure Blob Storage wrapper +use std::collections::HashMap; use std::env; use std::num::NonZeroU32; use std::sync::Arc; -use std::{borrow::Cow, collections::HashMap, io::Cursor}; +use std::{borrow::Cow, io::Cursor}; use super::REMOTE_STORAGE_PREFIX_SEPARATOR; use anyhow::Result; use azure_core::request_options::{MaxResults, Metadata, Range}; -use azure_core::Header; use azure_identity::DefaultAzureCredential; use azure_storage::StorageCredentials; use azure_storage_blobs::prelude::ClientBuilder; -use azure_storage_blobs::{ - blob::operations::GetBlobBuilder, - prelude::{BlobClient, ContainerClient}, -}; +use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient}; use futures_util::StreamExt; use http_types::StatusCode; use tokio::io::AsyncRead; @@ -112,16 +109,19 @@ impl AzureBlobStorage { async fn download_for_builder( &self, - metadata: StorageMetadata, builder: GetBlobBuilder, ) -> Result { let mut response = builder.into_stream(); + let mut metadata = HashMap::new(); // TODO give proper streaming response instead of buffering into RAM // https://github.com/neondatabase/neon/issues/5563 let mut buf = Vec::new(); while let Some(part) = response.next().await { let part = part.map_err(to_download_error)?; + if let Some(blob_meta) = part.blob.metadata { + metadata.extend(blob_meta.iter().map(|(k, v)| (k.to_owned(), v.to_owned()))); + } let data = part .data .collect() @@ -131,28 +131,9 @@ impl AzureBlobStorage { } Ok(Download { download_stream: Box::pin(Cursor::new(buf)), - metadata: Some(metadata), + metadata: Some(StorageMetadata(metadata)), }) } - // TODO get rid of this function once we have metadata included in the response - // https://github.com/Azure/azure-sdk-for-rust/issues/1439 - async fn get_metadata( - &self, - blob_client: &BlobClient, - ) -> Result { - let builder = blob_client.get_metadata(); - - let response = builder.into_future().await.map_err(to_download_error)?; - let mut map = HashMap::new(); - - for md in response.metadata.iter() { - map.insert( - md.name().as_str().to_string(), - md.value().as_str().to_string(), - ); - } - Ok(StorageMetadata(map)) - } async fn permit(&self, kind: RequestKind) -> tokio::sync::SemaphorePermit<'_> { self.concurrency_limiter @@ -269,11 +250,9 @@ impl RemoteStorage for AzureBlobStorage { let _permit = self.permit(RequestKind::Get).await; let blob_client = self.client.blob_client(self.relative_path_to_name(from)); - let metadata = self.get_metadata(&blob_client).await?; - let builder = blob_client.get(); - self.download_for_builder(metadata, builder).await + self.download_for_builder(builder).await } async fn download_byte_range( @@ -285,8 +264,6 @@ impl RemoteStorage for AzureBlobStorage { let _permit = self.permit(RequestKind::Get).await; let blob_client = self.client.blob_client(self.relative_path_to_name(from)); - let metadata = self.get_metadata(&blob_client).await?; - let mut builder = blob_client.get(); if let Some(end_exclusive) = end_exclusive { @@ -301,7 +278,7 @@ impl RemoteStorage for AzureBlobStorage { builder = builder.range(Range::new(start_inclusive, end_exclusive)); } - self.download_for_builder(metadata, builder).await + self.download_for_builder(builder).await } async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {