feat: support remote options for remote lancedb connection (#1895)

* Support subset of storage options as remote options
* Send Azure storage account name via HTTP header
This commit is contained in:
Ryan Green
2024-11-29 14:08:13 -03:30
committed by GitHub
parent d914722f79
commit 6c2d4c10a4
5 changed files with 76 additions and 3 deletions

View File

@@ -24,6 +24,7 @@ rust-version = "1.80.0" # TO
lance = { "version" = "=0.20.0", "features" = [
"dynamodb",
], git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-io = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-index = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-linalg = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }
lance-table = { version = "=0.20.0", git = "https://github.com/lancedb/lance.git", tag = "v0.20.0-beta.3" }

View File

@@ -27,6 +27,7 @@ half = { workspace = true }
lazy_static.workspace = true
lance = { workspace = true }
lance-datafusion.workspace = true
lance-io = { workspace = true }
lance-index = { workspace = true }
lance-table = { workspace = true }
lance-linalg = { workspace = true }

View File

@@ -38,6 +38,8 @@ use crate::table::{NativeTable, TableDefinition, WriteOptions};
use crate::utils::validate_table_name;
use crate::Table;
pub use lance_encoding::version::LanceFileVersion;
#[cfg(feature = "remote")]
use lance_io::object_store::StorageOptions;
use lance_table::io::commit::commit_handler_from_url;
pub const LANCE_FILE_EXTENSION: &str = "lance";
@@ -718,12 +720,14 @@ impl ConnectBuilder {
message: "An api_key is required when connecting to LanceDb Cloud".to_string(),
})?;
let storage_options = StorageOptions(self.storage_options.clone());
let internal = Arc::new(crate::remote::db::RemoteDatabase::try_new(
&self.uri,
&api_key,
&region,
self.host_override,
self.client_config,
storage_options.into(),
)?);
Ok(Connection {
internal,
@@ -856,7 +860,7 @@ impl Database {
let table_base_uri = if let Some(store) = engine {
static WARN_ONCE: std::sync::Once = std::sync::Once::new();
WARN_ONCE.call_once(|| {
log::warn!("Specifing engine is not a publicly supported feature in lancedb yet. THE API WILL CHANGE");
log::warn!("Specifying engine is not a publicly supported feature in lancedb yet. THE API WILL CHANGE");
});
let old_scheme = url.scheme().to_string();
let new_scheme = format!("{}+{}", old_scheme, store);

View File

@@ -21,6 +21,7 @@ use reqwest::{
};
use crate::error::{Error, Result};
use crate::remote::db::RemoteOptions;
const REQUEST_ID_HEADER: &str = "x-request-id";
@@ -215,6 +216,7 @@ impl RestfulLanceDbClient<Sender> {
region: &str,
host_override: Option<String>,
client_config: ClientConfig,
options: &RemoteOptions,
) -> Result<Self> {
let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
@@ -255,6 +257,7 @@ impl RestfulLanceDbClient<Sender> {
region,
db_name,
host_override.is_some(),
options,
)?)
.user_agent(client_config.user_agent)
.build()
@@ -262,6 +265,7 @@ impl RestfulLanceDbClient<Sender> {
message: "Failed to build HTTP client".into(),
source: Some(Box::new(err)),
})?;
let host = match host_override {
Some(host_override) => host_override,
None => format!("https://{}.{}.api.lancedb.com", db_name, region),
@@ -287,6 +291,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
region: &str,
db_name: &str,
has_host_override: bool,
options: &RemoteOptions,
) -> Result<HeaderMap> {
let mut headers = HeaderMap::new();
headers.insert(
@@ -313,6 +318,23 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
);
}
if let Some(v) = options.0.get("account_name") {
headers.insert(
"x-azure-storage-account-name",
HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
message: format!("non-ascii storage account name '{}' provided", db_name),
})?,
);
}
if let Some(v) = options.0.get("azure_storage_account_name") {
headers.insert(
"x-azure-storage-account-name",
HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
message: format!("non-ascii storage account name '{}' provided", db_name),
})?,
);
}
Ok(headers)
}

View File

@@ -12,11 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use arrow_array::RecordBatchReader;
use async_trait::async_trait;
use http::StatusCode;
use lance_io::object_store::StorageOptions;
use moka::future::Cache;
use reqwest::header::CONTENT_TYPE;
use serde::Deserialize;
@@ -53,9 +55,16 @@ impl RemoteDatabase {
region: &str,
host_override: Option<String>,
client_config: ClientConfig,
options: RemoteOptions,
) -> Result<Self> {
let client =
RestfulLanceDbClient::try_new(uri, api_key, region, host_override, client_config)?;
let client = RestfulLanceDbClient::try_new(
uri,
api_key,
region,
host_override,
client_config,
&options,
)?;
let table_cache = Cache::builder()
.time_to_live(std::time::Duration::from_secs(300))
@@ -243,6 +252,29 @@ impl<S: HttpSend> ConnectionInternal for RemoteDatabase<S> {
}
}
/// RemoteOptions contains a subset of StorageOptions that are compatible with Remote LanceDB connections
#[derive(Clone, Debug, Default)]
pub struct RemoteOptions(pub HashMap<String, String>);
impl RemoteOptions {
pub fn new(options: HashMap<String, String>) -> Self {
Self(options)
}
}
impl From<StorageOptions> for RemoteOptions {
fn from(options: StorageOptions) -> Self {
let supported_opts = vec!["account_name", "azure_storage_account_name"];
let mut filtered = HashMap::new();
for opt in supported_opts {
if let Some(v) = options.0.get(opt) {
filtered.insert(opt.to_string(), v.to_string());
}
}
RemoteOptions::new(filtered)
}
}
#[cfg(test)]
mod tests {
use std::sync::{Arc, OnceLock};
@@ -250,6 +282,7 @@ mod tests {
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator};
use arrow_schema::{DataType, Field, Schema};
use crate::connection::ConnectBuilder;
use crate::{
connection::CreateTableMode,
remote::{ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE},
@@ -541,4 +574,16 @@ mod tests {
});
conn.rename_table("table1", "table2").await.unwrap();
}
#[tokio::test]
async fn test_connect_remote_options() {
let db_uri = "db://my-container/my-prefix";
let _ = ConnectBuilder::new(db_uri)
.region("us-east-1")
.api_key("my-api-key")
.storage_options(vec![("azure_storage_account_name", "my-storage-account")])
.execute()
.await
.unwrap();
}
}