diff --git a/Cargo.lock b/Cargo.lock index 206cc208e..8843401e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow-array", "rand 0.9.2", @@ -4404,8 +4404,8 @@ dependencies = [ [[package]] name = "lance" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow", "arrow-arith", @@ -4470,8 +4470,8 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow-array", "arrow-buffer", @@ -4490,8 +4490,8 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrayref", "paste", @@ -4500,8 +4500,8 @@ dependencies = [ [[package]] name = "lance-core" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow-array", "arrow-buffer", @@ -4538,8 +4538,8 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow", "arrow-array", @@ -4569,8 +4569,8 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow", "arrow-array", @@ -4588,8 +4588,8 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow-arith", "arrow-array", @@ -4626,8 +4626,8 @@ dependencies = [ [[package]] name = "lance-file" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow-arith", "arrow-array", @@ -4659,8 +4659,8 @@ dependencies = [ [[package]] name = "lance-geo" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "datafusion", "geo-traits", @@ -4674,8 +4674,8 @@ dependencies = [ [[package]] name = "lance-index" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow", "arrow-arith", @@ -4742,8 +4742,8 @@ dependencies = [ [[package]] name = "lance-io" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow", "arrow-arith", @@ -4783,8 +4783,8 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow-array", "arrow-buffer", @@ -4800,8 +4800,8 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow", "async-trait", @@ -4813,8 +4813,8 @@ dependencies = [ [[package]] name = "lance-namespace-impls" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow", "arrow-ipc", @@ -4857,8 +4857,8 @@ dependencies = [ [[package]] name = "lance-table" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow", "arrow-array", @@ -4897,8 +4897,8 @@ dependencies = [ [[package]] name = "lance-testing" -version = "2.0.0-beta.8" -source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.8#5bffcfc3f9716fd6eb069579eb5027ed9c69feb7" +version = "2.0.0-beta.10" +source = "git+https://github.com/lance-format/lance.git?tag=v2.0.0-beta.10#8948169e6f9299721c0dd74aa01d565d225a4afa" dependencies = [ "arrow-array", "arrow-schema", diff --git a/Cargo.toml b/Cargo.toml index 54be799f5..2bb2a4451 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,20 +15,20 @@ categories = ["database-implementations"] rust-version = "1.78.0" [workspace.dependencies] -lance = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-core = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-datagen = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-file = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-io = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-index = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-linalg = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-namespace-impls = { "version" = "=2.0.0-beta.8", default-features = false, "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-table = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-testing = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-datafusion = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-encoding = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } -lance-arrow = { "version" = "=2.0.0-beta.8", "tag" = "v2.0.0-beta.8", "git" = "https://github.com/lance-format/lance.git" } +lance = { "version" = "=2.0.0-beta.10", default-features = false, "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-core = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-datagen = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-file = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-io = { "version" = "=2.0.0-beta.10", default-features = false, "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-index = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-linalg = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-namespace-impls = { "version" = "=2.0.0-beta.10", default-features = false, "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-table = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-testing = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-datafusion = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-encoding = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } +lance-arrow = { "version" = "=2.0.0-beta.10", "tag" = "v2.0.0-beta.10", "git" = "https://github.com/lance-format/lance.git" } ahash = "0.8" # Note that this one does not include pyarrow arrow = { version = "57.2", optional = false } diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index 7743e3396..b6c1e8e8a 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -9,6 +9,7 @@ use std::sync::Arc; use arrow_array::RecordBatchReader; use arrow_schema::{Field, SchemaRef}; use lance::dataset::ReadParams; +use lance::io::ObjectStoreParams; use lance_namespace::models::{ CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest, DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest, @@ -39,7 +40,64 @@ use crate::Table; pub use lance_encoding::version::LanceFileVersion; #[cfg(feature = "remote")] use lance_io::object_store::StorageOptions; -use lance_io::object_store::StorageOptionsProvider; +use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider}; + +fn merge_storage_options( + store_params: &mut ObjectStoreParams, + pairs: impl IntoIterator, +) { + let mut storage_options = store_params.storage_options().cloned().unwrap_or_default(); + for (key, value) in pairs { + storage_options.insert(key, value); + } + store_params.storage_options_accessor = Some(Arc::new( + StorageOptionsAccessor::with_static_options(storage_options), + )); +} + +fn apply_storage_options_provider( + store_params: &mut ObjectStoreParams, + provider: Option>, +) { + let Some(provider) = provider else { + return; + }; + let storage_options = store_params.storage_options().cloned().unwrap_or_default(); + let accessor = if storage_options.is_empty() { + StorageOptionsAccessor::with_provider(provider) + } else { + StorageOptionsAccessor::with_initial_and_provider(storage_options, provider) + }; + store_params.storage_options_accessor = Some(Arc::new(accessor)); +} + +fn apply_storage_options_provider_to_write_options( + write_options: &mut WriteOptions, + provider: Option>, +) { + let Some(provider) = provider else { + return; + }; + let store_params = write_options + .lance_write_params + .get_or_insert_with(Default::default) + .store_params + .get_or_insert_with(Default::default); + apply_storage_options_provider(store_params, Some(provider)); +} + +fn apply_storage_options_provider_to_read_params( + read_params: &mut ReadParams, + provider: Option>, +) { + let Some(provider) = provider else { + return; + }; + let store_params = read_params + .store_options + .get_or_insert_with(Default::default); + apply_storage_options_provider(store_params, Some(provider)); +} /// A builder for configuring a [`Connection::table_names`] operation pub struct TableNamesBuilder { @@ -106,6 +164,7 @@ pub struct CreateTableBuilder { embeddings: Vec<(EmbeddingDefinition, Arc)>, embedding_registry: Arc, request: CreateTableRequest, + storage_options_provider: Option>, // This is a bit clumsy but we defer errors until `execute` is called // to maintain backwards compatibility data: CreateTableBuilderInitialData, @@ -128,6 +187,7 @@ impl CreateTableBuilder { ), embeddings: Vec::new(), embedding_registry, + storage_options_provider: None, data: CreateTableBuilderInitialData::Iterator(data.into_arrow()), } } @@ -147,6 +207,7 @@ impl CreateTableBuilder { ), embeddings: Vec::new(), embedding_registry, + storage_options_provider: None, data: CreateTableBuilderInitialData::Stream(data.into_arrow()), } } @@ -168,20 +229,30 @@ impl CreateTableBuilder { match self.data { CreateTableBuilderInitialData::Iterator(maybe_iter) => { let data = maybe_iter?; - Ok(CreateTableRequest { + let mut request = CreateTableRequest { data: CreateTableData::Data(data), ..self.request - }) + }; + apply_storage_options_provider_to_write_options( + &mut request.write_options, + self.storage_options_provider, + ); + Ok(request) } CreateTableBuilderInitialData::None => { unreachable!("No data provided for CreateTableBuilder") } CreateTableBuilderInitialData::Stream(maybe_stream) => { let data = maybe_stream?; - Ok(CreateTableRequest { + let mut request = CreateTableRequest { data: CreateTableData::StreamingData(data), ..self.request - }) + }; + apply_storage_options_provider_to_write_options( + &mut request.write_options, + self.storage_options_provider, + ); + Ok(request) } } } else { @@ -190,10 +261,15 @@ impl CreateTableBuilder { }; let data = maybe_iter?; let data = Box::new(WithEmbeddings::new(data, self.embeddings)); - Ok(CreateTableRequest { + let mut request = CreateTableRequest { data: CreateTableData::Data(data), ..self.request - }) + }; + apply_storage_options_provider_to_write_options( + &mut request.write_options, + self.storage_options_provider, + ); + Ok(request) } } } @@ -213,13 +289,19 @@ impl CreateTableBuilder { data: CreateTableBuilderInitialData::None, embeddings: Vec::default(), embedding_registry, + storage_options_provider: None, } } /// Execute the create table operation pub async fn execute(self) -> Result { let parent = self.parent.clone(); - let table = parent.create_table(self.request).await?; + let mut request = self.request; + apply_storage_options_provider_to_write_options( + &mut request.write_options, + self.storage_options_provider, + ); + let table = parent.create_table(request).await?; Ok(Table::new(table, parent)) } } @@ -246,16 +328,14 @@ impl CreateTableBuilder { /// /// See available options at pub fn storage_option(mut self, key: impl Into, value: impl Into) -> Self { - let store_options = self + let store_params = self .request .write_options .lance_write_params .get_or_insert(Default::default()) .store_params - .get_or_insert(Default::default()) - .storage_options .get_or_insert(Default::default()); - store_options.insert(key.into(), value.into()); + merge_storage_options(store_params, [(key.into(), value.into())]); self } @@ -269,19 +349,20 @@ impl CreateTableBuilder { mut self, pairs: impl IntoIterator, impl Into)>, ) -> Self { - let store_options = self + let store_params = self .request .write_options .lance_write_params .get_or_insert(Default::default()) .store_params - .get_or_insert(Default::default()) - .storage_options .get_or_insert(Default::default()); - for (key, value) in pairs { - store_options.insert(key.into(), value.into()); - } + merge_storage_options( + store_params, + pairs + .into_iter() + .map(|(key, value)| (key.into(), value.into())), + ); self } @@ -318,23 +399,21 @@ impl CreateTableBuilder { /// This has no effect in LanceDB Cloud. #[deprecated(since = "0.15.1", note = "Use `database_options` instead")] pub fn enable_v2_manifest_paths(mut self, use_v2_manifest_paths: bool) -> Self { - let storage_options = self + let store_params = self .request .write_options .lance_write_params .get_or_insert_with(Default::default) .store_params - .get_or_insert_with(Default::default) - .storage_options .get_or_insert_with(Default::default); - - storage_options.insert( - OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), - if use_v2_manifest_paths { - "true".to_string() - } else { - "false".to_string() - }, + let value = if use_v2_manifest_paths { + "true".to_string() + } else { + "false".to_string() + }; + merge_storage_options( + store_params, + [(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)], ); self } @@ -344,19 +423,19 @@ impl CreateTableBuilder { /// The default is `LanceFileVersion::Stable`. #[deprecated(since = "0.15.1", note = "Use `database_options` instead")] pub fn data_storage_version(mut self, data_storage_version: LanceFileVersion) -> Self { - let storage_options = self + let store_params = self .request .write_options .lance_write_params .get_or_insert_with(Default::default) .store_params - .get_or_insert_with(Default::default) - .storage_options .get_or_insert_with(Default::default); - - storage_options.insert( - OPT_NEW_TABLE_STORAGE_VERSION.to_string(), - data_storage_version.to_string(), + merge_storage_options( + store_params, + [( + OPT_NEW_TABLE_STORAGE_VERSION.to_string(), + data_storage_version.to_string(), + )], ); self } @@ -381,13 +460,7 @@ impl CreateTableBuilder { /// This allows tables to automatically refresh cloud storage credentials /// when they expire, enabling long-running operations on remote storage. pub fn storage_options_provider(mut self, provider: Arc) -> Self { - self.request - .write_options - .lance_write_params - .get_or_insert(Default::default()) - .store_params - .get_or_insert(Default::default()) - .storage_options_provider = Some(provider); + self.storage_options_provider = Some(provider); self } } @@ -397,6 +470,7 @@ pub struct OpenTableBuilder { parent: Arc, request: OpenTableRequest, embedding_registry: Arc, + storage_options_provider: Option>, } impl OpenTableBuilder { @@ -416,6 +490,7 @@ impl OpenTableBuilder { namespace_client: None, }, embedding_registry, + storage_options_provider: None, } } @@ -450,15 +525,13 @@ impl OpenTableBuilder { /// /// See available options at pub fn storage_option(mut self, key: impl Into, value: impl Into) -> Self { - let storage_options = self + let store_params = self .request .lance_read_params .get_or_insert(Default::default()) .store_options - .get_or_insert(Default::default()) - .storage_options .get_or_insert(Default::default()); - storage_options.insert(key.into(), value.into()); + merge_storage_options(store_params, [(key.into(), value.into())]); self } @@ -472,18 +545,19 @@ impl OpenTableBuilder { mut self, pairs: impl IntoIterator, impl Into)>, ) -> Self { - let storage_options = self + let store_params = self .request .lance_read_params .get_or_insert(Default::default()) .store_options - .get_or_insert(Default::default()) - .storage_options .get_or_insert(Default::default()); - for (key, value) in pairs { - storage_options.insert(key.into(), value.into()); - } + merge_storage_options( + store_params, + pairs + .into_iter() + .map(|(key, value)| (key.into(), value.into())), + ); self } @@ -507,18 +581,23 @@ impl OpenTableBuilder { /// This allows tables to automatically refresh cloud storage credentials /// when they expire, enabling long-running operations on remote storage. pub fn storage_options_provider(mut self, provider: Arc) -> Self { - self.request - .lance_read_params - .get_or_insert(Default::default()) - .store_options - .get_or_insert(Default::default()) - .storage_options_provider = Some(provider); + self.storage_options_provider = Some(provider); self } /// Open the table pub async fn execute(self) -> Result
{ - let table = self.parent.open_table(self.request).await?; + let mut request = self.request; + if let Some(provider) = self.storage_options_provider { + if let Some(read_params) = request.lance_read_params.as_mut() { + apply_storage_options_provider_to_read_params(read_params, Some(provider)); + } else { + let mut read_params = ReadParams::default(); + apply_storage_options_provider_to_read_params(&mut read_params, Some(provider)); + request.lance_read_params = Some(read_params); + } + } + let table = self.parent.open_table(request).await?; Ok(Table::new_with_embedding_registry( table, self.parent, diff --git a/rust/lancedb/src/database/listing.rs b/rust/lancedb/src/database/listing.rs index 626baa46f..4a6fc57df 100644 --- a/rust/lancedb/src/database/listing.rs +++ b/rust/lancedb/src/database/listing.rs @@ -12,7 +12,7 @@ use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode}; use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore}; use lance_datafusion::utils::StreamingWriteSource; use lance_encoding::version::LanceFileVersion; -use lance_io::object_store::StorageOptionsProvider; +use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider}; use lance_table::io::commit::commit_handler_from_url; use object_store::local::LocalFileSystem; use snafu::ResultExt; @@ -42,6 +42,49 @@ pub const OPT_NEW_TABLE_STORAGE_VERSION: &str = "new_table_data_storage_version" pub const OPT_NEW_TABLE_V2_MANIFEST_PATHS: &str = "new_table_enable_v2_manifest_paths"; pub const OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS: &str = "new_table_enable_stable_row_ids"; +fn build_storage_options_accessor( + options: Option>, + provider: Option>, +) -> Option> { + match (options, provider) { + (Some(opts), Some(provider)) => Some(Arc::new( + StorageOptionsAccessor::with_initial_and_provider(opts, provider), + )), + (None, Some(provider)) => Some(Arc::new(StorageOptionsAccessor::with_provider(provider))), + (Some(opts), None) => Some(Arc::new(StorageOptionsAccessor::with_static_options(opts))), + (None, None) => None, + } +} + +fn merge_storage_options( + store_params: &mut ObjectStoreParams, + pairs: impl IntoIterator, +) { + let mut storage_options = store_params.storage_options().cloned().unwrap_or_default(); + for (key, value) in pairs { + storage_options.insert(key, value); + } + store_params.storage_options_accessor = Some(Arc::new( + StorageOptionsAccessor::with_static_options(storage_options), + )); +} + +fn apply_storage_options_provider( + store_params: &mut ObjectStoreParams, + provider: Option>, +) { + let Some(provider) = provider else { + return; + }; + let storage_options = store_params.storage_options().cloned().unwrap_or_default(); + let accessor = if storage_options.is_empty() { + StorageOptionsAccessor::with_provider(provider) + } else { + StorageOptionsAccessor::with_initial_and_provider(storage_options, provider) + }; + store_params.storage_options_accessor = Some(Arc::new(accessor)); +} + /// Controls how new tables should be created #[derive(Clone, Debug, Default)] pub struct NewTableConfig { @@ -356,7 +399,10 @@ impl ListingDatabase { .clone() .unwrap_or_else(|| Arc::new(lance::session::Session::default())); let os_params = ObjectStoreParams { - storage_options: Some(options.storage_options.clone()), + storage_options_accessor: build_storage_options_accessor( + Some(options.storage_options.clone()), + None, + ), ..Default::default() }; let (object_store, base_path) = ObjectStore::from_uri_and_params( @@ -481,7 +527,10 @@ impl ListingDatabase { async fn drop_tables(&self, names: Vec) -> Result<()> { let object_store_params = ObjectStoreParams { - storage_options: Some(self.storage_options.clone()), + storage_options_accessor: build_storage_options_accessor( + Some(self.storage_options.clone()), + self.storage_options_provider.clone(), + ), ..Default::default() }; let mut uri = self.uri.clone(); @@ -530,7 +579,7 @@ impl ListingDatabase { .lance_write_params .as_ref() .and_then(|p| p.store_params.as_ref()) - .and_then(|sp| sp.storage_options.as_ref()); + .and_then(|sp| sp.storage_options()); let storage_version_override = storage_options .and_then(|opts| opts.get(OPT_NEW_TABLE_STORAGE_VERSION)) @@ -582,20 +631,20 @@ impl ListingDatabase { // be dropped from the cache when python GCs the table object, which // confounds reuse across tables. if !self.storage_options.is_empty() { - let storage_options = write_params + let store_params = write_params .store_params - .get_or_insert_with(Default::default) - .storage_options .get_or_insert_with(Default::default); - self.inherit_storage_options(storage_options); + let mut inherited = store_params.storage_options().cloned().unwrap_or_default(); + self.inherit_storage_options(&mut inherited); + merge_storage_options(store_params, inherited); } // Set storage options provider if available if self.storage_options_provider.is_some() { - write_params + let store_params = write_params .store_params - .get_or_insert_with(Default::default) - .storage_options_provider = self.storage_options_provider.clone(); + .get_or_insert_with(Default::default); + apply_storage_options_provider(store_params, self.storage_options_provider.clone()); } write_params.data_storage_version = self @@ -881,7 +930,10 @@ impl Database for ListingDatabase { validate_table_name(&request.target_table_name)?; let storage_params = ObjectStoreParams { - storage_options: Some(self.storage_options.clone()), + storage_options_accessor: build_storage_options_accessor( + Some(self.storage_options.clone()), + self.storage_options_provider.clone(), + ), ..Default::default() }; let read_params = ReadParams { @@ -946,24 +998,24 @@ impl Database for ListingDatabase { // be dropped from the cache when python GCs the table object, which // confounds reuse across tables. if !self.storage_options.is_empty() { - let storage_options = request + let store_params = request .lance_read_params .get_or_insert_with(Default::default) .store_options - .get_or_insert_with(Default::default) - .storage_options .get_or_insert_with(Default::default); - self.inherit_storage_options(storage_options); + let mut inherited = store_params.storage_options().cloned().unwrap_or_default(); + self.inherit_storage_options(&mut inherited); + merge_storage_options(store_params, inherited); } // Set storage options provider if available if self.storage_options_provider.is_some() { - request + let store_params = request .lance_read_params .get_or_insert_with(Default::default) .store_options - .get_or_insert_with(Default::default) - .storage_options_provider = self.storage_options_provider.clone(); + .get_or_insert_with(Default::default); + apply_storage_options_provider(store_params, self.storage_options_provider.clone()); } // Some ReadParams are exposed in the OpenTableBuilder, but we also @@ -1869,7 +1921,9 @@ mod tests { let write_options = WriteOptions { lance_write_params: Some(lance::dataset::WriteParams { store_params: Some(lance::io::ObjectStoreParams { - storage_options: Some(storage_options), + storage_options_accessor: Some(Arc::new( + StorageOptionsAccessor::with_static_options(storage_options), + )), ..Default::default() }), ..Default::default() @@ -1943,7 +1997,9 @@ mod tests { let write_options = WriteOptions { lance_write_params: Some(lance::dataset::WriteParams { store_params: Some(lance::io::ObjectStoreParams { - storage_options: Some(storage_options), + storage_options_accessor: Some(Arc::new( + StorageOptionsAccessor::with_static_options(storage_options), + )), ..Default::default() }), ..Default::default() diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index 60421905d..901107882 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -40,7 +40,7 @@ use lance_index::vector::pq::PQBuildParams; use lance_index::vector::sq::builder::SQBuildParams; use lance_index::DatasetIndexExt; use lance_index::IndexType; -use lance_io::object_store::LanceNamespaceStorageOptionsProvider; +use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsAccessor}; use lance_namespace::models::{ QueryTableRequest as NsQueryTableRequest, QueryTableRequestColumns, QueryTableRequestFullTextQuery, QueryTableRequestVector, StringFtsQuery, @@ -1667,18 +1667,14 @@ impl NativeTable { // Use DatasetBuilder::from_namespace which automatically fetches location // and storage options from the namespace - let builder = DatasetBuilder::from_namespace( - namespace_client.clone(), - table_id, - false, // Don't ignore namespace storage options - ) - .await - .map_err(|e| match e { - lance::Error::Namespace { source, .. } => Error::Runtime { - message: format!("Failed to get table info from namespace: {:?}", source), - }, - source => Error::Lance { source }, - })?; + let builder = DatasetBuilder::from_namespace(namespace_client.clone(), table_id) + .await + .map_err(|e| match e { + lance::Error::Namespace { source, .. } => Error::Runtime { + message: format!("Failed to get table info from namespace: {:?}", source), + }, + source => Error::Lance { source }, + })?; let dataset = builder .with_read_params(params) @@ -1882,7 +1878,9 @@ impl NativeTable { let store_params = params .store_params .get_or_insert_with(ObjectStoreParams::default); - store_params.storage_options_provider = Some(storage_options_provider); + store_params.storage_options_accessor = Some(Arc::new( + StorageOptionsAccessor::with_provider(storage_options_provider), + )); // Patch the params if we have a write store wrapper let params = match write_store_wrapper.clone() { @@ -3243,7 +3241,7 @@ impl BaseTable for NativeTable { .get() .await .ok() - .and_then(|dataset| dataset.storage_options().cloned()) + .and_then(|dataset| dataset.initial_storage_options().cloned()) } async fn index_stats(&self, index_name: &str) -> Result> {