feat: expose storage options in LanceDB (#1204)

Exposes `storage_options` in LanceDB. This is provided for Python async,
Node `lancedb`, and Node `vectordb` (and Rust of course). The Python
synchronous API is omitted because it is not compatible with the PyArrow
filesystems it currently uses. In the future, the sync API will be changed
to wrap the async one, at which point it will also support
`storage_options`.

1. Fixes #1168
2. Closes #1165
3. Closes #1082
4. Closes #439
5. Closes #897
6. Closes #642
7. Closes #281
8. Closes #114
9. Closes #990
10. Deprecates `awsCredentials` and `awsRegion`. Users are encouraged
to use `storageOptions` instead.
This commit is contained in:
Will Jones
2024-04-10 10:12:04 -07:00
committed by GitHub
parent 25dea4e859
commit 1d23af213b
31 changed files with 3128 additions and 262 deletions

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use napi::bindgen_prelude::*;
use napi_derive::*;
@@ -64,6 +66,11 @@ impl Connection {
builder =
builder.read_consistency_interval(std::time::Duration::from_secs_f64(interval));
}
if let Some(storage_options) = options.storage_options {
for (key, value) in storage_options {
builder = builder.storage_option(key, value);
}
}
Ok(Self::inner_new(
builder
.execute()
@@ -118,14 +125,18 @@ impl Connection {
name: String,
buf: Buffer,
mode: String,
storage_options: Option<HashMap<String, String>>,
) -> napi::Result<Table> {
let batches = ipc_file_to_batches(buf.to_vec())
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
let mode = Self::parse_create_mode_str(&mode)?;
let tbl = self
.get_inner()?
.create_table(&name, batches)
.mode(mode)
let mut builder = self.get_inner()?.create_table(&name, batches).mode(mode);
if let Some(storage_options) = storage_options {
for (key, value) in storage_options {
builder = builder.storage_option(key, value);
}
}
let tbl = builder
.execute()
.await
.map_err(|e| napi::Error::from_reason(format!("{}", e)))?;
@@ -138,15 +149,22 @@ impl Connection {
name: String,
schema_buf: Buffer,
mode: String,
storage_options: Option<HashMap<String, String>>,
) -> napi::Result<Table> {
let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| {
napi::Error::from_reason(format!("Failed to marshal schema from JS to Rust: {}", e))
})?;
let mode = Self::parse_create_mode_str(&mode)?;
let tbl = self
let mut builder = self
.get_inner()?
.create_empty_table(&name, schema)
.mode(mode)
.mode(mode);
if let Some(storage_options) = storage_options {
for (key, value) in storage_options {
builder = builder.storage_option(key, value);
}
}
let tbl = builder
.execute()
.await
.map_err(|e| napi::Error::from_reason(format!("{}", e)))?;
@@ -154,10 +172,18 @@ impl Connection {
}
#[napi]
pub async fn open_table(&self, name: String) -> napi::Result<Table> {
let tbl = self
.get_inner()?
.open_table(&name)
pub async fn open_table(
&self,
name: String,
storage_options: Option<HashMap<String, String>>,
) -> napi::Result<Table> {
let mut builder = self.get_inner()?.open_table(&name);
if let Some(storage_options) = storage_options {
for (key, value) in storage_options {
builder = builder.storage_option(key, value);
}
}
let tbl = builder
.execute()
.await
.map_err(|e| napi::Error::from_reason(format!("{}", e)))?;

View File

@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use connection::Connection;
use std::collections::HashMap;
use napi_derive::*;
mod connection;
@@ -38,6 +39,10 @@ pub struct ConnectionOptions {
/// Note: this consistency only applies to read operations. Write operations are
/// always consistent.
pub read_consistency_interval: Option<f64>,
/// (For LanceDB OSS only): configuration for object storage.
///
/// The available options are described at https://lancedb.github.io/lancedb/guides/storage/
pub storage_options: Option<HashMap<String, String>>,
}
/// Write mode for writing a table.
@@ -54,7 +59,7 @@ pub struct WriteOptions {
pub mode: Option<WriteMode>,
}
#[napi]
pub async fn connect(uri: String, options: ConnectionOptions) -> napi::Result<Connection> {
Connection::new(uri, options).await
/// Options object exposed to JavaScript via `#[napi(object)]`.
///
/// NOTE(review): judging by the name, this is meant to be supplied when
/// opening a table — confirm against the call sites, which are not visible here.
#[napi(object)]
pub struct OpenTableOptions {
/// Optional map of string keys to string values.
///
/// NOTE(review): presumably the same object-storage configuration as
/// `ConnectionOptions::storage_options`, applied per table — confirm.
pub storage_options: Option<HashMap<String, String>>,
}