feat: add flag to enable faster manifest paths (#1612)

The new V2 manifest path scheme makes discovering the latest version of
a table a constant-time operation on object stores, regardless of the
number of versions in the table. See the benchmarks in this PR:
https://github.com/lancedb/lance/pull/2798

Closes #1583
This commit is contained in:
Will Jones
2024-09-09 11:34:36 -07:00
committed by GitHub
parent 029b01bbbf
commit 2a6586d6fb
16 changed files with 292 additions and 2 deletions

View File

@@ -124,6 +124,7 @@ impl Connection {
mode: String,
storage_options: Option<HashMap<String, String>>,
data_storage_options: Option<String>,
enable_v2_manifest_paths: Option<bool>,
) -> napi::Result<Table> {
let batches = ipc_file_to_batches(buf.to_vec())
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
@@ -140,6 +141,9 @@ impl Connection {
.map_err(|e| napi::Error::from_reason(format!("{}", e)))?,
);
}
if let Some(enable_v2_manifest_paths) = enable_v2_manifest_paths {
builder = builder.enable_v2_manifest_paths(enable_v2_manifest_paths);
}
let tbl = builder
.execute()
.await
@@ -155,6 +159,7 @@ impl Connection {
mode: String,
storage_options: Option<HashMap<String, String>>,
data_storage_options: Option<String>,
enable_v2_manifest_paths: Option<bool>,
) -> napi::Result<Table> {
let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| {
napi::Error::from_reason(format!("Failed to marshal schema from JS to Rust: {}", e))
@@ -175,6 +180,9 @@ impl Connection {
.map_err(|e| napi::Error::from_reason(format!("{}", e)))?,
);
}
if let Some(enable_v2_manifest_paths) = enable_v2_manifest_paths {
builder = builder.enable_v2_manifest_paths(enable_v2_manifest_paths);
}
let tbl = builder
.execute()
.await

View File

@@ -347,6 +347,26 @@ impl Table {
let on: Vec<_> = on.iter().map(String::as_str).collect();
Ok(self.inner_ref()?.merge_insert(on.as_slice()).into())
}
#[napi(catch_unwind)]
pub async fn uses_v2_manifest_paths(&self) -> napi::Result<bool> {
self.inner_ref()?
.as_native()
.ok_or_else(|| napi::Error::from_reason("This cannot be run on a remote table"))?
.uses_v2_manifest_paths()
.await
.default_error()
}
#[napi(catch_unwind)]
pub async fn migrate_manifest_paths_v2(&self) -> napi::Result<()> {
self.inner_ref()?
.as_native()
.ok_or_else(|| napi::Error::from_reason("This cannot be run on a remote table"))?
.migrate_manifest_paths_v2()
.await
.default_error()
}
}
#[napi(object)]