feat: add flag to enable faster manifest paths (#1612)

The new V2 manifest path scheme makes discovering the latest version of
a table a constant-time operation on object stores, regardless of the
number of versions in the table. See benchmarks in the PR here:
https://github.com/lancedb/lance/pull/2798

Closes #1583
This commit is contained in:
Will Jones
2024-09-09 11:34:36 -07:00
committed by GitHub
parent 029b01bbbf
commit 2a6586d6fb
16 changed files with 292 additions and 2 deletions

View File

@@ -81,6 +81,7 @@ impl Connection {
data: Bound<'_, PyAny>,
storage_options: Option<HashMap<String, String>>,
data_storage_version: Option<String>,
enable_v2_manifest_paths: Option<bool>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -93,6 +94,10 @@ impl Connection {
builder = builder.storage_options(storage_options);
}
if let Some(enable_v2_manifest_paths) = enable_v2_manifest_paths {
builder = builder.enable_v2_manifest_paths(enable_v2_manifest_paths);
}
if let Some(data_storage_version) = data_storage_version.as_ref() {
builder = builder.data_storage_version(
LanceFileVersion::from_str(data_storage_version)
@@ -113,6 +118,7 @@ impl Connection {
schema: Bound<'_, PyAny>,
storage_options: Option<HashMap<String, String>>,
data_storage_version: Option<String>,
enable_v2_manifest_paths: Option<bool>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -126,6 +132,10 @@ impl Connection {
builder = builder.storage_options(storage_options);
}
if let Some(enable_v2_manifest_paths) = enable_v2_manifest_paths {
builder = builder.enable_v2_manifest_paths(enable_v2_manifest_paths);
}
if let Some(data_storage_version) = data_storage_version.as_ref() {
builder = builder.data_storage_version(
LanceFileVersion::from_str(data_storage_version)