feat!: refactor ConnectionInternal into a Database trait (#2067)

This opens up the door for more custom database implementations than the
two we have today. The biggest change should be inivisble:
`ConnectionInternal` has been renamed to `Database`, made public, and
refactored

However, there are a few breaking changes. `data_storage_version` and
`enable_v2_manifest_paths` have been moved from options on
`create_table` to options for the database which are now set via
`storage_options`.

Before:
```
db = connect(uri)
tbl = db.create_table("my_table", data, data_storage_version="legacy", enable_v2_manifest_paths=True)
```

After:
```
db = connect(uri, storage_options={
  "new_table_enable_v2_manifest_paths": "true",
  "new_table_data_storage_version": "legacy"
})
tbl = db.create_table("my_table", data)
```

BREAKING CHANGE: the data_storage_version, enable_v2_manifest_paths
options have moved from options to create_table to storage_options.
BREAKING CHANGE: the use_legacy_format option has been removed,
data_storage_version has replaced it for some time now
This commit is contained in:
Weston Pace
2025-02-04 14:35:14 -08:00
committed by GitHub
parent f6eef14313
commit c269524b2f
20 changed files with 1131 additions and 876 deletions

View File

@@ -1,10 +1,10 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use std::{collections::HashMap, str::FromStr, sync::Arc, time::Duration};
use std::{collections::HashMap, sync::Arc, time::Duration};
use arrow::{datatypes::Schema, ffi_stream::ArrowArrayStreamReader, pyarrow::FromPyArrow};
use lancedb::connection::{Connection as LanceConnection, CreateTableMode, LanceFileVersion};
use lancedb::{connection::Connection as LanceConnection, database::CreateTableMode};
use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods, Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
@@ -80,15 +80,13 @@ impl Connection {
future_into_py(self_.py(), async move { op.execute().await.infer_error() })
}
#[pyo3(signature = (name, mode, data, storage_options=None, data_storage_version=None, enable_v2_manifest_paths=None))]
#[pyo3(signature = (name, mode, data, storage_options=None))]
pub fn create_table<'a>(
self_: PyRef<'a, Self>,
name: String,
mode: &str,
data: Bound<'_, PyAny>,
storage_options: Option<HashMap<String, String>>,
data_storage_version: Option<String>,
enable_v2_manifest_paths: Option<bool>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -101,32 +99,19 @@ impl Connection {
builder = builder.storage_options(storage_options);
}
if let Some(enable_v2_manifest_paths) = enable_v2_manifest_paths {
builder = builder.enable_v2_manifest_paths(enable_v2_manifest_paths);
}
if let Some(data_storage_version) = data_storage_version.as_ref() {
builder = builder.data_storage_version(
LanceFileVersion::from_str(data_storage_version)
.map_err(|e| PyValueError::new_err(e.to_string()))?,
);
}
future_into_py(self_.py(), async move {
let table = builder.execute().await.infer_error()?;
Ok(Table::new(table))
})
}
#[pyo3(signature = (name, mode, schema, storage_options=None, data_storage_version=None, enable_v2_manifest_paths=None))]
#[pyo3(signature = (name, mode, schema, storage_options=None))]
pub fn create_empty_table<'a>(
self_: PyRef<'a, Self>,
name: String,
mode: &str,
schema: Bound<'_, PyAny>,
storage_options: Option<HashMap<String, String>>,
data_storage_version: Option<String>,
enable_v2_manifest_paths: Option<bool>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -140,17 +125,6 @@ impl Connection {
builder = builder.storage_options(storage_options);
}
if let Some(enable_v2_manifest_paths) = enable_v2_manifest_paths {
builder = builder.enable_v2_manifest_paths(enable_v2_manifest_paths);
}
if let Some(data_storage_version) = data_storage_version.as_ref() {
builder = builder.data_storage_version(
LanceFileVersion::from_str(data_storage_version)
.map_err(|e| PyValueError::new_err(e.to_string()))?,
);
}
future_into_py(self_.py(), async move {
let table = builder.execute().await.infer_error()?;
Ok(Table::new(table))