mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-03 11:00:40 +00:00
feat(client): Table.load_columns() REST client for LOAD COLUMNS
Brings the REST-only client to parity with Geneva's Table.load_columns(). It fills
existing columns from an external Parquet/Lance/IPC source by joining on the primary key.
- BaseTable::load_columns default (NotSupported) + public Table::load_columns,
taking a LoadColumnsRequest (source uris/format/storage_options, target/source
key, (target, source?) column mappings, on_missing, worker/batch/commit knobs).
- Remote impl POSTs to /v1/table/{id}/load_columns with the matching body;
mock test asserts the request shape.
- PyO3 binding + Python remote Table.load_columns(source, pk, columns, *,
source_format, source_pk, on_missing, ...) accepting a column list or
{target: source} dict.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -17,8 +17,8 @@ use arrow::{
|
||||
pyarrow::{FromPyArrow, PyArrowType, ToPyArrow},
|
||||
};
|
||||
use lancedb::table::{
|
||||
AddDataMode, ColumnAlteration, Duration, FieldMetadataUpdate, NewColumnTransform,
|
||||
OptimizeAction, OptimizeOptions, Ref, Table as LanceDbTable,
|
||||
AddDataMode, ColumnAlteration, Duration, FieldMetadataUpdate, LoadColumnsRequest,
|
||||
NewColumnTransform, OptimizeAction, OptimizeOptions, Ref, Table as LanceDbTable,
|
||||
};
|
||||
use pyo3::{
|
||||
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||
@@ -1100,6 +1100,43 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
/// Python binding for the table's `load_columns` operation.
///
/// Packs the arguments, unchanged, into a `LoadColumnsRequest` and forwards it
/// to the inner table's `load_columns` inside a future handed to
/// `future_into_py`. No argument validation is performed in this binding.
///
/// # Arguments
///
/// * `source_uris` - URIs of the external source data.
/// * `source_format` - Name of the source format. NOTE(review): the accepted
///   values are not visible here; confirm against `LoadColumnsRequest`.
/// * `target_key` - Key column name on the target side.
/// * `columns` - Column mappings as `(name, optional name)` pairs; presumably
///   `(target, optional source)`, confirm against `LoadColumnsRequest`.
/// * `source_key` - Optional key column name on the source side.
/// * `source_storage_options` - Optional string key/value options for the source.
/// * `on_missing` - Optional policy string, forwarded as-is. NOTE(review):
///   accepted values are not visible here.
/// * `num_workers`, `max_workers`, `batch_size`, `commit_granularity` -
///   Optional numeric settings, forwarded as-is.
/// * `priority` - Optional string, forwarded as-is.
///
/// Every argument after `columns` defaults to `None` in the Python signature.
///
/// # Errors
///
/// Returns early if `inner_ref()` fails. An error from the inner
/// `load_columns` call is converted with `infer_error()` inside the future, so
/// it surfaces when that future is resolved rather than when this method returns.
// One parameter per request field, matching the pyo3 signature below.
#[allow(clippy::too_many_arguments)]
|
||||
#[pyo3(signature = (source_uris, source_format, target_key, columns, source_key=None, source_storage_options=None, on_missing=None, num_workers=None, max_workers=None, batch_size=None, commit_granularity=None, priority=None))]
|
||||
pub fn load_columns(
|
||||
self_: PyRef<'_, Self>,
|
||||
source_uris: Vec<String>,
|
||||
source_format: String,
|
||||
target_key: String,
|
||||
columns: Vec<(String, Option<String>)>,
|
||||
source_key: Option<String>,
|
||||
source_storage_options: Option<std::collections::HashMap<String, String>>,
|
||||
on_missing: Option<String>,
|
||||
num_workers: Option<u32>,
|
||||
max_workers: Option<u32>,
|
||||
batch_size: Option<u32>,
|
||||
commit_granularity: Option<u32>,
|
||||
priority: Option<String>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
// Clone the inner table handle so the `async move` block below owns it
// instead of borrowing from `self_`.
let inner = self_.inner_ref()?.clone();
|
||||
// Arguments are moved into the request field-for-field.
let request = LoadColumnsRequest {
|
||||
source_uris,
|
||||
source_format,
|
||||
source_storage_options,
|
||||
target_key,
|
||||
source_key,
|
||||
columns,
|
||||
on_missing,
|
||||
num_workers,
|
||||
max_workers,
|
||||
batch_size,
|
||||
commit_granularity,
|
||||
priority,
|
||||
};
|
||||
// The actual call happens inside the future passed to `future_into_py`.
future_into_py(self_.py(), async move {
|
||||
inner.load_columns(request).await.infer_error()
|
||||
})
|
||||
}
|
||||
|
||||
pub fn add_columns(
|
||||
self_: PyRef<'_, Self>,
|
||||
definitions: Vec<(String, String)>,
|
||||
|
||||
Reference in New Issue
Block a user