feat(python): add update_field_metadata bindings

This commit is contained in:
Brendan Clement
2026-06-01 18:57:15 -07:00
parent 42801bdb97
commit f95acfbbea
6 changed files with 136 additions and 3 deletions

View File

@@ -208,6 +208,9 @@ class Table:
async def alter_columns(
self, columns: list[dict[str, Any]]
) -> AlterColumnsResult: ...
async def update_field_metadata(
self, updates: list[dict[str, Any]]
) -> UpdateFieldMetadataResult: ...
async def optimize(
self,
*,
@@ -460,6 +463,9 @@ class AddColumnsResult:
class AlterColumnsResult:
version: int
class UpdateFieldMetadataResult:
version: int
class DropColumnsResult:
version: int

View File

@@ -24,6 +24,7 @@ from lancedb._lancedb import (
AddColumnsResult,
AddResult,
AlterColumnsResult,
UpdateFieldMetadataResult,
DeleteResult,
DropColumnsResult,
IndexConfig,
@@ -777,6 +778,11 @@ class RemoteTable(Table):
) -> AlterColumnsResult:
return LOOP.run(self._table.alter_columns(*alterations))
def update_field_metadata(
self, *updates: dict[str, Any]
) -> UpdateFieldMetadataResult:
return LOOP.run(self._table.update_field_metadata(*updates))
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
return LOOP.run(self._table.drop_columns(columns))

View File

@@ -154,6 +154,7 @@ if TYPE_CHECKING:
AddColumnsResult,
AddResult,
AlterColumnsResult,
UpdateFieldMetadataResult,
DeleteResult,
DropColumnsResult,
LsmWriteSpec,
@@ -1799,6 +1800,29 @@ class Table(ABC):
version: the new version number of the table after the alteration.
"""
@abstractmethod
def update_field_metadata(
self, *updates: dict[str, Any]
) -> UpdateFieldMetadataResult:
"""
Update per-field (column) metadata.
Parameters
----------
updates : dict
One or more dicts, each with:
- "path": str — dot-path to the field (e.g. "embedding" or "a.b.c").
- "metadata": dict[str, str | None] — keys to set; a value of ``None``
deletes that key.
- "replace": bool, optional — replace the field's whole metadata map
instead of merging (default False).
Returns
-------
UpdateFieldMetadataResult
version: the new table version after the update.
"""
@abstractmethod
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
"""
@@ -3583,6 +3607,11 @@ class LanceTable(Table):
) -> AlterColumnsResult:
return LOOP.run(self._table.alter_columns(*alterations))
def update_field_metadata(
self, *updates: dict[str, Any]
) -> UpdateFieldMetadataResult:
return LOOP.run(self._table.update_field_metadata(*updates))
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
return LOOP.run(self._table.drop_columns(columns))
@@ -5234,6 +5263,13 @@ class AsyncTable:
"""
return await self._inner.alter_columns(alterations)
async def update_field_metadata(
self, *updates: dict[str, Any]
) -> UpdateFieldMetadataResult:
"""Update per-field metadata. See
[`Table.update_field_metadata`][lancedb.table.Table.update_field_metadata]."""
return await self._inner.update_field_metadata(updates)
async def drop_columns(self, columns: Iterable[str]):
"""
Drop columns from the table.

View File

@@ -2472,6 +2472,30 @@ def test_alter_columns(mem_db: DBConnection):
assert table.to_arrow().column_names == ["new_id"]
def test_update_field_metadata(mem_db: DBConnection):
data = pa.table({"id": [0, 1], "category": ["a", "b"]})
table = mem_db.create_table("my_table", data=data)
res = table.update_field_metadata(
{"path": "category", "metadata": {"unit": "label", "pii": "false"}}
)
assert res.version == 2
# Arrow field metadata is bytes-keyed
assert table.schema.field("category").metadata == {
b"unit": b"label",
b"pii": b"false",
}
# merge: add a key, delete one via None, keep the rest
table.update_field_metadata(
{"path": "category", "metadata": {"source": "import", "pii": None}}
)
assert table.schema.field("category").metadata == {
b"unit": b"label",
b"source": b"import",
}
@pytest.mark.asyncio
async def test_alter_columns_async(mem_db_async: AsyncConnection):
data = pa.table({"id": [0, 1]})

View File

@@ -16,7 +16,7 @@ use query::{FTSQuery, HybridQuery, Query, VectorQuery};
use session::Session;
use table::{
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, LsmWriteSpec,
MergeResult, Table, UpdateResult,
MergeResult, Table, UpdateFieldMetadataResult, UpdateResult,
};
pub mod arrow;
@@ -50,6 +50,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<RecordBatchStream>()?;
m.add_class::<AddColumnsResult>()?;
m.add_class::<AlterColumnsResult>()?;
m.add_class::<UpdateFieldMetadataResult>()?;
m.add_class::<AddResult>()?;
m.add_class::<MergeResult>()?;
m.add_class::<LsmWriteSpec>()?;

View File

@@ -16,8 +16,8 @@ use arrow::{
pyarrow::{FromPyArrow, PyArrowType, ToPyArrow},
};
use lancedb::table::{
AddDataMode, ColumnAlteration, Duration, NewColumnTransform, OptimizeAction, OptimizeOptions,
Table as LanceDbTable,
AddDataMode, ColumnAlteration, Duration, FieldMetadataUpdate, NewColumnTransform,
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
};
use pyo3::{
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
@@ -357,6 +357,27 @@ impl From<lancedb::table::AlterColumnsResult> for AlterColumnsResult {
}
}
#[pyclass(get_all, from_py_object)]
#[derive(Clone, Debug)]
pub struct UpdateFieldMetadataResult {
pub version: u64,
}
#[pymethods]
impl UpdateFieldMetadataResult {
pub fn __repr__(&self) -> String {
format!("UpdateFieldMetadataResult(version={})", self.version)
}
}
impl From<lancedb::table::UpdateFieldMetadataResult> for UpdateFieldMetadataResult {
fn from(result: lancedb::table::UpdateFieldMetadataResult) -> Self {
Self {
version: result.version,
}
}
}
#[pyclass(get_all, from_py_object)]
#[derive(Clone, Debug)]
pub struct DropColumnsResult {
@@ -1127,6 +1148,45 @@ impl Table {
Ok(())
})
}
pub fn update_field_metadata<'a>(
self_: PyRef<'a, Self>,
updates: Vec<Bound<PyDict>>,
) -> PyResult<Bound<'a, PyAny>> {
let updates = updates
.iter()
.map(|update| {
let path: String = update
.get_item("path")?
.ok_or_else(|| PyValueError::new_err("Missing path"))?
.extract()?;
let mut field_update = FieldMetadataUpdate::new(path);
if let Some(metadata) = update.get_item("metadata")? {
let metadata_dict = metadata.cast::<PyDict>()?;
for (key, value) in metadata_dict.iter() {
let key: String = key.extract()?;
if value.is_none() {
field_update = field_update.remove(key);
} else {
field_update = field_update.set(key, value.extract::<String>()?);
}
}
}
if let Some(replace) = update.get_item("replace")? {
if replace.extract::<bool>()? {
field_update = field_update.replace();
}
}
Ok(field_update)
})
.collect::<PyResult<Vec<_>>>()?;
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let result = inner.update_field_metadata(&updates).await.infer_error()?;
Ok(UpdateFieldMetadataResult::from(result))
})
}
}
#[derive(FromPyObject)]