From f95acfbbea65020e507146d6f5863498dfde760e Mon Sep 17 00:00:00 2001
From: Brendan Clement
Date: Mon, 1 Jun 2026 18:57:15 -0700
Subject: [PATCH] feat(python): add update_field_metadata bindings

---
Notes (text here is dropped by `git am`; it is not part of the commit message):

  * Exposes `update_field_metadata` on the Python `Table` (abstract),
    `LanceTable`, `RemoteTable` and `AsyncTable` classes. The synchronous
    wrappers forward `*updates` to the async table through `LOOP.run`;
    `AsyncTable` hands the collected tuple to the native `_inner` table.
  * Each update is a dict with a required "path", an optional "metadata"
    mapping (a value of None removes that key) and an optional "replace" flag.
  * The native binding returns `UpdateFieldMetadataResult`, which carries the
    new table `version`.

 python/python/lancedb/_lancedb.pyi    |  6 +++
 python/python/lancedb/remote/table.py |  6 +++
 python/python/lancedb/table.py        | 36 +++++++++++++++
 python/python/tests/test_table.py     | 24 ++++++++++
 python/src/lib.rs                     |  3 +-
 python/src/table.rs                   | 64 ++++++++++++++++++++++++++-
 6 files changed, 136 insertions(+), 3 deletions(-)

diff --git a/python/python/lancedb/_lancedb.pyi b/python/python/lancedb/_lancedb.pyi
index 0148f6575..afbd62086 100644
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -208,6 +208,9 @@ class Table:
     async def alter_columns(
         self, columns: list[dict[str, Any]]
     ) -> AlterColumnsResult: ...
+    async def update_field_metadata(
+        self, updates: list[dict[str, Any]]
+    ) -> UpdateFieldMetadataResult: ...
     async def optimize(
         self,
         *,
@@ -460,6 +463,9 @@ class AddColumnsResult:
 class AlterColumnsResult:
     version: int
 
+class UpdateFieldMetadataResult:
+    version: int
+
 class DropColumnsResult:
     version: int
 
diff --git a/python/python/lancedb/remote/table.py b/python/python/lancedb/remote/table.py
index 73bdbb8b1..79e56e8d7 100644
--- a/python/python/lancedb/remote/table.py
+++ b/python/python/lancedb/remote/table.py
@@ -24,6 +24,7 @@ from lancedb._lancedb import (
     AddColumnsResult,
     AddResult,
     AlterColumnsResult,
+    UpdateFieldMetadataResult,
     DeleteResult,
     DropColumnsResult,
     IndexConfig,
@@ -777,6 +778,11 @@ class RemoteTable(Table):
     ) -> AlterColumnsResult:
         return LOOP.run(self._table.alter_columns(*alterations))
 
+    def update_field_metadata(
+        self, *updates: dict[str, Any]
+    ) -> UpdateFieldMetadataResult:
+        return LOOP.run(self._table.update_field_metadata(*updates))
+
     def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
         return LOOP.run(self._table.drop_columns(columns))
 
diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py
index 2de369419..deb289fd6 100644
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -154,6 +154,7 @@ if TYPE_CHECKING:
         AddColumnsResult,
         AddResult,
         AlterColumnsResult,
+        UpdateFieldMetadataResult,
         DeleteResult,
         DropColumnsResult,
         LsmWriteSpec,
@@ -1799,6 +1800,29 @@ class Table(ABC):
             version: the new version number of the table after the alteration.
         """
 
+    @abstractmethod
+    def update_field_metadata(
+        self, *updates: dict[str, Any]
+    ) -> UpdateFieldMetadataResult:
+        """
+        Update per-field (column) metadata.
+
+        Parameters
+        ----------
+        updates : dict
+            One or more dicts, each with:
+            - "path": str — dot-path to the field (e.g. "embedding" or "a.b.c").
+            - "metadata": dict[str, str | None] — keys to set; a value of ``None``
+              deletes that key.
+            - "replace": bool, optional — replace the field's whole metadata map
+              instead of merging (default False).
+
+        Returns
+        -------
+        UpdateFieldMetadataResult
+            version: the new table version after the update.
+        """
+
     @abstractmethod
     def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
         """
@@ -3583,6 +3607,11 @@ class LanceTable(Table):
     ) -> AlterColumnsResult:
         return LOOP.run(self._table.alter_columns(*alterations))
 
+    def update_field_metadata(
+        self, *updates: dict[str, Any]
+    ) -> UpdateFieldMetadataResult:
+        return LOOP.run(self._table.update_field_metadata(*updates))
+
     def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
         return LOOP.run(self._table.drop_columns(columns))
 
@@ -5234,6 +5263,13 @@ class AsyncTable:
         """
         return await self._inner.alter_columns(alterations)
 
+    async def update_field_metadata(
+        self, *updates: dict[str, Any]
+    ) -> UpdateFieldMetadataResult:
+        """Update per-field metadata. See
+        [`Table.update_field_metadata`][lancedb.table.Table.update_field_metadata]."""
+        return await self._inner.update_field_metadata(updates)
+
     async def drop_columns(self, columns: Iterable[str]):
         """
         Drop columns from the table.
diff --git a/python/python/tests/test_table.py b/python/python/tests/test_table.py
index 2a07c2df6..964f6b904 100644
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -2472,6 +2472,30 @@ def test_alter_columns(mem_db: DBConnection):
     assert table.to_arrow().column_names == ["new_id"]
 
 
+def test_update_field_metadata(mem_db: DBConnection):
+    data = pa.table({"id": [0, 1], "category": ["a", "b"]})
+    table = mem_db.create_table("my_table", data=data)
+
+    res = table.update_field_metadata(
+        {"path": "category", "metadata": {"unit": "label", "pii": "false"}}
+    )
+    assert res.version == 2
+    # Arrow field metadata is bytes-keyed
+    assert table.schema.field("category").metadata == {
+        b"unit": b"label",
+        b"pii": b"false",
+    }
+
+    # merge: add a key, delete one via None, keep the rest
+    table.update_field_metadata(
+        {"path": "category", "metadata": {"source": "import", "pii": None}}
+    )
+    assert table.schema.field("category").metadata == {
+        b"unit": b"label",
+        b"source": b"import",
+    }
+
+
 @pytest.mark.asyncio
 async def test_alter_columns_async(mem_db_async: AsyncConnection):
     data = pa.table({"id": [0, 1]})
diff --git a/python/src/lib.rs b/python/src/lib.rs
index b37a88226..fdf8f5cb7 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -16,7 +16,7 @@ use query::{FTSQuery, HybridQuery, Query, VectorQuery};
 use session::Session;
 use table::{
     AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, LsmWriteSpec,
-    MergeResult, Table, UpdateResult,
+    MergeResult, Table, UpdateFieldMetadataResult, UpdateResult,
 };
 
 pub mod arrow;
@@ -50,6 +50,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::()?;
     m.add_class::()?;
     m.add_class::()?;
+    m.add_class::<UpdateFieldMetadataResult>()?;
     m.add_class::()?;
     m.add_class::()?;
     m.add_class::()?;
diff --git a/python/src/table.rs b/python/src/table.rs
index 302c2bb46..3fa0a5a08 100644
--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -16,8 +16,8 @@ use arrow::{
     pyarrow::{FromPyArrow, PyArrowType, ToPyArrow},
 };
 use lancedb::table::{
-    AddDataMode, ColumnAlteration, Duration, NewColumnTransform, OptimizeAction, OptimizeOptions,
-    Table as LanceDbTable,
+    AddDataMode, ColumnAlteration, Duration, FieldMetadataUpdate, NewColumnTransform,
+    OptimizeAction, OptimizeOptions, Table as LanceDbTable,
 };
 use pyo3::{
     Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
@@ -357,6 +357,27 @@ impl From for AlterColumnsResult {
     }
 }
 
+#[pyclass(get_all, from_py_object)]
+#[derive(Clone, Debug)]
+pub struct UpdateFieldMetadataResult {
+    pub version: u64,
+}
+
+#[pymethods]
+impl UpdateFieldMetadataResult {
+    pub fn __repr__(&self) -> String {
+        format!("UpdateFieldMetadataResult(version={})", self.version)
+    }
+}
+
+impl From<lancedb::table::UpdateFieldMetadataResult> for UpdateFieldMetadataResult {
+    fn from(result: lancedb::table::UpdateFieldMetadataResult) -> Self {
+        Self {
+            version: result.version,
+        }
+    }
+}
+
 #[pyclass(get_all, from_py_object)]
 #[derive(Clone, Debug)]
 pub struct DropColumnsResult {
@@ -1127,6 +1148,45 @@ impl Table {
             Ok(())
         })
     }
+
+    pub fn update_field_metadata<'a>(
+        self_: PyRef<'a, Self>,
+        updates: Vec<Bound<PyDict>>,
+    ) -> PyResult<Bound<'a, PyAny>> {
+        let updates = updates
+            .iter()
+            .map(|update| {
+                let path: String = update
+                    .get_item("path")?
+                    .ok_or_else(|| PyValueError::new_err("Missing path"))?
+                    .extract()?;
+                let mut field_update = FieldMetadataUpdate::new(path);
+                if let Some(metadata) = update.get_item("metadata")? {
+                    let metadata_dict = metadata.cast::<PyDict>()?;
+                    for (key, value) in metadata_dict.iter() {
+                        let key: String = key.extract()?;
+                        if value.is_none() {
+                            field_update = field_update.remove(key);
+                        } else {
+                            field_update = field_update.set(key, value.extract::<String>()?);
+                        }
+                    }
+                }
+                if let Some(replace) = update.get_item("replace")? {
+                    if replace.extract::<bool>()? {
+                        field_update = field_update.replace();
+                    }
+                }
+                Ok(field_update)
+            })
+            .collect::<PyResult<Vec<_>>>()?;
+
+        let inner = self_.inner_ref()?.clone();
+        future_into_py(self_.py(), async move {
+            let result = inner.update_field_metadata(&updates).await.infer_error()?;
+            Ok(UpdateFieldMetadataResult::from(result))
+        })
+    }
 }
 
 #[derive(FromPyObject)]