diff --git a/docs/src/js/classes/Table.md b/docs/src/js/classes/Table.md index 62b962daf..1675f2c93 100644 --- a/docs/src/js/classes/Table.md +++ b/docs/src/js/classes/Table.md @@ -994,6 +994,29 @@ based on the row being updated (e.g. "my_col + 1") *** +### updateFieldMetadata() + +```ts +abstract updateFieldMetadata(updates): Promise +``` + +Update per-field (column) metadata. + +#### Parameters + +* **updates**: [`FieldMetadataUpdate`](../interfaces/FieldMetadataUpdate.md)[] + One or more per-field updates. Each + update's metadata is merged into the field's existing metadata by default; + a value of `null` deletes that key, and `replace: true` swaps the whole map. + +#### Returns + +`Promise`<[`UpdateFieldMetadataResult`](../interfaces/UpdateFieldMetadataResult.md)> + +resolves to the new table version. + +*** + ### vectorSearch() ```ts diff --git a/docs/src/js/globals.md b/docs/src/js/globals.md index 2e7254020..7d26fd75b 100644 --- a/docs/src/js/globals.md +++ b/docs/src/js/globals.md @@ -65,6 +65,7 @@ - [DropNamespaceOptions](interfaces/DropNamespaceOptions.md) - [DropNamespaceResponse](interfaces/DropNamespaceResponse.md) - [ExecutableQuery](interfaces/ExecutableQuery.md) +- [FieldMetadataUpdate](interfaces/FieldMetadataUpdate.md) - [FragmentStatistics](interfaces/FragmentStatistics.md) - [FragmentSummaryStats](interfaces/FragmentSummaryStats.md) - [FtsOptions](interfaces/FtsOptions.md) @@ -101,6 +102,7 @@ - [TimeoutConfig](interfaces/TimeoutConfig.md) - [TlsConfig](interfaces/TlsConfig.md) - [TokenResponse](interfaces/TokenResponse.md) +- [UpdateFieldMetadataResult](interfaces/UpdateFieldMetadataResult.md) - [UpdateOptions](interfaces/UpdateOptions.md) - [UpdateResult](interfaces/UpdateResult.md) - [Version](interfaces/Version.md) diff --git a/docs/src/js/interfaces/FieldMetadataUpdate.md b/docs/src/js/interfaces/FieldMetadataUpdate.md new file mode 100644 index 000000000..38c675630 --- /dev/null +++ b/docs/src/js/interfaces/FieldMetadataUpdate.md @@ -0,0 +1,41 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / FieldMetadataUpdate + +# Interface: FieldMetadataUpdate + +A per-field metadata update, addressed by dot-path. + +## Properties + +### metadata + +```ts +metadata: Record; +``` + +Metadata key/value pairs. Merged into the field's existing metadata by +default; a value of `null` deletes that key. + +*** + +### path + +```ts +path: string; +``` + +Dot-separated path to the field. For a top-level column this is just its +name; for a nested field it's the path, e.g. "a.b.c". + +*** + +### replace? + +```ts +optional replace: boolean; +``` + +If true, replace the field's entire metadata map instead of merging. diff --git a/docs/src/js/interfaces/UpdateFieldMetadataResult.md b/docs/src/js/interfaces/UpdateFieldMetadataResult.md new file mode 100644 index 000000000..e5433c5db --- /dev/null +++ b/docs/src/js/interfaces/UpdateFieldMetadataResult.md @@ -0,0 +1,15 @@ +[**@lancedb/lancedb**](../README.md) • **Docs** + +*** + +[@lancedb/lancedb](../globals.md) / UpdateFieldMetadataResult + +# Interface: UpdateFieldMetadataResult + +## Properties + +### version + +```ts +version: number; +``` diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index 3be56d3c7..12fee4733 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -1571,6 +1571,33 @@ describe("schema evolution", function () { expect(await table.schema()).toEqual(expectedSchema3); }); + it("can update field metadata", async function () { + const con = await connect(tmpDir.name); + const table = await con.createTable("fm", [ + { id: 1, category: "a" }, + { id: 2, category: "b" }, + ]); + + const res = await table.updateFieldMetadata([ + { path: "category", metadata: { unit: "label", pii: "false" } }, + ]); + expect(res).toHaveProperty("version"); + expect(res.version).toBe(2); + + let cat = (await table.schema()).fields.find((f) => f.name === "category"); + expect(cat?.metadata.get("unit")).toBe("label"); + expect(cat?.metadata.get("pii")).toBe("false"); + + // merge: add a key, delete one via null, keep the rest + await table.updateFieldMetadata([ + { path: "category", metadata: { source: "import", pii: null } }, + ]); + cat = (await table.schema()).fields.find((f) => f.name === "category"); + expect(cat?.metadata.get("unit")).toBe("label"); // preserved + expect(cat?.metadata.get("source")).toBe("import"); // added + expect(cat?.metadata.has("pii")).toBe(false); // deleted + }); + it("can cast to various types", async function () { const con = await connect(tmpDir.name); diff --git a/nodejs/lancedb/index.ts b/nodejs/lancedb/index.ts index 56b86ac5f..f4f724e8a 100644 --- a/nodejs/lancedb/index.ts +++ b/nodejs/lancedb/index.ts @@ -42,6 +42,7 @@ export { AddResult, AddColumnsResult, AlterColumnsResult, + UpdateFieldMetadataResult, DeleteResult, DropColumnsResult, UpdateResult, @@ -117,6 +118,7 @@ export { WriteProgress, LsmWriteSpec, ColumnAlteration, + FieldMetadataUpdate, } from "./table"; export { diff --git a/nodejs/lancedb/table.ts b/nodejs/lancedb/table.ts index ae2e86995..e3821bd81 100644 --- a/nodejs/lancedb/table.ts +++ b/nodejs/lancedb/table.ts @@ -32,6 +32,7 @@ import { OptimizeStats, TableStatistics, Tags, + UpdateFieldMetadataResult, UpdateResult, Table as _NativeTable, } from "./native"; @@ -508,6 +509,18 @@ export abstract class Table { abstract alterColumns( columnAlterations: ColumnAlteration[], ): Promise; + + /** + * Update per-field (column) metadata. + * @param {FieldMetadataUpdate[]} updates One or more per-field updates. Each + * update's metadata is merged into the field's existing metadata by default; + * a value of `null` deletes that key, and `replace: true` swaps the whole map. + * @returns {Promise} resolves to the new table version. + */ + abstract updateFieldMetadata( + updates: FieldMetadataUpdate[], + ): Promise; + /** * Drop one or more columns from the dataset * @@ -1037,6 +1050,12 @@ export class LocalTable extends Table { return await this.inner.alterColumns(processedAlterations); } + async updateFieldMetadata( + updates: FieldMetadataUpdate[], + ): Promise { + return await this.inner.updateFieldMetadata(updates); + } + async dropColumns(columnNames: string[]): Promise { return await this.inner.dropColumns(columnNames); } @@ -1203,3 +1222,19 @@ export interface ColumnAlteration { /** Set the new nullability. Note that a nullable column cannot be made non-nullable. */ nullable?: boolean; } + +/** A per-field metadata update, addressed by dot-path. */ +export interface FieldMetadataUpdate { + /** + * Dot-separated path to the field. For a top-level column this is just its + * name; for a nested field it's the path, e.g. "a.b.c". + */ + path: string; + /** + * Metadata key/value pairs. Merged into the field's existing metadata by + * default; a value of `null` deletes that key. + */ + metadata: Record; + /** If true, replace the field's entire metadata map instead of merging. */ + replace?: boolean; +} diff --git a/nodejs/src/table.rs b/nodejs/src/table.rs index 16cde35d8..2add2f3f3 100644 --- a/nodejs/src/table.rs +++ b/nodejs/src/table.rs @@ -5,8 +5,9 @@ use std::collections::HashMap; use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema}; use lancedb::table::{ - AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform, - OptimizeAction, OptimizeOptions, Table as LanceDbTable, + AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, + FieldMetadataUpdate as LanceFieldMetadataUpdate, NewColumnTransform, OptimizeAction, + OptimizeOptions, Table as LanceDbTable, }; use napi::bindgen_prelude::*; use napi::threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode}; @@ -355,6 +356,23 @@ impl Table { Ok(res.into()) } + #[napi(catch_unwind)] + pub async fn update_field_metadata( + &self, + updates: Vec, + ) -> napi::Result { + let updates = updates + .into_iter() + .map(LanceFieldMetadataUpdate::from) + .collect::>(); + let res = self + .inner_ref()? + .update_field_metadata(&updates) + .await + .default_error()?; + Ok(res.into()) + } + #[napi(catch_unwind)] pub async fn drop_columns(&self, columns: Vec) -> napi::Result { let col_refs = columns.iter().map(String::as_str).collect::>(); @@ -747,6 +765,29 @@ pub struct ColumnAlteration { pub nullable: Option, } +/// A per-field metadata update, addressed by dot-path. Merges into the field's +/// existing metadata by default; a `null` value deletes a key, and `replace` +/// swaps the field's entire metadata map. +#[napi(object)] +pub struct FieldMetadataUpdate { + /// Dot-separated path to the field (e.g. "embedding" or "a.b.c"). + pub path: String, + /// Metadata keys to set; a `null` value deletes that key. + pub metadata: HashMap>, + /// If true, replace the field's entire metadata map instead of merging. + pub replace: Option, +} + +impl From for LanceFieldMetadataUpdate { + fn from(js: FieldMetadataUpdate) -> Self { + Self { + path: js.path, + metadata: js.metadata, + replace: js.replace.unwrap_or(false), + } + } +} + impl TryFrom for LanceColumnAlteration { type Error = String; fn try_from(js: ColumnAlteration) -> std::result::Result { @@ -987,6 +1028,19 @@ impl From for AlterColumnsResult { } } +#[napi(object)] +pub struct UpdateFieldMetadataResult { + pub version: i64, +} + +impl From for UpdateFieldMetadataResult { + fn from(value: lancedb::table::UpdateFieldMetadataResult) -> Self { + Self { + version: value.version as i64, + } + } +} + #[napi(object)] pub struct DropColumnsResult { pub version: i64, diff --git a/python/python/lancedb/_lancedb.pyi b/python/python/lancedb/_lancedb.pyi index 0148f6575..afbd62086 100644 --- a/python/python/lancedb/_lancedb.pyi +++ b/python/python/lancedb/_lancedb.pyi @@ -208,6 +208,9 @@ class Table: async def alter_columns( self, columns: list[dict[str, Any]] ) -> AlterColumnsResult: ... + async def update_field_metadata( + self, updates: list[dict[str, Any]] + ) -> UpdateFieldMetadataResult: ... async def optimize( self, *, @@ -460,6 +463,9 @@ class AddColumnsResult: class AlterColumnsResult: version: int +class UpdateFieldMetadataResult: + version: int + class DropColumnsResult: version: int diff --git a/python/python/lancedb/remote/table.py b/python/python/lancedb/remote/table.py index 019f91044..b578f829a 100644 --- a/python/python/lancedb/remote/table.py +++ b/python/python/lancedb/remote/table.py @@ -25,6 +25,7 @@ from lancedb._lancedb import ( AddColumnsResult, AddResult, AlterColumnsResult, + UpdateFieldMetadataResult, DeleteResult, DropColumnsResult, IndexConfig, @@ -850,6 +851,11 @@ class RemoteTable(Table): ) -> AlterColumnsResult: return LOOP.run(self._table.alter_columns(*alterations)) + def update_field_metadata( + self, *updates: dict[str, Any] + ) -> UpdateFieldMetadataResult: + return LOOP.run(self._table.update_field_metadata(*updates)) + def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult: return LOOP.run(self._table.drop_columns(columns)) diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index 2de369419..deb289fd6 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -154,6 +154,7 @@ if TYPE_CHECKING: AddColumnsResult, AddResult, AlterColumnsResult, + UpdateFieldMetadataResult, DeleteResult, DropColumnsResult, LsmWriteSpec, @@ -1799,6 +1800,29 @@ class Table(ABC): version: the new version number of the table after the alteration. """ + @abstractmethod + def update_field_metadata( + self, *updates: dict[str, Any] + ) -> UpdateFieldMetadataResult: + """ + Update per-field (column) metadata. + + Parameters + ---------- + updates : dict + One or more dicts, each with: + - "path": str — dot-path to the field (e.g. "embedding" or "a.b.c"). + - "metadata": dict[str, str | None] — keys to set; a value of ``None`` + deletes that key. + - "replace": bool, optional — replace the field's whole metadata map + instead of merging (default False). + + Returns + ------- + UpdateFieldMetadataResult + version: the new table version after the update. + """ + @abstractmethod def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult: """ @@ -3583,6 +3607,11 @@ class LanceTable(Table): ) -> AlterColumnsResult: return LOOP.run(self._table.alter_columns(*alterations)) + def update_field_metadata( + self, *updates: dict[str, Any] + ) -> UpdateFieldMetadataResult: + return LOOP.run(self._table.update_field_metadata(*updates)) + def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult: return LOOP.run(self._table.drop_columns(columns)) @@ -5234,6 +5263,13 @@ class AsyncTable: """ return await self._inner.alter_columns(alterations) + async def update_field_metadata( + self, *updates: dict[str, Any] + ) -> UpdateFieldMetadataResult: + """Update per-field metadata. See + [`Table.update_field_metadata`][lancedb.table.Table.update_field_metadata].""" + return await self._inner.update_field_metadata(updates) + async def drop_columns(self, columns: Iterable[str]): """ Drop columns from the table. diff --git a/python/python/tests/test_table.py b/python/python/tests/test_table.py index 2a07c2df6..964f6b904 100644 --- a/python/python/tests/test_table.py +++ b/python/python/tests/test_table.py @@ -2472,6 +2472,30 @@ def test_alter_columns(mem_db: DBConnection): assert table.to_arrow().column_names == ["new_id"] +def test_update_field_metadata(mem_db: DBConnection): + data = pa.table({"id": [0, 1], "category": ["a", "b"]}) + table = mem_db.create_table("my_table", data=data) + + res = table.update_field_metadata( + {"path": "category", "metadata": {"unit": "label", "pii": "false"}} + ) + assert res.version == 2 + # Arrow field metadata is bytes-keyed + assert table.schema.field("category").metadata == { + b"unit": b"label", + b"pii": b"false", + } + + # merge: add a key, delete one via None, keep the rest + table.update_field_metadata( + {"path": "category", "metadata": {"source": "import", "pii": None}} + ) + assert table.schema.field("category").metadata == { + b"unit": b"label", + b"source": b"import", + } + + @pytest.mark.asyncio async def test_alter_columns_async(mem_db_async: AsyncConnection): data = pa.table({"id": [0, 1]}) diff --git a/python/src/lib.rs b/python/src/lib.rs index b37a88226..fdf8f5cb7 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -16,7 +16,7 @@ use query::{FTSQuery, HybridQuery, Query, VectorQuery}; use session::Session; use table::{ AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, LsmWriteSpec, - MergeResult, Table, UpdateResult, + MergeResult, Table, UpdateFieldMetadataResult, UpdateResult, }; pub mod arrow; @@ -50,6 +50,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/python/src/table.rs b/python/src/table.rs index 302c2bb46..a0462caf3 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -16,8 +16,8 @@ use arrow::{ pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}, }; use lancedb::table::{ - AddDataMode, ColumnAlteration, Duration, NewColumnTransform, OptimizeAction, OptimizeOptions, - Table as LanceDbTable, + AddDataMode, ColumnAlteration, Duration, FieldMetadataUpdate, NewColumnTransform, + OptimizeAction, OptimizeOptions, Table as LanceDbTable, }; use pyo3::{ Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python, @@ -357,6 +357,27 @@ impl From for AlterColumnsResult { } } +#[pyclass(get_all, from_py_object)] +#[derive(Clone, Debug)] +pub struct UpdateFieldMetadataResult { + pub version: u64, +} + +#[pymethods] +impl UpdateFieldMetadataResult { + pub fn __repr__(&self) -> String { + format!("UpdateFieldMetadataResult(version={})", self.version) + } +} + +impl From for UpdateFieldMetadataResult { + fn from(result: lancedb::table::UpdateFieldMetadataResult) -> Self { + Self { + version: result.version, + } + } +} + #[pyclass(get_all, from_py_object)] #[derive(Clone, Debug)] pub struct DropColumnsResult { @@ -1127,6 +1148,45 @@ impl Table { Ok(()) }) } + + pub fn update_field_metadata<'a>( + self_: PyRef<'a, Self>, + updates: Vec>, + ) -> PyResult> { + let updates = updates + .iter() + .map(|update| { + let path: String = update + .get_item("path")? + .ok_or_else(|| PyValueError::new_err("Missing path"))? + .extract()?; + let mut field_update = FieldMetadataUpdate::new(path); + if let Some(metadata) = update.get_item("metadata")? { + let metadata_dict = metadata.cast::()?; + for (key, value) in metadata_dict.iter() { + let key: String = key.extract()?; + if value.is_none() { + field_update = field_update.remove(key); + } else { + field_update = field_update.set(key, value.extract::()?); + } + } + } + if let Some(replace) = update.get_item("replace")? + && replace.extract::()? + { + field_update = field_update.replace(); + } + Ok(field_update) + }) + .collect::>>()?; + + let inner = self_.inner_ref()?.clone(); + future_into_py(self_.py(), async move { + let result = inner.update_field_metadata(&updates).await.infer_error()?; + Ok(UpdateFieldMetadataResult::from(result)) + }) + } } #[derive(FromPyObject)] diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index dc16b61c6..6dab22590 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -18,13 +18,13 @@ use crate::index::waiter::wait_for_index; use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest}; use crate::table::AddColumnsResult; use crate::table::AddResult; -use crate::table::AlterColumnsResult; use crate::table::DeleteResult; use crate::table::DropColumnsResult; use crate::table::MergeResult; use crate::table::Tags; use crate::table::UpdateResult; use crate::table::query::create_multi_vector_plan; +use crate::table::{AlterColumnsResult, FieldMetadataUpdate, UpdateFieldMetadataResult}; use crate::table::{AnyQuery, Filter, Predicate, PreprocessingOutput, TableStatistics}; use crate::utils::background_cache::BackgroundCache; use crate::utils::{ @@ -1968,6 +1968,35 @@ impl BaseTable for RemoteTable { Ok(result) } + async fn update_field_metadata( + &self, + updates: &[FieldMetadataUpdate], + ) -> Result { + self.check_mutable().await?; + let body = serde_json::json!({ "updates": updates }); + let request = self + .client + .post(&format!( + "/v1/table/{}/update_field_metadata/", + self.identifier + )) + .json(&body); + let (request_id, response) = self.send(request, true).await?; + let response = self.check_table_response(&request_id, response).await?; + let body = response.text().await.err_to_http(request_id.clone())?; + + let result: UpdateFieldMetadataResult = + serde_json::from_str(&body).map_err(|e| Error::Http { + source: format!("Failed to parse update_field_metadata response: {}", e).into(), + request_id, + status_code: None, + })?; + + self.invalidate_schema_cache(); + self.track_write_version(result.version); + Ok(result) + } + async fn drop_columns(&self, columns: &[&str]) -> Result { self.check_mutable().await?; let body = serde_json::json!({ "columns": columns }); @@ -2261,6 +2290,7 @@ mod tests { use crate::remote::client::{ClientConfig, RetryConfig}; use crate::table::AddDataMode; + use crate::table::FieldMetadataUpdate; use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type}; use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, record_batch}; @@ -6460,4 +6490,25 @@ mod tests { assert!(!headers.contains_key("x-lancedb-min-version")); assert!(!headers.contains_key("x-lancedb-min-timestamp")); } + + #[tokio::test] + async fn test_update_field_metadata() { + let table = Table::new_with_handler("my_table", |request| { + assert_eq!(request.method(), "POST"); + assert_eq!( + request.url().path(), + "/v1/table/my_table/update_field_metadata/" + ); + http::Response::builder() + .status(200) + .body(r#"{"version": 7, "fields": {"category": {"unit": "label"}}}"#) + .unwrap() + }); + + let result = table + .update_field_metadata(&[FieldMetadataUpdate::new("category").set("unit", "label")]) + .await + .unwrap(); + assert_eq!(result.version, 7); + } } diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index ca34bbdf3..0eb6b578a 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -91,7 +91,10 @@ pub use lance::dataset::scanner::DatasetRecordBatchStream; use lance::dataset::statistics::DatasetStatisticsExt; pub use lance_index::optimize::OptimizeOptions; pub use optimize::{CompactionOptions, OptimizeAction, OptimizeStats}; -pub use schema_evolution::{AddColumnsResult, AlterColumnsResult, DropColumnsResult}; +pub use schema_evolution::{ + AddColumnsResult, AlterColumnsResult, DropColumnsResult, FieldMetadataUpdate, + UpdateFieldMetadataResult, +}; use serde_with::skip_serializing_none; pub use update::{UpdateBuilder, UpdateResult}; @@ -660,6 +663,19 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync { message: "create_insert_exec not implemented".to_string(), }) } + /// Update per-field metadata. Merges into existing metadata by default; + /// [`FieldMetadataUpdate::remove`] deletes a key and + /// [`FieldMetadataUpdate::replace`] swaps the field's whole map. + /// + /// The default returns `NotSupported`; Lance-backed and remote tables override it. + async fn update_field_metadata( + &self, + _updates: &[FieldMetadataUpdate], + ) -> Result { + Err(Error::NotSupported { + message: "update_field_metadata is not supported on this table type".into(), + }) + } } /// A Table is a collection of strong typed Rows. @@ -1340,6 +1356,14 @@ impl Table { self.inner.alter_columns(alterations).await } + /// Update per-field metadata (merges by default). + pub async fn update_field_metadata( + &self, + updates: &[FieldMetadataUpdate], + ) -> Result { + self.inner.update_field_metadata(updates).await + } + /// Remove columns from the table. pub async fn drop_columns(&self, columns: &[&str]) -> Result { self.inner.drop_columns(columns).await @@ -2886,6 +2910,13 @@ impl BaseTable for NativeTable { schema_evolution::execute_alter_columns(self, alterations).await } + async fn update_field_metadata( + &self, + updates: &[FieldMetadataUpdate], + ) -> Result { + schema_evolution::execute_update_field_metadata(self, updates).await + } + async fn drop_columns(&self, columns: &[&str]) -> Result { schema_evolution::execute_drop_columns(self, columns).await } diff --git a/rust/lancedb/src/table/schema_evolution.rs b/rust/lancedb/src/table/schema_evolution.rs index c9bf9d7a8..52c8c191f 100644 --- a/rust/lancedb/src/table/schema_evolution.rs +++ b/rust/lancedb/src/table/schema_evolution.rs @@ -10,6 +10,7 @@ use lance::dataset::{ColumnAlteration, NewColumnTransform}; use serde::{Deserialize, Serialize}; +use std::collections::HashMap; use super::NativeTable; use crate::Result; @@ -44,6 +45,52 @@ pub struct DropColumnsResult { pub version: u64, } +/// A single field's metadata update, addressed by dot-path. +/// +/// Merges into the field's existing metadata by default. Use [`Self::remove`] to +/// delete a key, or [`Self::replace`] to swap the field's entire metadata map. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize)] +pub struct FieldMetadataUpdate { + /// Dot-separated path to the field (e.g. `"embedding"` or `"address.zip"`). + pub path: String, + /// Keys to set (`Some`) or delete (`None`). + pub metadata: HashMap>, + /// If `true`, replace the field's entire metadata map instead of merging. + pub replace: bool, +} + +impl FieldMetadataUpdate { + pub fn new(path: impl Into) -> Self { + Self { + path: path.into(), + metadata: HashMap::new(), + replace: false, + } + } + + pub fn set(mut self, key: impl Into, value: impl Into) -> Self { + self.metadata.insert(key.into(), Some(value.into())); + self + } + + pub fn remove(mut self, key: impl Into) -> Self { + self.metadata.insert(key.into(), None); + self + } + + pub fn replace(mut self) -> Self { + self.replace = true; + self + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +pub struct UpdateFieldMetadataResult { + /// The commit version associated with the operation. + #[serde(default)] + pub version: u64, +} + /// Internal implementation of the add columns logic. /// /// Adds new columns to the table using the provided transforms. @@ -90,6 +137,32 @@ pub(crate) async fn execute_drop_columns( Ok(DropColumnsResult { version }) } +/// Internal implementation of the update field metadata logic. +/// +/// Merges or replaces per-field metadata, addressing fields by dot-path. +pub(crate) async fn execute_update_field_metadata( + table: &NativeTable, + updates: &[FieldMetadataUpdate], +) -> Result { + table.dataset.ensure_mutable()?; + let mut dataset = (*table.dataset.get().await?).clone(); + + let mut builder = dataset.update_field_metadata(); + for update in updates { + let entries = update.metadata.iter().map(|(k, v)| (k.clone(), v.clone())); + builder = if update.replace { + builder.replace(&update.path, entries)? + } else { + builder.update(&update.path, entries)? + }; + } + builder.await?; + + let version = dataset.version().version; + table.dataset.update(dataset); + Ok(UpdateFieldMetadataResult { version }) +} + #[cfg(test)] mod tests { use arrow_array::{Int32Array, StringArray, record_batch}; @@ -97,6 +170,7 @@ mod tests { use futures::TryStreamExt; use lance::dataset::ColumnAlteration; + use super::FieldMetadataUpdate; use crate::connect; use crate::query::{ExecutableQuery, QueryBase, Select}; use crate::table::NewColumnTransform; @@ -610,4 +684,46 @@ mod tests { let v4 = table.version().await.unwrap(); assert_eq!(drop_result.version, v4); } + + #[tokio::test] + async fn test_update_field_metadata() { + let conn = connect("memory://").execute().await.unwrap(); + let batch = record_batch!( + ("id", Int32, [1, 2, 3]), + ("category", Utf8, ["A", "B", "C"]) + ) + .unwrap(); + let table = conn + .create_table("test_update_field_metadata", batch) + .execute() + .await + .unwrap(); + + // Set metadata on a field. + table + .update_field_metadata(&[FieldMetadataUpdate::new("category") + .set("unit", "label") + .set("pii", "false")]) + .await + .unwrap(); + let schema = table.schema().await.unwrap(); + let field = schema.field_with_name("category").unwrap(); + assert_eq!( + field.metadata().get("unit").map(String::as_str), + Some("label") + ); + + // Merge: add a key, delete one, keep the rest. + table + .update_field_metadata(&[FieldMetadataUpdate::new("category") + .set("source", "import") + .remove("pii")]) + .await + .unwrap(); + let schema = table.schema().await.unwrap(); + let md = schema.field_with_name("category").unwrap().metadata(); + assert_eq!(md.get("unit").map(String::as_str), Some("label")); // preserved + assert_eq!(md.get("source").map(String::as_str), Some("import")); // added + assert!(!md.contains_key("pii")); // deleted + } }