feat: add update_field_metadata to edit per-field metadata (#3482)

### Summary
Adds update_field_metadata to the client SDK (Rust core, Python, and
TypeScript) so clients can edit per-field (column) Arrow metadata
(schema.fields[].metadata)

### Testing
- added unit tests
- ran E2E against a local server on both local and remote tables (set →
merge → delete), across Python sync/async and TypeScript

### Next steps
- deprecate replace_field_metadata in the python lancedb favor of this
(typescript didn't have replace_field_metadata method). This matches
Lance's API direction (Lance already deprecated replace_field_metadata
for update_field_metadata)
This commit is contained in:
Brendan Clement
2026-06-02 07:00:00 -07:00
committed by GitHub
parent 7b874905fd
commit d065be0474
17 changed files with 537 additions and 7 deletions

View File

@@ -994,6 +994,29 @@ based on the row being updated (e.g. "my_col + 1")
***
### updateFieldMetadata()
```ts
abstract updateFieldMetadata(updates): Promise<UpdateFieldMetadataResult>
```
Update per-field (column) metadata.
#### Parameters
* **updates**: [`FieldMetadataUpdate`](../interfaces/FieldMetadataUpdate.md)[]
One or more per-field updates. Each
update's metadata is merged into the field's existing metadata by default;
a value of `null` deletes that key, and `replace: true` swaps the whole map.
#### Returns
`Promise`&lt;[`UpdateFieldMetadataResult`](../interfaces/UpdateFieldMetadataResult.md)&gt;
resolves to the new table version.
***
### vectorSearch()
```ts

View File

@@ -65,6 +65,7 @@
- [DropNamespaceOptions](interfaces/DropNamespaceOptions.md)
- [DropNamespaceResponse](interfaces/DropNamespaceResponse.md)
- [ExecutableQuery](interfaces/ExecutableQuery.md)
- [FieldMetadataUpdate](interfaces/FieldMetadataUpdate.md)
- [FragmentStatistics](interfaces/FragmentStatistics.md)
- [FragmentSummaryStats](interfaces/FragmentSummaryStats.md)
- [FtsOptions](interfaces/FtsOptions.md)
@@ -101,6 +102,7 @@
- [TimeoutConfig](interfaces/TimeoutConfig.md)
- [TlsConfig](interfaces/TlsConfig.md)
- [TokenResponse](interfaces/TokenResponse.md)
- [UpdateFieldMetadataResult](interfaces/UpdateFieldMetadataResult.md)
- [UpdateOptions](interfaces/UpdateOptions.md)
- [UpdateResult](interfaces/UpdateResult.md)
- [Version](interfaces/Version.md)

View File

@@ -0,0 +1,41 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / FieldMetadataUpdate
# Interface: FieldMetadataUpdate
A per-field metadata update, addressed by dot-path.
## Properties
### metadata
```ts
metadata: Record<string, null | string>;
```
Metadata key/value pairs. Merged into the field's existing metadata by
default; a value of `null` deletes that key.
***
### path
```ts
path: string;
```
Dot-separated path to the field. For a top-level column this is just its
name; for a nested field it's the path, e.g. "a.b.c".
***
### replace?
```ts
optional replace: boolean;
```
If true, replace the field's entire metadata map instead of merging.

View File

@@ -0,0 +1,15 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / UpdateFieldMetadataResult
# Interface: UpdateFieldMetadataResult
## Properties
### version
```ts
version: number;
```

View File

@@ -1571,6 +1571,33 @@ describe("schema evolution", function () {
expect(await table.schema()).toEqual(expectedSchema3);
});
it("can update field metadata", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("fm", [
{ id: 1, category: "a" },
{ id: 2, category: "b" },
]);
const res = await table.updateFieldMetadata([
{ path: "category", metadata: { unit: "label", pii: "false" } },
]);
expect(res).toHaveProperty("version");
expect(res.version).toBe(2);
let cat = (await table.schema()).fields.find((f) => f.name === "category");
expect(cat?.metadata.get("unit")).toBe("label");
expect(cat?.metadata.get("pii")).toBe("false");
// merge: add a key, delete one via null, keep the rest
await table.updateFieldMetadata([
{ path: "category", metadata: { source: "import", pii: null } },
]);
cat = (await table.schema()).fields.find((f) => f.name === "category");
expect(cat?.metadata.get("unit")).toBe("label"); // preserved
expect(cat?.metadata.get("source")).toBe("import"); // added
expect(cat?.metadata.has("pii")).toBe(false); // deleted
});
it("can cast to various types", async function () {
const con = await connect(tmpDir.name);

View File

@@ -42,6 +42,7 @@ export {
AddResult,
AddColumnsResult,
AlterColumnsResult,
UpdateFieldMetadataResult,
DeleteResult,
DropColumnsResult,
UpdateResult,
@@ -117,6 +118,7 @@ export {
WriteProgress,
LsmWriteSpec,
ColumnAlteration,
FieldMetadataUpdate,
} from "./table";
export {

View File

@@ -32,6 +32,7 @@ import {
OptimizeStats,
TableStatistics,
Tags,
UpdateFieldMetadataResult,
UpdateResult,
Table as _NativeTable,
} from "./native";
@@ -508,6 +509,18 @@ export abstract class Table {
abstract alterColumns(
columnAlterations: ColumnAlteration[],
): Promise<AlterColumnsResult>;
/**
* Update per-field (column) metadata.
* @param {FieldMetadataUpdate[]} updates One or more per-field updates. Each
* update's metadata is merged into the field's existing metadata by default;
* a value of `null` deletes that key, and `replace: true` swaps the whole map.
* @returns {Promise<UpdateFieldMetadataResult>} resolves to the new table version.
*/
abstract updateFieldMetadata(
updates: FieldMetadataUpdate[],
): Promise<UpdateFieldMetadataResult>;
/**
* Drop one or more columns from the dataset
*
@@ -1037,6 +1050,12 @@ export class LocalTable extends Table {
return await this.inner.alterColumns(processedAlterations);
}
async updateFieldMetadata(
updates: FieldMetadataUpdate[],
): Promise<UpdateFieldMetadataResult> {
return await this.inner.updateFieldMetadata(updates);
}
async dropColumns(columnNames: string[]): Promise<DropColumnsResult> {
return await this.inner.dropColumns(columnNames);
}
@@ -1203,3 +1222,19 @@ export interface ColumnAlteration {
/** Set the new nullability. Note that a nullable column cannot be made non-nullable. */
nullable?: boolean;
}
/** A per-field metadata update, addressed by dot-path. */
export interface FieldMetadataUpdate {
/**
* Dot-separated path to the field. For a top-level column this is just its
* name; for a nested field it's the path, e.g. "a.b.c".
*/
path: string;
/**
* Metadata key/value pairs. Merged into the field's existing metadata by
* default; a value of `null` deletes that key.
*/
metadata: Record<string, string | null>;
/** If true, replace the field's entire metadata map instead of merging. */
replace?: boolean;
}

View File

@@ -5,8 +5,9 @@ use std::collections::HashMap;
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
use lancedb::table::{
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration,
FieldMetadataUpdate as LanceFieldMetadataUpdate, NewColumnTransform, OptimizeAction,
OptimizeOptions, Table as LanceDbTable,
};
use napi::bindgen_prelude::*;
use napi::threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode};
@@ -355,6 +356,23 @@ impl Table {
Ok(res.into())
}
#[napi(catch_unwind)]
pub async fn update_field_metadata(
&self,
updates: Vec<FieldMetadataUpdate>,
) -> napi::Result<UpdateFieldMetadataResult> {
let updates = updates
.into_iter()
.map(LanceFieldMetadataUpdate::from)
.collect::<Vec<_>>();
let res = self
.inner_ref()?
.update_field_metadata(&updates)
.await
.default_error()?;
Ok(res.into())
}
#[napi(catch_unwind)]
pub async fn drop_columns(&self, columns: Vec<String>) -> napi::Result<DropColumnsResult> {
let col_refs = columns.iter().map(String::as_str).collect::<Vec<_>>();
@@ -747,6 +765,29 @@ pub struct ColumnAlteration {
pub nullable: Option<bool>,
}
/// A per-field metadata update, addressed by dot-path. Merges into the field's
/// existing metadata by default; a `null` value deletes a key, and `replace`
/// swaps the field's entire metadata map.
#[napi(object)]
pub struct FieldMetadataUpdate {
/// Dot-separated path to the field (e.g. "embedding" or "a.b.c").
pub path: String,
/// Metadata keys to set; a `null` value deletes that key.
pub metadata: HashMap<String, Option<String>>,
/// If true, replace the field's entire metadata map instead of merging.
pub replace: Option<bool>,
}
impl From<FieldMetadataUpdate> for LanceFieldMetadataUpdate {
fn from(js: FieldMetadataUpdate) -> Self {
Self {
path: js.path,
metadata: js.metadata,
replace: js.replace.unwrap_or(false),
}
}
}
impl TryFrom<ColumnAlteration> for LanceColumnAlteration {
type Error = String;
fn try_from(js: ColumnAlteration) -> std::result::Result<Self, Self::Error> {
@@ -987,6 +1028,19 @@ impl From<lancedb::table::AlterColumnsResult> for AlterColumnsResult {
}
}
#[napi(object)]
pub struct UpdateFieldMetadataResult {
pub version: i64,
}
impl From<lancedb::table::UpdateFieldMetadataResult> for UpdateFieldMetadataResult {
fn from(value: lancedb::table::UpdateFieldMetadataResult) -> Self {
Self {
version: value.version as i64,
}
}
}
#[napi(object)]
pub struct DropColumnsResult {
pub version: i64,

View File

@@ -208,6 +208,9 @@ class Table:
async def alter_columns(
self, columns: list[dict[str, Any]]
) -> AlterColumnsResult: ...
async def update_field_metadata(
self, updates: list[dict[str, Any]]
) -> UpdateFieldMetadataResult: ...
async def optimize(
self,
*,
@@ -460,6 +463,9 @@ class AddColumnsResult:
class AlterColumnsResult:
version: int
class UpdateFieldMetadataResult:
version: int
class DropColumnsResult:
version: int

View File

@@ -25,6 +25,7 @@ from lancedb._lancedb import (
AddColumnsResult,
AddResult,
AlterColumnsResult,
UpdateFieldMetadataResult,
DeleteResult,
DropColumnsResult,
IndexConfig,
@@ -850,6 +851,11 @@ class RemoteTable(Table):
) -> AlterColumnsResult:
return LOOP.run(self._table.alter_columns(*alterations))
def update_field_metadata(
self, *updates: dict[str, Any]
) -> UpdateFieldMetadataResult:
return LOOP.run(self._table.update_field_metadata(*updates))
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
return LOOP.run(self._table.drop_columns(columns))

View File

@@ -154,6 +154,7 @@ if TYPE_CHECKING:
AddColumnsResult,
AddResult,
AlterColumnsResult,
UpdateFieldMetadataResult,
DeleteResult,
DropColumnsResult,
LsmWriteSpec,
@@ -1799,6 +1800,29 @@ class Table(ABC):
version: the new version number of the table after the alteration.
"""
@abstractmethod
def update_field_metadata(
self, *updates: dict[str, Any]
) -> UpdateFieldMetadataResult:
"""
Update per-field (column) metadata.
Parameters
----------
updates : dict
One or more dicts, each with:
- "path": str — dot-path to the field (e.g. "embedding" or "a.b.c").
- "metadata": dict[str, str | None] — keys to set; a value of ``None``
deletes that key.
- "replace": bool, optional — replace the field's whole metadata map
instead of merging (default False).
Returns
-------
UpdateFieldMetadataResult
version: the new table version after the update.
"""
@abstractmethod
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
"""
@@ -3583,6 +3607,11 @@ class LanceTable(Table):
) -> AlterColumnsResult:
return LOOP.run(self._table.alter_columns(*alterations))
def update_field_metadata(
self, *updates: dict[str, Any]
) -> UpdateFieldMetadataResult:
return LOOP.run(self._table.update_field_metadata(*updates))
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
return LOOP.run(self._table.drop_columns(columns))
@@ -5234,6 +5263,13 @@ class AsyncTable:
"""
return await self._inner.alter_columns(alterations)
async def update_field_metadata(
self, *updates: dict[str, Any]
) -> UpdateFieldMetadataResult:
"""Update per-field metadata. See
[`Table.update_field_metadata`][lancedb.table.Table.update_field_metadata]."""
return await self._inner.update_field_metadata(updates)
async def drop_columns(self, columns: Iterable[str]):
"""
Drop columns from the table.

View File

@@ -2472,6 +2472,30 @@ def test_alter_columns(mem_db: DBConnection):
assert table.to_arrow().column_names == ["new_id"]
def test_update_field_metadata(mem_db: DBConnection):
data = pa.table({"id": [0, 1], "category": ["a", "b"]})
table = mem_db.create_table("my_table", data=data)
res = table.update_field_metadata(
{"path": "category", "metadata": {"unit": "label", "pii": "false"}}
)
assert res.version == 2
# Arrow field metadata is bytes-keyed
assert table.schema.field("category").metadata == {
b"unit": b"label",
b"pii": b"false",
}
# merge: add a key, delete one via None, keep the rest
table.update_field_metadata(
{"path": "category", "metadata": {"source": "import", "pii": None}}
)
assert table.schema.field("category").metadata == {
b"unit": b"label",
b"source": b"import",
}
@pytest.mark.asyncio
async def test_alter_columns_async(mem_db_async: AsyncConnection):
data = pa.table({"id": [0, 1]})

View File

@@ -16,7 +16,7 @@ use query::{FTSQuery, HybridQuery, Query, VectorQuery};
use session::Session;
use table::{
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, LsmWriteSpec,
MergeResult, Table, UpdateResult,
MergeResult, Table, UpdateFieldMetadataResult, UpdateResult,
};
pub mod arrow;
@@ -50,6 +50,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<RecordBatchStream>()?;
m.add_class::<AddColumnsResult>()?;
m.add_class::<AlterColumnsResult>()?;
m.add_class::<UpdateFieldMetadataResult>()?;
m.add_class::<AddResult>()?;
m.add_class::<MergeResult>()?;
m.add_class::<LsmWriteSpec>()?;

View File

@@ -16,8 +16,8 @@ use arrow::{
pyarrow::{FromPyArrow, PyArrowType, ToPyArrow},
};
use lancedb::table::{
AddDataMode, ColumnAlteration, Duration, NewColumnTransform, OptimizeAction, OptimizeOptions,
Table as LanceDbTable,
AddDataMode, ColumnAlteration, Duration, FieldMetadataUpdate, NewColumnTransform,
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
};
use pyo3::{
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
@@ -357,6 +357,27 @@ impl From<lancedb::table::AlterColumnsResult> for AlterColumnsResult {
}
}
#[pyclass(get_all, from_py_object)]
#[derive(Clone, Debug)]
pub struct UpdateFieldMetadataResult {
pub version: u64,
}
#[pymethods]
impl UpdateFieldMetadataResult {
pub fn __repr__(&self) -> String {
format!("UpdateFieldMetadataResult(version={})", self.version)
}
}
impl From<lancedb::table::UpdateFieldMetadataResult> for UpdateFieldMetadataResult {
fn from(result: lancedb::table::UpdateFieldMetadataResult) -> Self {
Self {
version: result.version,
}
}
}
#[pyclass(get_all, from_py_object)]
#[derive(Clone, Debug)]
pub struct DropColumnsResult {
@@ -1127,6 +1148,45 @@ impl Table {
Ok(())
})
}
pub fn update_field_metadata<'a>(
self_: PyRef<'a, Self>,
updates: Vec<Bound<PyDict>>,
) -> PyResult<Bound<'a, PyAny>> {
let updates = updates
.iter()
.map(|update| {
let path: String = update
.get_item("path")?
.ok_or_else(|| PyValueError::new_err("Missing path"))?
.extract()?;
let mut field_update = FieldMetadataUpdate::new(path);
if let Some(metadata) = update.get_item("metadata")? {
let metadata_dict = metadata.cast::<PyDict>()?;
for (key, value) in metadata_dict.iter() {
let key: String = key.extract()?;
if value.is_none() {
field_update = field_update.remove(key);
} else {
field_update = field_update.set(key, value.extract::<String>()?);
}
}
}
if let Some(replace) = update.get_item("replace")?
&& replace.extract::<bool>()?
{
field_update = field_update.replace();
}
Ok(field_update)
})
.collect::<PyResult<Vec<_>>>()?;
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
let result = inner.update_field_metadata(&updates).await.infer_error()?;
Ok(UpdateFieldMetadataResult::from(result))
})
}
}
#[derive(FromPyObject)]

View File

@@ -18,13 +18,13 @@ use crate::index::waiter::wait_for_index;
use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest};
use crate::table::AddColumnsResult;
use crate::table::AddResult;
use crate::table::AlterColumnsResult;
use crate::table::DeleteResult;
use crate::table::DropColumnsResult;
use crate::table::MergeResult;
use crate::table::Tags;
use crate::table::UpdateResult;
use crate::table::query::create_multi_vector_plan;
use crate::table::{AlterColumnsResult, FieldMetadataUpdate, UpdateFieldMetadataResult};
use crate::table::{AnyQuery, Filter, Predicate, PreprocessingOutput, TableStatistics};
use crate::utils::background_cache::BackgroundCache;
use crate::utils::{
@@ -1968,6 +1968,35 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
Ok(result)
}
async fn update_field_metadata(
&self,
updates: &[FieldMetadataUpdate],
) -> Result<UpdateFieldMetadataResult> {
self.check_mutable().await?;
let body = serde_json::json!({ "updates": updates });
let request = self
.client
.post(&format!(
"/v1/table/{}/update_field_metadata/",
self.identifier
))
.json(&body);
let (request_id, response) = self.send(request, true).await?;
let response = self.check_table_response(&request_id, response).await?;
let body = response.text().await.err_to_http(request_id.clone())?;
let result: UpdateFieldMetadataResult =
serde_json::from_str(&body).map_err(|e| Error::Http {
source: format!("Failed to parse update_field_metadata response: {}", e).into(),
request_id,
status_code: None,
})?;
self.invalidate_schema_cache();
self.track_write_version(result.version);
Ok(result)
}
async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
self.check_mutable().await?;
let body = serde_json::json!({ "columns": columns });
@@ -2261,6 +2290,7 @@ mod tests {
use crate::remote::client::{ClientConfig, RetryConfig};
use crate::table::AddDataMode;
use crate::table::FieldMetadataUpdate;
use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type};
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, record_batch};
@@ -6460,4 +6490,25 @@ mod tests {
assert!(!headers.contains_key("x-lancedb-min-version"));
assert!(!headers.contains_key("x-lancedb-min-timestamp"));
}
#[tokio::test]
async fn test_update_field_metadata() {
let table = Table::new_with_handler("my_table", |request| {
assert_eq!(request.method(), "POST");
assert_eq!(
request.url().path(),
"/v1/table/my_table/update_field_metadata/"
);
http::Response::builder()
.status(200)
.body(r#"{"version": 7, "fields": {"category": {"unit": "label"}}}"#)
.unwrap()
});
let result = table
.update_field_metadata(&[FieldMetadataUpdate::new("category").set("unit", "label")])
.await
.unwrap();
assert_eq!(result.version, 7);
}
}

View File

@@ -91,7 +91,10 @@ pub use lance::dataset::scanner::DatasetRecordBatchStream;
use lance::dataset::statistics::DatasetStatisticsExt;
pub use lance_index::optimize::OptimizeOptions;
pub use optimize::{CompactionOptions, OptimizeAction, OptimizeStats};
pub use schema_evolution::{AddColumnsResult, AlterColumnsResult, DropColumnsResult};
pub use schema_evolution::{
AddColumnsResult, AlterColumnsResult, DropColumnsResult, FieldMetadataUpdate,
UpdateFieldMetadataResult,
};
use serde_with::skip_serializing_none;
pub use update::{UpdateBuilder, UpdateResult};
@@ -660,6 +663,19 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
message: "create_insert_exec not implemented".to_string(),
})
}
/// Update per-field metadata. Merges into existing metadata by default;
/// [`FieldMetadataUpdate::remove`] deletes a key and
/// [`FieldMetadataUpdate::replace`] swaps the field's whole map.
///
/// The default returns `NotSupported`; Lance-backed and remote tables override it.
async fn update_field_metadata(
&self,
_updates: &[FieldMetadataUpdate],
) -> Result<UpdateFieldMetadataResult> {
Err(Error::NotSupported {
message: "update_field_metadata is not supported on this table type".into(),
})
}
}
/// A Table is a collection of strong typed Rows.
@@ -1340,6 +1356,14 @@ impl Table {
self.inner.alter_columns(alterations).await
}
/// Update per-field metadata (merges by default).
pub async fn update_field_metadata(
&self,
updates: &[FieldMetadataUpdate],
) -> Result<UpdateFieldMetadataResult> {
self.inner.update_field_metadata(updates).await
}
/// Remove columns from the table.
pub async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
self.inner.drop_columns(columns).await
@@ -2886,6 +2910,13 @@ impl BaseTable for NativeTable {
schema_evolution::execute_alter_columns(self, alterations).await
}
async fn update_field_metadata(
&self,
updates: &[FieldMetadataUpdate],
) -> Result<UpdateFieldMetadataResult> {
schema_evolution::execute_update_field_metadata(self, updates).await
}
async fn drop_columns(&self, columns: &[&str]) -> Result<DropColumnsResult> {
schema_evolution::execute_drop_columns(self, columns).await
}

View File

@@ -10,6 +10,7 @@
use lance::dataset::{ColumnAlteration, NewColumnTransform};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use super::NativeTable;
use crate::Result;
@@ -44,6 +45,52 @@ pub struct DropColumnsResult {
pub version: u64,
}
/// A single field's metadata update, addressed by dot-path.
///
/// Merges into the field's existing metadata by default. Use [`Self::remove`] to
/// delete a key, or [`Self::replace`] to swap the field's entire metadata map.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize)]
pub struct FieldMetadataUpdate {
/// Dot-separated path to the field (e.g. `"embedding"` or `"address.zip"`).
pub path: String,
/// Keys to set (`Some`) or delete (`None`).
pub metadata: HashMap<String, Option<String>>,
/// If `true`, replace the field's entire metadata map instead of merging.
pub replace: bool,
}
impl FieldMetadataUpdate {
pub fn new(path: impl Into<String>) -> Self {
Self {
path: path.into(),
metadata: HashMap::new(),
replace: false,
}
}
pub fn set(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.metadata.insert(key.into(), Some(value.into()));
self
}
pub fn remove(mut self, key: impl Into<String>) -> Self {
self.metadata.insert(key.into(), None);
self
}
pub fn replace(mut self) -> Self {
self.replace = true;
self
}
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct UpdateFieldMetadataResult {
/// The commit version associated with the operation.
#[serde(default)]
pub version: u64,
}
/// Internal implementation of the add columns logic.
///
/// Adds new columns to the table using the provided transforms.
@@ -90,6 +137,32 @@ pub(crate) async fn execute_drop_columns(
Ok(DropColumnsResult { version })
}
/// Internal implementation of the update field metadata logic.
///
/// Merges or replaces per-field metadata, addressing fields by dot-path.
pub(crate) async fn execute_update_field_metadata(
table: &NativeTable,
updates: &[FieldMetadataUpdate],
) -> Result<UpdateFieldMetadataResult> {
table.dataset.ensure_mutable()?;
let mut dataset = (*table.dataset.get().await?).clone();
let mut builder = dataset.update_field_metadata();
for update in updates {
let entries = update.metadata.iter().map(|(k, v)| (k.clone(), v.clone()));
builder = if update.replace {
builder.replace(&update.path, entries)?
} else {
builder.update(&update.path, entries)?
};
}
builder.await?;
let version = dataset.version().version;
table.dataset.update(dataset);
Ok(UpdateFieldMetadataResult { version })
}
#[cfg(test)]
mod tests {
use arrow_array::{Int32Array, StringArray, record_batch};
@@ -97,6 +170,7 @@ mod tests {
use futures::TryStreamExt;
use lance::dataset::ColumnAlteration;
use super::FieldMetadataUpdate;
use crate::connect;
use crate::query::{ExecutableQuery, QueryBase, Select};
use crate::table::NewColumnTransform;
@@ -610,4 +684,46 @@ mod tests {
let v4 = table.version().await.unwrap();
assert_eq!(drop_result.version, v4);
}
#[tokio::test]
async fn test_update_field_metadata() {
let conn = connect("memory://").execute().await.unwrap();
let batch = record_batch!(
("id", Int32, [1, 2, 3]),
("category", Utf8, ["A", "B", "C"])
)
.unwrap();
let table = conn
.create_table("test_update_field_metadata", batch)
.execute()
.await
.unwrap();
// Set metadata on a field.
table
.update_field_metadata(&[FieldMetadataUpdate::new("category")
.set("unit", "label")
.set("pii", "false")])
.await
.unwrap();
let schema = table.schema().await.unwrap();
let field = schema.field_with_name("category").unwrap();
assert_eq!(
field.metadata().get("unit").map(String::as_str),
Some("label")
);
// Merge: add a key, delete one, keep the rest.
table
.update_field_metadata(&[FieldMetadataUpdate::new("category")
.set("source", "import")
.remove("pii")])
.await
.unwrap();
let schema = table.schema().await.unwrap();
let md = schema.field_with_name("category").unwrap().metadata();
assert_eq!(md.get("unit").map(String::as_str), Some("label")); // preserved
assert_eq!(md.get("source").map(String::as_str), Some("import")); // added
assert!(!md.contains_key("pii")); // deleted
}
}