feat: computed columns as a param on add_columns

Per the interface design: computed columns are parameters on the
existing add_columns operation, not a separate method.

- BaseTable::add_computed_columns takes (name, sql_type) pairs plus an
  f(args) expression. The default implementation returns NotSupported;
  RemoteTable posts 'computed' entries to the existing
  /v1/table/{id}/add_columns route.
- python add_columns gains computed= on LanceTable, RemoteTable, and
  AsyncTable: tbl.add_columns(computed={'doubled': ('FLOAT',
  'double_it(val)')}); grouped by expression so struct-returning
  functions' columns land adjacently.
This commit is contained in:
Wyatt Alt
2026-06-12 10:26:55 -07:00
committed by Jack Ye
parent ff3c7111b9
commit 04948fc4f6
5 changed files with 120 additions and 10 deletions

View File

@@ -884,8 +884,18 @@ class RemoteTable(Table):
def count_rows(self, filter: Optional[str] = None) -> int:
    """Synchronously run the async table's ``count_rows``.

    ``filter`` is forwarded unchanged; the value produced by the
    awaited call is returned as-is.
    """
    pending = self._table.count_rows(filter)
    return LOOP.run(pending)
# NOTE(review): this looks like the pre-change add_columns that the diff
# replaces with the keyword-aware definition shown right after it -- confirm.
def add_columns(self, transforms: Dict[str, str]) -> AddColumnsResult:
return LOOP.run(self._table.add_columns(transforms))
def add_columns(
    self,
    transforms: Optional[Dict[str, str]] = None,
    *,
    computed: Optional[Dict[str, tuple]] = None,
) -> Optional[AddColumnsResult]:
    """Add columns to the table, optionally declaring computed columns.

    Parameters
    ----------
    transforms:
        Mapping of new column name to SQL expression. ``None`` skips
        this step.
    computed:
        ``{column: (sql_type, expression)}``. Declares the binding
        only; the server fills the values (server-backed).

    Returns
    -------
    The result of the ``transforms`` step, or ``None`` when no
    ``transforms`` were given.
    """
    # The async add_columns already applies `transforms` first and then
    # `computed`, returning the transforms result, so one delegation
    # replaces the two separate event-loop round trips.
    return LOOP.run(self._table.add_columns(transforms, computed=computed))
def refresh_column(
self,

View File

@@ -702,6 +702,22 @@ def _normalize_progress(progress):
return progress, False
def _computed_groups(computed):
"""Group {column: (sql_type, expression)} by expression, preserving
declaration order (struct-returning functions need their columns
adjacent so schema order matches field order)."""
groups = []
for name, (sql_type, expression) in computed.items():
for expr, cols in groups:
if expr == expression:
cols.append((name, sql_type))
break
else:
groups.append((expression, [(name, sql_type)]))
return groups
class Table(ABC):
"""
A Table is a collection of Records in a LanceDB Database.
@@ -3710,9 +3726,20 @@ class LanceTable(Table):
return LOOP.run(self._table.index_stats(index_name))
def add_columns(
    self,
    transforms: Dict[str, str] | pa.field | List[pa.field] | pa.Schema | None = None,
    *,
    computed: Optional[Dict[str, tuple]] = None,
) -> Optional[AddColumnsResult]:
    """Add columns to the table, optionally declaring computed columns.

    Parameters
    ----------
    transforms:
        Column definitions in any form the async table accepts (a
        name-to-SQL-expression dict, a field, a list of fields, or a
        schema). ``None`` skips this step.
    computed:
        ``{column: (sql_type, expression)}``. Declares the binding
        only; the server fills the values (server-backed).

    Returns
    -------
    The result of the ``transforms`` step, or ``None`` when no
    ``transforms`` were given.
    """
    # The async add_columns already applies `transforms` first and then
    # `computed`, returning the transforms result; delegating once
    # removes the throwaway local that only existed to be deleted.
    return LOOP.run(self._table.add_columns(transforms, computed=computed))
def refresh_column(
self,
@@ -5437,8 +5464,11 @@ class AsyncTable:
)
async def add_columns(
self, transforms: dict[str, str] | pa.field | List[pa.field] | pa.Schema
) -> AddColumnsResult:
self,
transforms: dict[str, str] | pa.field | List[pa.field] | pa.Schema | None = None,
*,
computed: Optional[Dict[str, tuple]] = None,
) -> Optional[AddColumnsResult]:
"""
Add new columns with defined values.
@@ -5457,6 +5487,7 @@ class AsyncTable:
version: the new version number of the table after adding columns.
"""
result = None
if isinstance(transforms, pa.Field):
transforms = [transforms]
if isinstance(transforms, list) and all(
@@ -5464,9 +5495,15 @@ class AsyncTable:
):
transforms = pa.schema(transforms)
if isinstance(transforms, pa.Schema):
return await self._inner.add_columns_with_schema(transforms)
else:
return await self._inner.add_columns(list(transforms.items()))
result = await self._inner.add_columns_with_schema(transforms)
elif transforms is not None:
result = await self._inner.add_columns(list(transforms.items()))
if computed:
# computed: {column: (sql_type, expression)} -- declares the
# binding only; the server fills the values (server-backed).
for expression, cols in _computed_groups(computed):
await self._inner.add_computed_columns(cols, expression)
return result
async def alter_columns(
self, *alterations: Iterable[dict[str, Any]]

View File

@@ -1060,6 +1060,20 @@ impl Table {
})
}
/// Declare computed columns on the wrapped table.
///
/// `columns` holds `(name, sql_type)` pairs that all share `expression`.
/// Returns a Python awaitable that resolves once the inner table's
/// `add_computed_columns` call finishes; failures are converted through
/// `infer_error`.
pub fn add_computed_columns(
    self_: PyRef<'_, Self>,
    columns: Vec<(String, String)>,
    expression: String,
) -> PyResult<Bound<'_, PyAny>> {
    // Clone the handle so the future owns everything it touches.
    let table = self_.inner_ref()?.clone();
    let py = self_.py();
    future_into_py(py, async move {
        let outcome = table.add_computed_columns(&columns, &expression).await;
        outcome.infer_error()
    })
}
#[pyo3(signature = (columns, where_clause=None, num_workers=None, max_workers=None))]
pub fn refresh_column(
self_: PyRef<'_, Self>,

View File

@@ -2309,6 +2309,29 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
message: "optimize is not supported on LanceDB cloud.".into(),
})
}
/// Declare computed columns on the remote table.
///
/// Builds one `new_columns` entry per `(name, data_type)` pair, each
/// carrying the shared `expression` inside a `computed` object, and
/// POSTs them to `/v1/table/{id}/add_columns/`. The response is only
/// validated through `check_table_response`; nothing from it is
/// returned.
///
/// NOTE(review): an empty `columns` slice still sends a request with an
/// empty `new_columns` array -- confirm the server accepts that.
/// NOTE(review): confirm whether this route needs the same pre-flight
/// checks as the other mutating routes in this impl.
async fn add_computed_columns(
    &self,
    columns: &[(String, String)],
    expression: &str,
) -> Result<()> {
    let mut entries = Vec::with_capacity(columns.len());
    for (name, data_type) in columns {
        entries.push(serde_json::json!({
            "name": name,
            "computed": { "data_type": data_type, "expression": expression },
        }));
    }
    let body = serde_json::json!({ "new_columns": entries });
    let path = format!("/v1/table/{}/add_columns/", self.identifier);
    let request = self.client.post(&path).json(&body);
    let (request_id, response) = self.send(request, true).await?;
    self.check_table_response(&request_id, response).await?;
    Ok(())
}
async fn refresh_column(
&self,
columns: &[String],

View File

@@ -620,6 +620,21 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
transforms: NewColumnTransform,
read_columns: Option<Vec<String>>,
) -> Result<AddColumnsResult>;
/// Declare computed columns bound to a registered function: each
/// `(name, sql_type)` is added all-null with the expression stored
/// as its binding; no compute happens here (the server's lazy
/// detector or refresh_column fills them). Several columns map a
/// struct-returning function's fields positionally. Server-backed
/// feature; the default returns NotSupported.
async fn add_computed_columns(
&self,
_columns: &[(String, String)],
_expression: &str,
) -> Result<()> {
Err(Error::NotSupported {
message: "computed columns are not supported by this table".into(),
})
}
/// Trigger recompute of computed columns. The expression is
/// resolved server-side from each column's stored binding; columns
/// bound to the same struct-returning function refresh together.
@@ -1477,6 +1492,17 @@ impl Table {
self.inner.add_columns(transforms, read_columns).await
}
/// Declare computed columns bound to a registered function.
///
/// Takes `(name, sql_type)` pairs plus an `f(args)` expression and
/// forwards them unchanged to the underlying table implementation. No
/// compute happens here. Server-backed feature.
pub async fn add_computed_columns(
    &self,
    columns: &[(String, String)],
    expression: &str,
) -> Result<()> {
    let table = &self.inner;
    table.add_computed_columns(columns, expression).await
}
/// Trigger recompute of computed columns (REFRESH COLUMN). The
/// expression comes from each column's stored binding; columns
/// bound to the same struct-returning function refresh together.