diff --git a/docs/src/guides/tables/merge_insert.md b/docs/src/guides/tables/merge_insert.md index f6af2fcb..4391f5a5 100644 --- a/docs/src/guides/tables/merge_insert.md +++ b/docs/src/guides/tables/merge_insert.md @@ -71,6 +71,45 @@ with merge insert, enable both `when_matched_update_all()` and If a column is nullable, it can be omitted from input data and it will be considered `null`. Columns can also be provided in any order. +### Conditional Updates + +You can add a `where` clause to `when_matched_update_all()` to only update rows +that meet certain conditions. When using the `where` parameter, you must prefix +column names with either `source.` (for the new data) or `target.` (for the +existing data) to specify which table you're referencing. + +=== "Python" + + ```python + # Only update rows where the target's status is 'active' + table.merge_insert("id") \ + .when_matched_update_all(where="target.status = 'active'") \ + .when_not_matched_insert_all() \ + .execute(new_data) + + # Only update if the new price is higher than the existing price + table.merge_insert("product_id") \ + .when_matched_update_all(where="source.price > target.price") \ + .when_not_matched_insert_all() \ + .execute(new_data) + ``` + +=== "Typescript" + + ```typescript + // Only update rows where the target's status is 'active' + await table.mergeInsert("id") + .whenMatchedUpdateAll({ where: "target.status = 'active'" }) + .whenNotMatchedInsertAll() + .execute(newData); + + // Only update if the new price is higher than the existing price + await table.mergeInsert("product_id") + .whenMatchedUpdateAll({ where: "source.price > target.price" }) + .whenNotMatchedInsertAll() + .execute(newData); + ``` + ## Insert-if-not-exists To avoid inserting duplicate rows, you can use the insert-if-not-exists command. 
diff --git a/python/python/lancedb/merge.py b/python/python/lancedb/merge.py index 3cf56a9d..c65ddb43 100644 --- a/python/python/lancedb/merge.py +++ b/python/python/lancedb/merge.py @@ -45,6 +45,16 @@ class LanceMergeInsertBuilder(object): If there are multiple matches then the behavior is undefined. Currently this causes multiple copies of the row to be created but that behavior is subject to change. + + Parameters + ---------- + where : Optional[str], default None + A SQL filter expression to apply to matched rows. The filter must + specify whether you are referencing the source table (new data) or + the target table (existing data) by prefixing column names with + "source." or "target." respectively. + + Example: "target.status = 'active'" or "source.price > target.price" """ self._when_matched_update_all = True self._when_matched_update_all_condition = where