mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-03 20:30:42 +00:00
feat: support setting unenforced primary key (#3394)
## Summary Adds `Table::set_unenforced_primary_key` — records a single column as the table's unenforced primary key in Lance schema field metadata. "Unenforced" means LanceDB does not check uniqueness on write; the key is metadata that `merge_insert` consumes. - Single-column only; the column must exist and have a supported dtype (Int32, Int64, Utf8, LargeUtf8, Binary, LargeBinary, FixedSizeBinary). The API accepts an iterable for binding ergonomics but requires exactly one column — compound keys are rejected. - The primary key is immutable: calling this on a table that already has an unenforced primary key is rejected. Concurrent writers racing to set the key fail at commit time rather than silently overriding it. - `RemoteTable` returns `NotSupported`. - Bindings: Python (`AsyncTable`, `LanceTable`, `RemoteTable`) and TypeScript (`Table.setUnenforcedPrimaryKey`). ## Context Split out from #3354 per review feedback, so the unenforced primary key and the `merge_insert` sharding spec land as separate reviewable PRs. No Lance dependency bump — `main` is already on v7.0.0-beta.10, which includes the field-metadata round-trip fix the API relies on. Enforcing primary-key immutability at the Lance commit layer (so the cross-column concurrent race is also rejected) is a companion Lance change: lance-format/lance#6810.
This commit is contained in:
79
python/python/tests/test_primary_key.py
Normal file
79
python/python/tests/test_primary_key.py
Normal file
@@ -0,0 +1,79 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
"""Tests for Table.set_unenforced_primary_key."""
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
import lancedb
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
|
||||
|
||||
def _empty_table(path, schema):
|
||||
db = lancedb.connect(path, read_consistency_interval=timedelta(seconds=0))
|
||||
return db.create_table("t", schema=schema)
|
||||
|
||||
|
||||
def test_set_unenforced_primary_key_accepts_string_or_one_element_list(tmp_path):
|
||||
schema = pa.schema([pa.field("id", pa.int64(), nullable=False)])
|
||||
|
||||
# Bare string.
|
||||
table = _empty_table(tmp_path / "s", schema)
|
||||
table.set_unenforced_primary_key("id")
|
||||
|
||||
# One-element list.
|
||||
table = _empty_table(tmp_path / "l", schema)
|
||||
table.set_unenforced_primary_key(["id"])
|
||||
|
||||
|
||||
def test_set_unenforced_primary_key_rejects_compound_and_empty(tmp_path):
|
||||
table = _empty_table(
|
||||
tmp_path,
|
||||
pa.schema(
|
||||
[
|
||||
pa.field("a", pa.utf8(), nullable=False),
|
||||
pa.field("b", pa.int64(), nullable=False),
|
||||
]
|
||||
),
|
||||
)
|
||||
# Compound keys are not supported.
|
||||
with pytest.raises(Exception, match="compound"):
|
||||
table.set_unenforced_primary_key(["a", "b"])
|
||||
# Empty input.
|
||||
with pytest.raises(Exception, match="required"):
|
||||
table.set_unenforced_primary_key([])
|
||||
|
||||
|
||||
def test_set_unenforced_primary_key_is_immutable(tmp_path):
|
||||
table = _empty_table(
|
||||
tmp_path,
|
||||
pa.schema(
|
||||
[
|
||||
pa.field("a", pa.utf8(), nullable=False),
|
||||
pa.field("b", pa.int64(), nullable=False),
|
||||
]
|
||||
),
|
||||
)
|
||||
table.set_unenforced_primary_key("a")
|
||||
# The primary key cannot be changed or re-set once installed.
|
||||
with pytest.raises(Exception, match="already set"):
|
||||
table.set_unenforced_primary_key("b")
|
||||
with pytest.raises(Exception, match="already set"):
|
||||
table.set_unenforced_primary_key("a")
|
||||
|
||||
|
||||
def test_set_unenforced_primary_key_validates(tmp_path):
|
||||
table = _empty_table(
|
||||
tmp_path / "t", pa.schema([pa.field("id", pa.utf8(), nullable=False)])
|
||||
)
|
||||
# Unknown column.
|
||||
with pytest.raises(Exception, match="not found"):
|
||||
table.set_unenforced_primary_key("nonexistent")
|
||||
|
||||
# Unsupported dtype (Float32 not in the supported set).
|
||||
bad = _empty_table(
|
||||
tmp_path / "bad", pa.schema([pa.field("id", pa.float32(), nullable=False)])
|
||||
)
|
||||
with pytest.raises(Exception, match="not supported"):
|
||||
bad.set_unenforced_primary_key("id")
|
||||
Reference in New Issue
Block a user