Files
lancedb/python/python/tests/test_primary_key.py
Heng Ge 6a431ff0a0 feat: support setting unenforced primary key (#3394)
## Summary

Adds `Table::set_unenforced_primary_key` — records a single column as
the
table's unenforced primary key in Lance schema field metadata.
"Unenforced"
means LanceDB does not check uniqueness on write; the key is metadata
that
`merge_insert` consumes.

- Single-column only; the column must exist and have a supported dtype
(Int32, Int64, Utf8, LargeUtf8, Binary, LargeBinary, FixedSizeBinary).
The
API accepts an iterable for binding ergonomics but requires exactly one
  column — compound keys are rejected.
- The primary key is immutable: calling this on a table that already has
an
unenforced primary key is rejected. Concurrent writers racing to set the
key
  fail at commit time rather than silently overriding it.
- `RemoteTable` returns `NotSupported`.
- Bindings: Python (`AsyncTable`, `LanceTable`, `RemoteTable`) and
TypeScript
  (`Table.setUnenforcedPrimaryKey`).

## Context

Split out from #3354 per review feedback, so the unenforced primary key
and the
`merge_insert` sharding spec land as separate reviewable PRs.

No Lance dependency bump — `main` is already on v7.0.0-beta.10, which
includes
the field-metadata round-trip fix the API relies on. Enforcing
primary-key
immutability at the Lance commit layer (so the cross-column concurrent
race is
also rejected) is a companion Lance change: lance-format/lance#6810.
2026-05-16 23:12:55 -07:00

80 lines
2.5 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
"""Tests for Table.set_unenforced_primary_key."""
from datetime import timedelta
import lancedb
import pyarrow as pa
import pytest
def _empty_table(path, schema):
db = lancedb.connect(path, read_consistency_interval=timedelta(seconds=0))
return db.create_table("t", schema=schema)
def test_set_unenforced_primary_key_accepts_string_or_one_element_list(tmp_path):
schema = pa.schema([pa.field("id", pa.int64(), nullable=False)])
# Bare string.
table = _empty_table(tmp_path / "s", schema)
table.set_unenforced_primary_key("id")
# One-element list.
table = _empty_table(tmp_path / "l", schema)
table.set_unenforced_primary_key(["id"])
def test_set_unenforced_primary_key_rejects_compound_and_empty(tmp_path):
table = _empty_table(
tmp_path,
pa.schema(
[
pa.field("a", pa.utf8(), nullable=False),
pa.field("b", pa.int64(), nullable=False),
]
),
)
# Compound keys are not supported.
with pytest.raises(Exception, match="compound"):
table.set_unenforced_primary_key(["a", "b"])
# Empty input.
with pytest.raises(Exception, match="required"):
table.set_unenforced_primary_key([])
def test_set_unenforced_primary_key_is_immutable(tmp_path):
table = _empty_table(
tmp_path,
pa.schema(
[
pa.field("a", pa.utf8(), nullable=False),
pa.field("b", pa.int64(), nullable=False),
]
),
)
table.set_unenforced_primary_key("a")
# The primary key cannot be changed or re-set once installed.
with pytest.raises(Exception, match="already set"):
table.set_unenforced_primary_key("b")
with pytest.raises(Exception, match="already set"):
table.set_unenforced_primary_key("a")
def test_set_unenforced_primary_key_validates(tmp_path):
table = _empty_table(
tmp_path / "t", pa.schema([pa.field("id", pa.utf8(), nullable=False)])
)
# Unknown column.
with pytest.raises(Exception, match="not found"):
table.set_unenforced_primary_key("nonexistent")
# Unsupported dtype (Float32 not in the supported set).
bad = _empty_table(
tmp_path / "bad", pa.schema([pa.field("id", pa.float32(), nullable=False)])
)
with pytest.raises(Exception, match="not supported"):
bad.set_unenforced_primary_key("id")