mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-03 12:20:40 +00:00
## Summary

Adds `Table::set_unenforced_primary_key` — records a single column as the table's unenforced primary key in Lance schema field metadata. "Unenforced" means LanceDB does not check uniqueness on write; the key is metadata that `merge_insert` consumes.

- Single-column only; the column must exist and have a supported dtype (Int32, Int64, Utf8, LargeUtf8, Binary, LargeBinary, FixedSizeBinary). The API accepts an iterable for binding ergonomics but requires exactly one column — compound keys are rejected.
- The primary key is immutable: calling this on a table that already has an unenforced primary key is rejected. Concurrent writers racing to set the key fail at commit time rather than silently overriding it.
- `RemoteTable` returns `NotSupported`.
- Bindings: Python (`AsyncTable`, `LanceTable`, `RemoteTable`) and TypeScript (`Table.setUnenforcedPrimaryKey`).

## Context

Split out from #3354 per review feedback, so the unenforced primary key and the `merge_insert` sharding spec land as separate reviewable PRs. No Lance dependency bump — `main` is already on v7.0.0-beta.10, which includes the field-metadata round-trip fix the API relies on. Enforcing primary-key immutability at the Lance commit layer (so the cross-column concurrent race is also rejected) is a companion Lance change: lance-format/lance#6810.
80 lines
2.5 KiB
Python
80 lines
2.5 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
|
|
"""Tests for Table.set_unenforced_primary_key."""
|
|
|
|
from datetime import timedelta
|
|
|
|
import lancedb
|
|
import pyarrow as pa
|
|
import pytest
|
|
|
|
|
|
def _empty_table(path, schema):
    """Create and return an empty table named ``"t"`` with ``schema``.

    The database is opened at ``path`` with a read-consistency interval of
    zero seconds.
    """
    connection = lancedb.connect(
        path,
        read_consistency_interval=timedelta(seconds=0),
    )
    return connection.create_table("t", schema=schema)
|
|
|
|
|
|
def test_set_unenforced_primary_key_accepts_string_or_one_element_list(tmp_path):
    """A bare column name and a one-element list are both accepted."""
    id_field = pa.field("id", pa.int64(), nullable=False)
    schema = pa.schema([id_field])

    # Key given as a plain string.
    by_name = _empty_table(tmp_path / "s", schema)
    by_name.set_unenforced_primary_key("id")

    # Key given as a list holding exactly one column name; uses a fresh
    # table in a separate directory.
    by_list = _empty_table(tmp_path / "l", schema)
    by_list.set_unenforced_primary_key(["id"])
|
|
|
|
|
|
def test_set_unenforced_primary_key_rejects_compound_and_empty(tmp_path):
    """Multi-column and empty key specifications both raise."""
    fields = [
        pa.field("a", pa.utf8(), nullable=False),
        pa.field("b", pa.int64(), nullable=False),
    ]
    table = _empty_table(tmp_path, pa.schema(fields))

    # More than one column: compound keys are not supported.
    with pytest.raises(Exception, match="compound"):
        table.set_unenforced_primary_key(["a", "b"])

    # No columns at all.
    with pytest.raises(Exception, match="required"):
        table.set_unenforced_primary_key([])
|
|
|
|
|
|
def test_set_unenforced_primary_key_is_immutable(tmp_path):
    """Once a key is set, any further attempt to set one raises."""
    fields = [
        pa.field("a", pa.utf8(), nullable=False),
        pa.field("b", pa.int64(), nullable=False),
    ]
    table = _empty_table(tmp_path, pa.schema(fields))

    # Install the key for the first time; this call must succeed.
    table.set_unenforced_primary_key("a")

    # Switching to a different column is rejected.
    with pytest.raises(Exception, match="already set"):
        table.set_unenforced_primary_key("b")

    # Re-setting the very same column is rejected too.
    with pytest.raises(Exception, match="already set"):
        table.set_unenforced_primary_key("a")
|
|
|
|
|
|
def test_set_unenforced_primary_key_validates(tmp_path):
    """Unknown columns and unsupported column dtypes are rejected."""
    utf8_schema = pa.schema([pa.field("id", pa.utf8(), nullable=False)])
    table = _empty_table(tmp_path / "t", utf8_schema)

    # The named column does not exist in the schema.
    with pytest.raises(Exception, match="not found"):
        table.set_unenforced_primary_key("nonexistent")

    # Float32 is not among the supported key dtypes; a separate table in
    # its own directory holds the float column.
    float_schema = pa.schema([pa.field("id", pa.float32(), nullable=False)])
    bad = _empty_table(tmp_path / "bad", float_schema)
    with pytest.raises(Exception, match="not supported"):
        bad.set_unenforced_primary_key("id")
|