fix: remove primary key constraint from MemWAL bucket sharding

Lance v7.0.0-rc.1 intentionally removed the requirement that the bucket_sharding column match the unenforced primary key column, or that a primary key be set at all. Update LanceDB to match: drop the PK-related doc comments and the test assertions that expected rejection when no PK is set or when the bucket column differs from the PK.

The Rust changes are taken from #3435; this commit additionally applies the equivalent updates to the Python and TypeScript bindings.

See https://github.com/lance-format/lance/issues/6917

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 23:10:40 +00:00 · 2026-05-22 14:18:58 -07:00
parent 02b112e931
commit a69dd0f62b
5 changed files with 10 additions and 64 deletions
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -3875,15 +3875,11 @@ class AsyncTable:
        strategies:

        - ``LsmWriteSpec.bucket(column, num_buckets)`` — hash-bucket writes by
-          the single-column unenforced primary key.
+          a scalar column.
        - ``LsmWriteSpec.identity(column)`` — shard by the raw value of a
          scalar column.
        - ``LsmWriteSpec.unsharded()`` — route every write to a single shard.

-        All variants require the table to have an unenforced primary key set
-        via [`set_unenforced_primary_key`]; bucket sharding additionally
-        requires it to be the single column being bucketed.
-
        Parameters
        ----------
        spec : LsmWriteSpec
@@ -3892,7 +3888,6 @@ class AsyncTable:
        Examples
        --------
        >>> from lancedb._lancedb import LsmWriteSpec
-        >>> # table.set_unenforced_primary_key("id")
        >>> # table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 16))
        """
        await self._inner.set_lsm_write_spec(spec)
--- a/python/python/tests/test_lsm_write_spec.py
+++ b/python/python/tests/test_lsm_write_spec.py
@@ -40,16 +40,6 @@ def _make_table(tmp_path):
 def test_set_lsm_write_spec_validates(tmp_path):
    _db, table = _make_table(tmp_path)

-    # No PK set yet.
-    with pytest.raises(Exception, match="primary key"):
-        table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 4))
-
-    table.set_unenforced_primary_key("id")
-
-    # Column mismatch.
-    with pytest.raises(Exception, match="match"):
-        table.set_lsm_write_spec(LsmWriteSpec.bucket("v", 4))
-
    # Out-of-range num_buckets.
    with pytest.raises(Exception, match="num_buckets"):
        table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 0))
@@ -70,7 +60,6 @@ def test_unset_lsm_write_spec(tmp_path):
        table.unset_lsm_write_spec()

    # Install a spec, then remove it; afterwards a fresh spec can be set.
-    table.set_unenforced_primary_key("id")
    table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 4))
    table.unset_lsm_write_spec()
    # A second unset errors — there is no spec left to remove.
@@ -81,9 +70,7 @@ def test_unset_lsm_write_spec(tmp_path):

 def test_set_unsharded_spec(tmp_path):
    _db, table = _make_table(tmp_path)
-    # Lance MemWAL still requires a primary key on the dataset; Unsharded
-    # just skips per-row hashing.
-    table.set_unenforced_primary_key("id")
+    # Unsharded routes every write to a single shard, skipping per-row hashing.
    table.set_lsm_write_spec(LsmWriteSpec.unsharded())
    table.unset_lsm_write_spec()

@@ -120,7 +107,6 @@ async def test_async_set_unset_lsm_write_spec(tmp_path):
        pa.RecordBatchReader.from_batches(SCHEMA, [_batch(["seed"], [0])]),
    )

-    await table.set_unenforced_primary_key("id")
    await table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 4))
    await table.unset_lsm_write_spec()
    # A second unset errors.
@@ -130,9 +116,7 @@ async def test_async_set_unset_lsm_write_spec(tmp_path):

 def test_set_identity_spec(tmp_path):
    _db, table = _make_table(tmp_path)
-    # Identity sharding still requires an unenforced primary key on the
-    # table; it shards by the raw value of the given column.
-    table.set_unenforced_primary_key("id")
+    # Identity sharding shards by the raw value of the given column.
    table.set_lsm_write_spec(LsmWriteSpec.identity("v"))
    table.unset_lsm_write_spec()

--- a/python/src/table.rs
+++ b/python/src/table.rs
@@ -185,7 +185,7 @@ pub struct LsmWriteSpec {

 #[pymethods]
 impl LsmWriteSpec {
-    /// Hash-bucket sharding by the unenforced primary key column.
+    /// Hash-bucket sharding by a scalar column.
    #[staticmethod]
    pub fn bucket(column: String, num_buckets: u32) -> Self {
        Self {