feat: add table branch support to remote tables and Python/TS bindings (#3540)

### Description
Adding branch support for RemoteTable by threading a branch selector
onto every operation the data plane accepts it on. Exposes the
currentBranch to nodejs and python through the bindings.

Matching the server handlers, the branch rides as:
- a `?branch=` query parameter for Arrow-body and query-only ops
(insert, merge_insert, multipart_*, version/list, drop_index)
- a `branch` field in the JSON body for everything else (count_rows,
query, update, delete, create_index, column ops, index list/stats,
stats, restore, describe, tags create/update)

A main-branch handle (`branch == None`) produces byte-identical requests
to before: no `branch` field and no `?branch=`

- Handle-per-branch: `create_branch` / `checkout_branch` return a new
handle with fresh caches and reset version/freshness state, mirroring
`NativeTable`.
- `create_branch` maps 409 to already-exists, 400 to invalid, and 404 to
not-found with source context, and sends without retry so the 409 stays
observable.
- `Ref` translation covers version, version-number (relative to the
handle's branch), and tag (resolved via the tags endpoint); `"main"` and
empty normalize to the main branch.
- Python branch handles persist their branch (and pinned version) across
pickle/fork, so a forked or pickled handle reopens on its branch rather
than silently reverting to main.

### Tests

- Rust mock tests per op category (query-param and body mechanisms,
branch CRUD, error paths, backward-compat).
- Python sync branch CRUD, `open_table(branch=)`, and a pickle
round-trip regression test.
This commit is contained in:
Brendan Clement
2026-06-15 18:07:40 -04:00
committed by GitHub
parent 393ec981bf
commit f76b075d13
12 changed files with 1503 additions and 175 deletions

View File

@@ -154,50 +154,116 @@ async def test_async_checkout():
assert await table.count_rows() == 300
def _branch_open_handler(request):
if "/branches/list" in request.path:
body = json.dumps(
{
"branches": {
"exp": {
"parentBranch": None,
"parentVersion": 1,
"createAt": 1,
"manifestSize": 1,
}
}
}
).encode()
else:
# describe (table open + version/branch validation)
body = json.dumps({"version": 2, "schema": {"fields": []}}).encode()
request.send_response(200)
request.send_header("Content-Type", "application/json")
request.end_headers()
request.wfile.write(body)
def test_remote_open_table_branch_and_version():
with mock_lancedb_connection(_branch_open_handler) as db:
# version-only (and "main" + version) time-travels the main chain
assert db.open_table("test", version=2) is not None
assert db.open_table("test", branch="main", version=2).current_branch() is None
# a non-main branch opens a handle scoped to that branch, with or
# without a version
assert db.open_table("test", branch="exp").current_branch() == "exp"
assert db.open_table("test", branch="exp", version=2).current_branch() == "exp"
def test_remote_table_branches_sync():
# Branch CRUD + current_branch on the sync RemoteTable. The handle returned
# by create/checkout must stay a RemoteTable scoped to the branch.
from lancedb.remote.table import RemoteTable
def handler(request):
# describe (table open + version validation) always succeeds
if "/branches/list" in request.path:
body = json.dumps(
{
"branches": {
"exp": {
"parentBranch": None,
"parentVersion": 1,
"createAt": 1,
"manifestSize": 1,
}
}
}
).encode()
elif "/branches/create" in request.path or "/branches/delete" in request.path:
body = b"{}"
else:
# describe (table open + checkout validation)
body = json.dumps({"version": 1, "schema": {"fields": []}}).encode()
request.send_response(200)
request.send_header("Content-Type", "application/json")
request.end_headers()
request.wfile.write(
json.dumps({"version": 2, "schema": {"fields": []}}).encode()
)
request.wfile.write(body)
with mock_lancedb_connection(handler) as db:
# version-only (and "main" + version) is allowed: remote supports
# version time-travel even though it has no branches
assert db.open_table("test", version=2) is not None
assert db.open_table("test", branch="main", version=2) is not None
table = db.open_table("test")
assert isinstance(table, RemoteTable)
assert table.current_branch() is None
# a non-main branch is rejected, with or without a version
with pytest.raises(NotImplementedError, match="branching"):
db.open_table("test", branch="exp")
with pytest.raises(NotImplementedError, match="branching"):
db.open_table("test", branch="exp", version=2)
branch = table.branches.create("exp")
assert isinstance(branch, RemoteTable)
assert branch.current_branch() == "exp"
# list + checkout round trip; checkout also yields a branch-scoped handle
assert "exp" in table.branches.list()
checked = table.branches.checkout("exp")
assert isinstance(checked, RemoteTable)
assert checked.current_branch() == "exp"
table.branches.delete("exp")
@pytest.mark.asyncio
async def test_async_remote_open_table_branch_and_version():
def handler(request):
request.send_response(200)
request.send_header("Content-Type", "application/json")
request.end_headers()
request.wfile.write(
json.dumps({"version": 2, "schema": {"fields": []}}).encode()
)
async with mock_lancedb_connection_async(handler) as db:
# version-only (and "main" + version) is allowed: "main" is the default
# branch, so it must not hit the unsupported remote branch path
async with mock_lancedb_connection_async(_branch_open_handler) as db:
# version-only (and "main" + version) time-travels the main chain
assert await db.open_table("test", version=2) is not None
assert await db.open_table("test", branch="main", version=2) is not None
main_v2 = await db.open_table("test", branch="main", version=2)
assert main_v2.current_branch() is None
# a non-main branch is rejected, with or without a version
with pytest.raises(NotImplementedError, match="branching"):
await db.open_table("test", branch="exp")
with pytest.raises(NotImplementedError, match="branching"):
await db.open_table("test", branch="exp", version=2)
# a non-main branch opens a handle scoped to that branch
exp = await db.open_table("test", branch="exp")
assert exp.current_branch() == "exp"
exp_v2 = await db.open_table("test", branch="exp", version=2)
assert exp_v2.current_branch() == "exp"
def test_remote_table_branch_survives_pickle():
# Regression: a branch-scoped handle must keep its branch across a
# pickle/fork round-trip (it used to reopen on main).
with mock_lancedb_connection(_branch_open_handler) as db:
branch = db.open_table("test", branch="exp")
assert branch.current_branch() == "exp"
restored = pickle.loads(pickle.dumps(branch))
assert restored.current_branch() == "exp"
# the pinned version is carried through as well
branch_v2 = db.open_table("test", branch="exp", version=2)
restored_v2 = pickle.loads(pickle.dumps(branch_v2))
assert restored_v2.current_branch() == "exp"
def test_table_len_sync():