Compare commits

...

2 Commits

Author SHA1 Message Date
lancedb automation
dd8fd66e27 chore: update lance dependency to v5.0.0-rc.1 2026-04-09 19:18:39 +00:00
lennylxx
4c2939d66e fix(python): guard against None before .decode() on split_names metadata key (#3229)
`.get(b"split_names", None).decode()` was called unconditionally in both
Permutations.__init__ and Permutation.from_tables(), crashing with
AttributeError when schema metadata existed but lacked the split_names
key. Guard the decode behind a None check and add regression tests.
2026-04-08 16:04:13 -07:00
5 changed files with 96 additions and 53 deletions

65
Cargo.lock generated
View File

@@ -3072,8 +3072,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -4134,8 +4134,8 @@ dependencies = [
[[package]]
name = "lance"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-arith",
@@ -4201,13 +4201,14 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-data",
"arrow-ipc",
"arrow-ord",
"arrow-schema",
"arrow-select",
@@ -4222,8 +4223,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrayref",
"paste",
@@ -4232,8 +4233,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4270,8 +4271,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-array",
@@ -4301,8 +4302,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-array",
@@ -4320,8 +4321,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4358,8 +4359,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4391,8 +4392,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-arith",
@@ -4456,8 +4457,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-arith",
@@ -4501,8 +4502,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4518,8 +4519,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"async-trait",
@@ -4532,8 +4533,8 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-ipc",
@@ -4578,8 +4579,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow",
"arrow-array",
@@ -4618,8 +4619,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "5.0.0-beta.5"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-beta.5#d630106da5a238b3adfb8c5dea3b3921f3519945"
version = "5.0.0-rc.1"
source = "git+https://github.com/lance-format/lance.git?tag=v5.0.0-rc.1#d130b036a62a5d8a904dfbe711d3f7b91b132194"
dependencies = [
"arrow-array",
"arrow-schema",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.91.0"
[workspace.dependencies]
lance = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=5.0.0-beta.5", default-features = false, "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=5.0.0-beta.5", "tag" = "v5.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=5.0.0-rc.1", default-features = false, "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=5.0.0-rc.1", default-features = false, "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=5.0.0-rc.1", default-features = false, "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=5.0.0-rc.1", "tag" = "v5.0.0-rc.1", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }

View File

@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>5.0.0-beta.5</lance-core.version>
<lance-core.version>5.0.0-rc.1</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>

View File

@@ -284,9 +284,8 @@ class Permutations:
self.permutation_table = permutation_table
if permutation_table.schema.metadata is not None:
split_names = permutation_table.schema.metadata.get(
b"split_names", None
).decode("utf-8")
raw = permutation_table.schema.metadata.get(b"split_names")
split_names = raw.decode("utf-8") if raw is not None else None
if split_names is not None:
self.split_names = json.loads(split_names)
self.split_dict = {
@@ -460,9 +459,8 @@ class Permutation:
f"Cannot create a permutation on split `{split}`"
" because no split names are defined in the permutation table"
)
split_names = permutation_table.schema.metadata.get(
b"split_names", None
).decode("utf-8")
raw = permutation_table.schema.metadata.get(b"split_names")
split_names = raw.decode("utf-8") if raw is not None else None
if split_names is None:
raise ValueError(
f"Cannot create a permutation on split `{split}`"

View File

@@ -522,6 +522,50 @@ def test_no_split_names(some_table: Table):
assert permutations[1].num_rows == 500
def test_permutations_metadata_without_split_names_key(mem_db: DBConnection):
"""Regression: schema metadata present but missing split_names key must not crash.
Previously, `.get(b"split_names", None).decode()` was called unconditionally,
so any permutation table whose metadata dict had other keys but no split_names
raised AttributeError: 'NoneType' has no attribute 'decode'.
"""
base = mem_db.create_table("base_nosplit", pa.table({"x": range(10)}))
# Build a permutation-like table that carries some metadata but NOT split_names.
raw = pa.table(
{
"row_id": pa.array(range(10), type=pa.uint64()),
"split_id": pa.array([0] * 10, type=pa.uint32()),
}
).replace_schema_metadata({b"other_key": b"other_value"})
perm_tbl = mem_db.create_table("perm_nosplit", raw)
permutations = Permutations(base, perm_tbl)
assert permutations.split_names == []
assert permutations.split_dict == {}
def test_from_tables_string_split_missing_names_key(mem_db: DBConnection):
"""Regression: from_tables() with a string split must raise ValueError, not
AttributeError.
Previously, `.get(b"split_names", None).decode()` crashed with AttributeError
when the metadata dict existed but had no split_names key.
"""
base = mem_db.create_table("base_strsplit", pa.table({"x": range(10)}))
raw = pa.table(
{
"row_id": pa.array(range(10), type=pa.uint64()),
"split_id": pa.array([0] * 10, type=pa.uint32()),
}
).replace_schema_metadata({b"other_key": b"other_value"})
perm_tbl = mem_db.create_table("perm_strsplit", raw)
with pytest.raises(ValueError, match="no split names are defined"):
Permutation.from_tables(base, perm_tbl, split="train")
@pytest.fixture
def some_perm_table(some_table: Table) -> Table:
return (