From 4c2939d66e62676be853a7b9ea576089afddd6f1 Mon Sep 17 00:00:00 2001
From: lennylxx
Date: Wed, 8 Apr 2026 16:04:13 -0700
Subject: [PATCH] fix(python): guard against None before .decode() on split_names metadata key (#3229)

`.get(b"split_names", None).decode()` was called unconditionally in both
Permutations.__init__ and Permutation.from_tables(), crashing with
AttributeError when schema metadata existed but lacked the split_names
key. Guard the decode behind a None check and add regression tests.
---
 python/python/lancedb/permutation.py    | 10 +++---
 python/python/tests/test_permutation.py | 44 +++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/python/python/lancedb/permutation.py b/python/python/lancedb/permutation.py
index 5d133c309..724a0fd25 100644
--- a/python/python/lancedb/permutation.py
+++ b/python/python/lancedb/permutation.py
@@ -284,9 +284,8 @@ class Permutations:
         self.permutation_table = permutation_table
 
         if permutation_table.schema.metadata is not None:
-            split_names = permutation_table.schema.metadata.get(
-                b"split_names", None
-            ).decode("utf-8")
+            raw = permutation_table.schema.metadata.get(b"split_names")
+            split_names = raw.decode("utf-8") if raw is not None else None
             if split_names is not None:
                 self.split_names = json.loads(split_names)
                 self.split_dict = {
@@ -460,9 +459,8 @@ class Permutation:
                     f"Cannot create a permutation on split `{split}`"
                     " because no split names are defined in the permutation table"
                 )
-            split_names = permutation_table.schema.metadata.get(
-                b"split_names", None
-            ).decode("utf-8")
+            raw = permutation_table.schema.metadata.get(b"split_names")
+            split_names = raw.decode("utf-8") if raw is not None else None
             if split_names is None:
                 raise ValueError(
                     f"Cannot create a permutation on split `{split}`"
diff --git a/python/python/tests/test_permutation.py b/python/python/tests/test_permutation.py
index 0223b829c..bb92ba0ba 100644
--- a/python/python/tests/test_permutation.py
+++ b/python/python/tests/test_permutation.py
@@ -522,6 +522,50 @@ def test_no_split_names(some_table: Table):
     assert permutations[1].num_rows == 500
 
 
+def test_permutations_metadata_without_split_names_key(mem_db: DBConnection):
+    """Regression: schema metadata present but missing split_names key must not crash.
+
+    Previously, `.get(b"split_names", None).decode()` was called unconditionally,
+    so any permutation table whose metadata dict had other keys but no split_names
+    raised AttributeError: 'NoneType' has no attribute 'decode'.
+    """
+    base = mem_db.create_table("base_nosplit", pa.table({"x": range(10)}))
+
+    # Build a permutation-like table that carries some metadata but NOT split_names.
+    raw = pa.table(
+        {
+            "row_id": pa.array(range(10), type=pa.uint64()),
+            "split_id": pa.array([0] * 10, type=pa.uint32()),
+        }
+    ).replace_schema_metadata({b"other_key": b"other_value"})
+    perm_tbl = mem_db.create_table("perm_nosplit", raw)
+
+    permutations = Permutations(base, perm_tbl)
+    assert permutations.split_names == []
+    assert permutations.split_dict == {}
+
+
+def test_from_tables_string_split_missing_names_key(mem_db: DBConnection):
+    """Regression: from_tables() with a string split must raise ValueError, not
+    AttributeError.
+
+    Previously, `.get(b"split_names", None).decode()` crashed with AttributeError
+    when the metadata dict existed but had no split_names key.
+    """
+    base = mem_db.create_table("base_strsplit", pa.table({"x": range(10)}))
+
+    raw = pa.table(
+        {
+            "row_id": pa.array(range(10), type=pa.uint64()),
+            "split_id": pa.array([0] * 10, type=pa.uint32()),
+        }
+    ).replace_schema_metadata({b"other_key": b"other_value"})
+    perm_tbl = mem_db.create_table("perm_strsplit", raw)
+
+    with pytest.raises(ValueError, match="no split names are defined"):
+        Permutation.from_tables(base, perm_tbl, split="train")
+
+
 @pytest.fixture
 def some_perm_table(some_table: Table) -> Table:
     return (