mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-03 11:00:40 +00:00
fix: support LargeList label list indexes (#3529)
## Summary This PR extends nested-field regression coverage across Rust local/remote, Python sync/async, and Node so canonical escaped paths stay consistent across scalar, vector, and FTS index lifecycle behavior. It also aligns LanceDB's LabelList type gate with Lance by accepting `LargeList<primitive>` columns while keeping `List<Struct<...>>` unsupported until Lance defines stable membership semantics for struct labels. Part of #3406.
This commit is contained in:
@@ -2399,18 +2399,32 @@ def test_create_scalar_index(mem_db: DBConnection):
|
||||
def test_create_index_nested_field_paths(mem_db: DBConnection):
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("rowId", pa.int32()),
|
||||
pa.field("row-id", pa.int32()),
|
||||
pa.field("userId", pa.int32()),
|
||||
pa.field("metadata", pa.struct([pa.field("user_id", pa.int32())])),
|
||||
pa.field("MetaData", pa.struct([pa.field("userId", pa.int32())])),
|
||||
pa.field(
|
||||
"image",
|
||||
pa.struct([pa.field("embedding", pa.list_(pa.float32(), 2))]),
|
||||
),
|
||||
pa.field("payload", pa.struct([pa.field("text", pa.string())])),
|
||||
pa.field("meta-data", pa.struct([pa.field("user-id", pa.int32())])),
|
||||
pa.field("literal", pa.struct([pa.field("a.b", pa.int32())])),
|
||||
]
|
||||
)
|
||||
data = pa.Table.from_pylist(
|
||||
[
|
||||
{
|
||||
"rowId": i,
|
||||
"row-id": i,
|
||||
"userId": i,
|
||||
"metadata": {"user_id": i},
|
||||
"MetaData": {"userId": i},
|
||||
"image": {"embedding": [float(i), float(i + 1)]},
|
||||
"payload": {"text": f"document {i}"},
|
||||
"meta-data": {"user-id": i},
|
||||
"literal": {"a.b": i},
|
||||
}
|
||||
for i in range(256)
|
||||
],
|
||||
@@ -2418,19 +2432,37 @@ def test_create_index_nested_field_paths(mem_db: DBConnection):
|
||||
)
|
||||
table = mem_db.create_table("nested_index_paths", data=data)
|
||||
|
||||
table.create_scalar_index("rowId", name="row_id_idx")
|
||||
table.create_scalar_index("`row-id`", name="row_dash_id_idx")
|
||||
table.create_scalar_index("userId", name="top_user_id_idx")
|
||||
table.create_scalar_index("metadata.user_id", name="metadata_user_id_idx")
|
||||
table.create_scalar_index("MetaData.userId", name="mixed_case_metadata_user_id_idx")
|
||||
table.create_scalar_index("`meta-data`.`user-id`", name="escaped_names_idx")
|
||||
table.create_scalar_index("literal.`a.b`", name="literal_dot_idx")
|
||||
table.create_index(
|
||||
vector_column_name="image.embedding",
|
||||
num_partitions=1,
|
||||
num_sub_vectors=1,
|
||||
name="image_embedding_idx",
|
||||
)
|
||||
table.create_fts_index("payload.text", with_position=False, name="payload_text_idx")
|
||||
|
||||
indices = sorted(table.list_indices(), key=lambda idx: idx.name)
|
||||
assert [(idx.name, idx.index_type, idx.columns) for idx in indices] == [
|
||||
("escaped_names_idx", "BTree", ["`meta-data`.`user-id`"]),
|
||||
("image_embedding_idx", "IvfPq", ["image.embedding"]),
|
||||
("literal_dot_idx", "BTree", ["literal.`a.b`"]),
|
||||
("metadata_user_id_idx", "BTree", ["metadata.user_id"]),
|
||||
("mixed_case_metadata_user_id_idx", "BTree", ["MetaData.userId"]),
|
||||
("payload_text_idx", "FTS", ["payload.text"]),
|
||||
("row_dash_id_idx", "BTree", ["`row-id`"]),
|
||||
("row_id_idx", "BTree", ["rowId"]),
|
||||
("top_user_id_idx", "BTree", ["userId"]),
|
||||
]
|
||||
for index in indices:
|
||||
stats = table.index_stats(index.name)
|
||||
assert stats is not None
|
||||
assert stats.num_indexed_rows == 256
|
||||
|
||||
vector_results = (
|
||||
table.search([0.0, 1.0], vector_column_name="image.embedding")
|
||||
@@ -2448,6 +2480,14 @@ def test_create_index_nested_field_paths(mem_db: DBConnection):
|
||||
assert len(filtered_results) == 1
|
||||
assert filtered_results[0]["metadata"]["user_id"] == 42
|
||||
|
||||
escaped_results = table.search().where("`row-id` = 43").limit(1).to_list()
|
||||
assert len(escaped_results) == 1
|
||||
assert escaped_results[0]["row-id"] == 43
|
||||
|
||||
fts_results = table.search("document 44", query_type="fts").limit(1).to_list()
|
||||
assert len(fts_results) == 1
|
||||
assert fts_results[0]["payload"]["text"] == "document 44"
|
||||
|
||||
|
||||
def test_empty_query(mem_db: DBConnection):
|
||||
table = mem_db.create_table(
|
||||
|
||||
Reference in New Issue
Block a user