Files
lancedb/python/python/tests/test_nested_fields.py
nuthalapativarun 04480c274a test(python): add nested field regression matrix tests (#3518)
## Summary

Closes #3406

Add a regression matrix in `python/python/tests/test_nested_fields.py`
that exercises the full nested field index lifecycle for both the sync
and async Python table APIs. The tests will fail if any implementation
regresses to leaf-only field names in `list_indices`, `index_stats`,
search, or filter results.

## Test scenarios covered

**Index types:** BTree scalar, IvfPq vector, FTS

**Field-name edge cases (per acceptance criteria):**
- `rowId` — camelCase top-level field
- `` `row-id` `` — hyphenated top-level field (escaped)
- `` parent.`leaf.name` `` — struct leaf whose name contains a
literal dot
- `MetaData.userId` — mixed-case nested path
- `` `meta-data`.`user-id` `` — hyphenated struct with hyphenated leaf

**Lifecycle operations per index type:**
- `create_index` / `create_scalar_index` / `create_fts_index`
- `list_indices` → verify canonical full dotted path (not leaf name)
- `index_stats` → verify row count and index type
- Filtered scan (`WHERE nested.field = value`)
- Vector search via nested embedding column
- FTS search via nested text column
- `add` (append) then re-check index listing
- `optimize` then re-check index listing

**Both sync and async APIs** are covered in parallel test classes.

## Notes

Lance forbids top-level field names that contain a literal `.`, so the
`` `a.b` `` acceptance-criterion variant is exercised as a *struct leaf*
field (`` parent.`leaf.name` ``) rather than a top-level column.
2026-06-11 08:06:04 -07:00

687 lines
27 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
"""Regression matrix for nested field support across LanceDB Python APIs.
Covers the lifecycle described in lancedb/lancedb#3406:
- Nested scalar, vector, and FTS index creation with full dotted paths
- list_indices / index_stats return canonical full paths (not leaf names)
- search, filter, append, optimize behaviour
- Field-name edge cases: mixed case, literal-dot field names, same-name leaves
- Both sync and async Python table APIs
The matrix uses the following field-name variants from the acceptance criteria:
- rowId (camelCase top-level)
- `row-id` (hyphenated top-level, escaped)
- parent.`leaf.name` (struct leaf whose name contains a literal dot)
- MetaData.userId (mixed-case nested path)
- `meta-data`.`user-id` (hyphenated struct with hyphenated leaf)
Note: Lance forbids top-level field names that contain a '.', so the literal-dot
edge case is exercised via a struct leaf field (parent.`leaf.name`) instead.
"""
from datetime import timedelta
import pyarrow as pa
import pytest
import pytest_asyncio
import lancedb
from lancedb.db import AsyncConnection, DBConnection
from lancedb.index import BTree, FTS, IvfPq
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Length of each embedding produced by _vec() and the fixed list size of the
# nested ``image.embedding`` column.
DIM = 8
# IvfPq requires at least num_partitions * 256 rows by default; keeping rows
# small means we must drop num_sub_vectors and num_partitions very low.
# NROWS is also the default row count of every data builder in this module and
# the value the index_stats tests expect for num_indexed_rows.
NROWS = 256
def _vec(row: int) -> list:
    """Return the deterministic ``DIM``-length embedding for row index *row*.

    Component ``i`` equals ``(row * DIM + i) % 256`` converted to ``float``.
    """
    start = row * DIM
    return [float(value % 256) for value in range(start, start + DIM)]
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def sync_db(tmp_path) -> DBConnection:
    """Provide a synchronous LanceDB connection opened on pytest's ``tmp_path``."""
    connection = lancedb.connect(tmp_path)
    return connection
@pytest_asyncio.fixture
async def async_db(tmp_path) -> AsyncConnection:
    """Provide an async LanceDB connection opened on pytest's ``tmp_path``.

    The connection is opened with a ``read_consistency_interval`` of zero
    seconds.
    """
    zero_interval = timedelta(seconds=0)
    connection = await lancedb.connect_async(
        tmp_path, read_consistency_interval=zero_interval
    )
    return connection
# ---------------------------------------------------------------------------
# Schema / data builders
# ---------------------------------------------------------------------------
def _nested_scalar_schema() -> pa.Schema:
    """Return the schema used by the nested scalar (BTree) index tests.

    Columns, in order:
      - ``rowId``      int32                      (camelCase top-level)
      - ``row-id``     int32                      (hyphenated top-level)
      - ``MetaData``   struct{userId: int32}      (mixed-case nested path)
      - ``meta-data``  struct{user-id: int32}     (hyphenated struct and leaf)

    Lance disallows top-level field names that contain '.' (e.g. a field
    literally named 'a.b'), so that edge case is tested separately using
    _literal_dot_schema().
    """
    mixed_case_struct = pa.struct([pa.field("userId", pa.int32())])
    hyphenated_struct = pa.struct([pa.field("user-id", pa.int32())])
    fields = [
        pa.field("rowId", pa.int32()),
        pa.field("row-id", pa.int32()),
        pa.field("MetaData", mixed_case_struct),
        pa.field("meta-data", hyphenated_struct),
    ]
    return pa.schema(fields)
def _nested_scalar_data(nrows: int = NROWS) -> pa.Table:
    """Build *nrows* rows matching ``_nested_scalar_schema()``.

    Every column (and every struct leaf) of row ``i`` holds the value ``i``.
    """
    target_schema = _nested_scalar_schema()
    mixed_case_type = pa.struct([pa.field("userId", pa.int32())])
    hyphenated_type = pa.struct([pa.field("user-id", pa.int32())])
    columns = {
        "rowId": pa.array(list(range(nrows)), pa.int32()),
        "row-id": pa.array(list(range(nrows)), pa.int32()),
        "MetaData": pa.array(
            [{"userId": value} for value in range(nrows)],
            type=mixed_case_type,
        ),
        "meta-data": pa.array(
            [{"user-id": value} for value in range(nrows)],
            type=hyphenated_type,
        ),
    }
    return pa.table(columns, schema=target_schema)
def _literal_dot_schema() -> pa.Schema:
    """Return a schema whose struct *leaf* is literally named ``leaf.name``.

    Columns are ``id`` (int32) and ``parent`` (struct{leaf.name: int32}).
    The path used in the index API is ``parent.`leaf.name` ``.
    """
    dotted_leaf = pa.field("leaf.name", pa.int32())
    fields = [
        pa.field("id", pa.int32()),
        pa.field("parent", pa.struct([dotted_leaf])),
    ]
    return pa.schema(fields)
def _literal_dot_data(nrows: int = NROWS) -> pa.Table:
    """Build *nrows* rows matching ``_literal_dot_schema()``.

    Row ``i`` has ``id == i`` and ``parent`` equal to ``{"leaf.name": i}``.
    """
    struct_type = pa.struct([pa.field("leaf.name", pa.int32())])
    id_column = pa.array(list(range(nrows)), pa.int32())
    parent_column = pa.array(
        [{"leaf.name": value} for value in range(nrows)],
        type=struct_type,
    )
    columns = {"id": id_column, "parent": parent_column}
    return pa.table(columns, schema=_literal_dot_schema())
def _same_leaf_schema() -> pa.Schema:
    """Return a schema with two structs that share the leaf name ``userId``.

    Columns are ``StructA`` and ``StructB``, each struct{userId: int32}.
    """
    fields = [
        pa.field(struct_name, pa.struct([pa.field("userId", pa.int32())]))
        for struct_name in ("StructA", "StructB")
    ]
    return pa.schema(fields)
def _same_leaf_data(nrows: int = NROWS) -> pa.Table:
    """Build *nrows* rows matching ``_same_leaf_schema()``.

    Row ``i`` stores ``i`` in ``StructA.userId`` and ``i * 10`` in
    ``StructB.userId``.
    """
    leaf_struct = pa.struct([pa.field("userId", pa.int32())])
    struct_a = pa.array(
        [{"userId": value} for value in range(nrows)], type=leaf_struct
    )
    struct_b = pa.array(
        [{"userId": value * 10} for value in range(nrows)], type=leaf_struct
    )
    columns = {"StructA": struct_a, "StructB": struct_b}
    return pa.table(columns, schema=_same_leaf_schema())
def _nested_vector_schema() -> pa.Schema:
    """Return the schema used by the nested vector index tests.

    Columns, in order:
      - ``id``        int32
      - ``image``     struct{embedding: fixed-size list of DIM float32}
      - ``MetaData``  struct{userId: int32}
    """
    embedding_field = pa.field("embedding", pa.list_(pa.float32(), DIM))
    user_field = pa.field("userId", pa.int32())
    fields = [
        pa.field("id", pa.int32()),
        pa.field("image", pa.struct([embedding_field])),
        pa.field("MetaData", pa.struct([user_field])),
    ]
    return pa.schema(fields)
def _nested_vector_data(nrows: int = NROWS) -> pa.Table:
    """Build *nrows* rows matching ``_nested_vector_schema()``.

    Row ``i`` has ``id == i``, ``image.embedding == _vec(i)`` and
    ``MetaData.userId == i``.
    """
    vector_type = pa.list_(pa.float32(), DIM)
    image_struct = pa.struct([pa.field("embedding", vector_type)])
    meta_struct = pa.struct([pa.field("userId", pa.int32())])

    id_column = pa.array(list(range(nrows)), pa.int32())
    image_column = pa.array(
        [{"embedding": _vec(row)} for row in range(nrows)],
        type=image_struct,
    )
    meta_column = pa.array(
        [{"userId": row} for row in range(nrows)],
        type=meta_struct,
    )
    columns = {"id": id_column, "image": image_column, "MetaData": meta_column}
    return pa.table(columns, schema=_nested_vector_schema())
def _nested_fts_schema() -> pa.Schema:
    """Return the schema used by the nested full-text-search index tests.

    Columns, in order:
      - ``id``        int32
      - ``payload``   struct{text: utf8}
      - ``MetaData``  struct{userId: int32}
    """
    text_field = pa.field("text", pa.utf8())
    user_field = pa.field("userId", pa.int32())
    fields = [
        pa.field("id", pa.int32()),
        pa.field("payload", pa.struct([text_field])),
        pa.field("MetaData", pa.struct([user_field])),
    ]
    return pa.schema(fields)
def _nested_fts_data(nrows: int = NROWS) -> pa.Table:
    """Build *nrows* rows matching ``_nested_fts_schema()``.

    Row ``i`` has ``id == i`` and ``MetaData.userId == i``; ``payload.text``
    cycles through the five words alpha, bravo, charlie, delta, echo.
    """
    vocabulary = ["alpha", "bravo", "charlie", "delta", "echo"]
    text_struct = pa.struct([pa.field("text", pa.utf8())])
    user_struct = pa.struct([pa.field("userId", pa.int32())])

    id_column = pa.array(list(range(nrows)), pa.int32())
    payload_column = pa.array(
        [{"text": vocabulary[row % len(vocabulary)]} for row in range(nrows)],
        type=text_struct,
    )
    meta_column = pa.array(
        [{"userId": row} for row in range(nrows)],
        type=user_struct,
    )
    columns = {
        "id": id_column,
        "payload": payload_column,
        "MetaData": meta_column,
    }
    return pa.table(columns, schema=_nested_fts_schema())
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _columns_by_name_sync(tbl) -> dict:
    """Map each index name from ``tbl.list_indices()`` to that index's columns."""
    columns_by_name = {}
    for index in tbl.list_indices():
        columns_by_name[index.name] = index.columns
    return columns_by_name
async def _columns_by_name_async(tbl) -> dict:
    """Await ``tbl.list_indices()`` and map each index name to its columns."""
    indices = await tbl.list_indices()
    return {index.name: index.columns for index in indices}
# ===========================================================================
# SYNC TESTS
# ===========================================================================
#
# The sync LanceTable API uses:
# - create_scalar_index(column, ...) for scalar (BTree/Bitmap/LabelList) indices
# - create_fts_index(column, ...) for full-text-search indices
# - create_index(...) for vector indices (older positional API)
# ===========================================================================
class TestNestedScalarIndexSync:
    """BTree scalar-index regression checks against the sync table API."""

    def test_top_level_camelcase_field(self, sync_db):
        """An index on top-level ``rowId`` is listed under that exact name."""
        table = sync_db.create_table("t", _nested_scalar_data())
        table.create_scalar_index("rowId", index_type="BTREE", name="rowid_idx")

        listed = _columns_by_name_sync(table)

        expected = ["rowId"]
        assert listed["rowid_idx"] == expected, (
            "list_indices must return 'rowId', not a truncated leaf name"
        )

    def test_top_level_hyphenated_field_escaped(self, sync_db):
        """An index on escaped top-level `row-id` is listed in escaped form."""
        table = sync_db.create_table("t", _nested_scalar_data())
        table.create_scalar_index(
            "`row-id`", index_type="BTREE", name="rowid_hyph_idx"
        )

        listed = _columns_by_name_sync(table)

        expected = ["`row-id`"]
        assert listed["rowid_hyph_idx"] == expected, (
            "list_indices must return escaped path '`row-id`'"
        )

    def test_struct_leaf_literal_dot_field_escaped(self, sync_db):
        """An index on parent.`leaf.name` is listed with the full escaped path.

        The leaf name itself contains a dot, so the listing must keep both the
        parent segment and the escaped leaf.
        """
        table = sync_db.create_table("t", _literal_dot_data())
        table.create_scalar_index(
            "parent.`leaf.name`", index_type="BTREE", name="leaf_dot_idx"
        )

        listed = _columns_by_name_sync(table)

        expected = ["parent.`leaf.name`"]
        assert listed["leaf_dot_idx"] == expected, (
            "list_indices must return 'parent.`leaf.name`', not just '`leaf.name`'"
        )

    def test_nested_mixed_case_path(self, sync_db):
        """An index on ``MetaData.userId`` is listed with the full dotted path."""
        table = sync_db.create_table("t", _nested_scalar_data())
        table.create_scalar_index(
            "MetaData.userId", index_type="BTREE", name="metadata_userid_idx"
        )

        listed = _columns_by_name_sync(table)

        expected = ["MetaData.userId"]
        assert listed["metadata_userid_idx"] == expected, (
            "list_indices must return 'MetaData.userId', not leaf 'userId'"
        )

    def test_nested_hyphenated_path_escaped(self, sync_db):
        """An index on `meta-data`.`user-id` keeps both escaped segments."""
        table = sync_db.create_table("t", _nested_scalar_data())
        table.create_scalar_index(
            "`meta-data`.`user-id`", index_type="BTREE", name="metauid_idx"
        )

        listed = _columns_by_name_sync(table)

        expected = ["`meta-data`.`user-id`"]
        assert listed["metauid_idx"] == expected, (
            "list_indices must return '`meta-data`.`user-id`', not 'user-id'"
        )

    def test_filter_on_nested_mixed_case(self, sync_db):
        """Filtering on ``MetaData.userId = 5`` returns exactly the one match."""
        table = sync_db.create_table("t", _nested_scalar_data())
        table.create_scalar_index(
            "MetaData.userId", index_type="BTREE", name="metadata_userid_idx"
        )

        filtered = table.search().where("MetaData.userId = 5")
        matches = filtered.to_list()

        assert len(matches) == 1
        assert matches[0]["MetaData"]["userId"] == 5

    def test_append_and_list_indices_stable(self, sync_db):
        """Appending four rows leaves the listed index columns unchanged."""
        table = sync_db.create_table("t", _nested_scalar_data())
        table.create_scalar_index(
            "MetaData.userId", index_type="BTREE", name="meta_uid_idx"
        )

        extra_rows = _nested_scalar_data(nrows=4)
        table.add(extra_rows)

        listed = _columns_by_name_sync(table)
        assert listed["meta_uid_idx"] == ["MetaData.userId"]

    def test_optimize_and_list_indices_stable(self, tmp_path):
        """Append followed by ``optimize`` still lists the full dotted path."""
        database = lancedb.connect(tmp_path / "opt_db")
        table = database.create_table("t", _nested_scalar_data())
        table.create_scalar_index(
            "MetaData.userId", index_type="BTREE", name="meta_uid_idx"
        )

        extra_rows = _nested_scalar_data(nrows=4)
        table.add(extra_rows)
        table.optimize()

        listed = _columns_by_name_sync(table)
        assert listed["meta_uid_idx"] == ["MetaData.userId"]

    def test_same_name_leaves_are_distinct(self, sync_db):
        """Indices on ``StructA.userId`` and ``StructB.userId`` stay distinct."""
        table = sync_db.create_table("same_leaf", _same_leaf_data())
        table.create_scalar_index(
            "StructA.userId", index_type="BTREE", name="a_userid_idx"
        )
        table.create_scalar_index(
            "StructB.userId", index_type="BTREE", name="b_userid_idx"
        )

        listed = _columns_by_name_sync(table)

        assert listed["a_userid_idx"] == ["StructA.userId"]
        assert listed["b_userid_idx"] == ["StructB.userId"]

    def test_index_stats_canonical_path(self, sync_db):
        """``index_stats`` reports a BTREE index covering all NROWS rows."""
        table = sync_db.create_table("t", _nested_scalar_data())
        table.create_scalar_index(
            "MetaData.userId", index_type="BTREE", name="meta_uid_idx"
        )

        index_info = table.index_stats("meta_uid_idx")

        assert index_info is not None
        assert index_info.index_type == "BTREE"
        assert index_info.num_indexed_rows == NROWS
class TestNestedVectorIndexSync:
    """Vector-index regression checks on ``image.embedding`` via the sync API."""

    def test_nested_vector_index_full_path(self, sync_db):
        """The vector index is listed under the full ``image.embedding`` path."""
        table = sync_db.create_table("vt", _nested_vector_data())
        table.create_index(
            vector_column_name="image.embedding",
            num_partitions=2,
            num_sub_vectors=2,
            name="image_emb_idx",
        )

        listed = _columns_by_name_sync(table)

        expected = ["image.embedding"]
        assert listed["image_emb_idx"] == expected, (
            "list_indices must return 'image.embedding', not leaf 'embedding'"
        )

    def test_nested_vector_search(self, sync_db):
        """Searching the nested embedding column yields at least one row."""
        table = sync_db.create_table("vt", _nested_vector_data())
        table.create_index(
            vector_column_name="image.embedding",
            num_partitions=2,
            num_sub_vectors=2,
            name="image_emb_idx",
        )

        query = table.search(_vec(0), vector_column_name="image.embedding")
        hits = query.limit(5).to_list()

        assert len(hits) > 0

    def test_nested_vector_index_stats(self, sync_db):
        """``index_stats`` reports NROWS indexed rows for the vector index."""
        table = sync_db.create_table("vt", _nested_vector_data())
        table.create_index(
            vector_column_name="image.embedding",
            num_partitions=2,
            num_sub_vectors=2,
            name="image_emb_idx",
        )

        index_info = table.index_stats("image_emb_idx")

        assert index_info is not None
        assert index_info.num_indexed_rows == NROWS

    def test_nested_vector_append_optimize(self, tmp_path):
        """Append followed by ``optimize`` keeps the listed vector column."""
        database = lancedb.connect(tmp_path / "vec_opt_db")
        table = database.create_table("vt", _nested_vector_data())
        table.create_index(
            vector_column_name="image.embedding",
            num_partitions=2,
            num_sub_vectors=2,
            name="image_emb_idx",
        )

        extra_rows = _nested_vector_data(nrows=4)
        table.add(extra_rows)
        table.optimize()

        listed = _columns_by_name_sync(table)
        assert listed["image_emb_idx"] == ["image.embedding"]
class TestNestedFTSIndexSync:
    """Full-text-search index regression checks on ``payload.text`` (sync API)."""

    def test_nested_fts_index_full_path(self, sync_db):
        """The FTS index is listed under the full ``payload.text`` path."""
        table = sync_db.create_table("ft", _nested_fts_data())
        table.create_fts_index("payload.text", name="payload_text_idx")

        listed = _columns_by_name_sync(table)

        expected = ["payload.text"]
        assert listed["payload_text_idx"] == expected, (
            "list_indices must return 'payload.text', not leaf 'text'"
        )

    def test_nested_fts_search(self, sync_db):
        """Searching for "alpha" returns only rows whose text is "alpha"."""
        table = sync_db.create_table("ft", _nested_fts_data())
        table.create_fts_index("payload.text", name="payload_text_idx")

        query = table.search("alpha", query_type="fts", fts_columns="payload.text")
        hits = query.limit(10).to_list()

        assert len(hits) > 0
        assert all(hit["payload"]["text"] == "alpha" for hit in hits)

    def test_nested_fts_append_optimize(self, tmp_path):
        """Append followed by ``optimize`` keeps the listed FTS column."""
        database = lancedb.connect(tmp_path / "fts_opt_db")
        table = database.create_table("ft", _nested_fts_data())
        table.create_fts_index("payload.text", name="payload_text_idx")

        extra_rows = _nested_fts_data(nrows=4)
        table.add(extra_rows)
        table.optimize()

        listed = _columns_by_name_sync(table)
        assert listed["payload_text_idx"] == ["payload.text"]
# ===========================================================================
# ASYNC TESTS
# ===========================================================================
#
# The async AsyncTable API uses create_index(column, config=...) uniformly
# for scalar, vector, and FTS indices.
# ===========================================================================
class TestNestedScalarIndexAsync:
    """BTree scalar-index regression checks against the async table API."""

    @pytest.mark.asyncio
    async def test_top_level_camelcase_field(self, async_db):
        """An index on top-level ``rowId`` is listed under that exact name."""
        table = await async_db.create_table("t", _nested_scalar_data())
        await table.create_index("rowId", config=BTree(), name="rowid_idx")

        listed = await _columns_by_name_async(table)

        assert listed["rowid_idx"] == ["rowId"]

    @pytest.mark.asyncio
    async def test_top_level_hyphenated_field_escaped(self, async_db):
        """An index on escaped top-level `row-id` is listed in escaped form."""
        table = await async_db.create_table("t", _nested_scalar_data())
        await table.create_index(
            "`row-id`", config=BTree(), name="rowid_hyph_idx"
        )

        listed = await _columns_by_name_async(table)

        assert listed["rowid_hyph_idx"] == ["`row-id`"]

    @pytest.mark.asyncio
    async def test_struct_leaf_literal_dot_field_escaped(self, async_db):
        """An index on parent.`leaf.name` is listed with the full escaped path."""
        table = await async_db.create_table("t", _literal_dot_data())
        await table.create_index(
            "parent.`leaf.name`", config=BTree(), name="leaf_dot_idx"
        )

        listed = await _columns_by_name_async(table)

        assert listed["leaf_dot_idx"] == ["parent.`leaf.name`"]

    @pytest.mark.asyncio
    async def test_nested_mixed_case_path(self, async_db):
        """An index on ``MetaData.userId`` is listed with the full dotted path."""
        table = await async_db.create_table("t", _nested_scalar_data())
        await table.create_index(
            "MetaData.userId", config=BTree(), name="metadata_userid_idx"
        )

        listed = await _columns_by_name_async(table)

        assert listed["metadata_userid_idx"] == ["MetaData.userId"]

    @pytest.mark.asyncio
    async def test_nested_hyphenated_path_escaped(self, async_db):
        """An index on `meta-data`.`user-id` keeps both escaped segments."""
        table = await async_db.create_table("t", _nested_scalar_data())
        await table.create_index(
            "`meta-data`.`user-id`", config=BTree(), name="metauid_idx"
        )

        listed = await _columns_by_name_async(table)

        assert listed["metauid_idx"] == ["`meta-data`.`user-id`"]

    @pytest.mark.asyncio
    async def test_filter_on_nested_mixed_case(self, async_db):
        """Filtering on ``MetaData.userId = 5`` returns exactly the one match."""
        table = await async_db.create_table("t", _nested_scalar_data())
        await table.create_index(
            "MetaData.userId", config=BTree(), name="metadata_userid_idx"
        )

        filtered = table.query().where("MetaData.userId = 5")
        matches = await filtered.to_list()

        assert len(matches) == 1
        assert matches[0]["MetaData"]["userId"] == 5

    @pytest.mark.asyncio
    async def test_index_stats_canonical_path(self, async_db):
        """``index_stats`` reports a BTREE index covering all NROWS rows."""
        table = await async_db.create_table("t", _nested_scalar_data())
        await table.create_index(
            "MetaData.userId", config=BTree(), name="meta_uid_idx"
        )

        index_info = await table.index_stats("meta_uid_idx")

        assert index_info is not None
        assert index_info.index_type == "BTREE"
        assert index_info.num_indexed_rows == NROWS

    @pytest.mark.asyncio
    async def test_append_and_list_indices_stable(self, async_db):
        """Appending four rows leaves the listed index columns unchanged."""
        table = await async_db.create_table("t", _nested_scalar_data())
        await table.create_index(
            "MetaData.userId", config=BTree(), name="meta_uid_idx"
        )

        extra_rows = _nested_scalar_data(nrows=4)
        await table.add(extra_rows)

        listed = await _columns_by_name_async(table)
        assert listed["meta_uid_idx"] == ["MetaData.userId"]

    @pytest.mark.asyncio
    async def test_optimize_and_list_indices_stable(self, tmp_path):
        """Append followed by ``optimize`` still lists the full dotted path."""
        zero_interval = timedelta(seconds=0)
        database = await lancedb.connect_async(
            tmp_path / "opt_db", read_consistency_interval=zero_interval
        )
        table = await database.create_table("t", _nested_scalar_data())
        await table.create_index(
            "MetaData.userId", config=BTree(), name="meta_uid_idx"
        )

        extra_rows = _nested_scalar_data(nrows=4)
        await table.add(extra_rows)
        await table.optimize()

        listed = await _columns_by_name_async(table)
        assert listed["meta_uid_idx"] == ["MetaData.userId"]

    @pytest.mark.asyncio
    async def test_same_name_leaves_are_distinct(self, async_db):
        """Indices on ``StructA.userId`` and ``StructB.userId`` stay distinct."""
        table = await async_db.create_table("same_leaf", _same_leaf_data())
        await table.create_index(
            "StructA.userId", config=BTree(), name="a_userid_idx"
        )
        await table.create_index(
            "StructB.userId", config=BTree(), name="b_userid_idx"
        )

        listed = await _columns_by_name_async(table)

        assert listed["a_userid_idx"] == ["StructA.userId"]
        assert listed["b_userid_idx"] == ["StructB.userId"]
class TestNestedVectorIndexAsync:
    """IvfPq regression checks on ``image.embedding`` via the async table API."""

    @pytest.mark.asyncio
    async def test_nested_vector_index_full_path(self, async_db):
        """The vector index is listed under the full ``image.embedding`` path."""
        table = await async_db.create_table("vt", _nested_vector_data())
        ivf_pq = IvfPq(num_partitions=2, num_sub_vectors=2)
        await table.create_index(
            "image.embedding", config=ivf_pq, name="image_emb_idx"
        )

        listed = await _columns_by_name_async(table)

        assert listed["image_emb_idx"] == ["image.embedding"]

    @pytest.mark.asyncio
    async def test_nested_vector_search(self, async_db):
        """Searching the nested embedding column yields at least one row."""
        table = await async_db.create_table("vt", _nested_vector_data())
        ivf_pq = IvfPq(num_partitions=2, num_sub_vectors=2)
        await table.create_index(
            "image.embedding", config=ivf_pq, name="image_emb_idx"
        )

        query = table.query().nearest_to(_vec(0)).column("image.embedding")
        hits = await query.limit(5).to_list()

        assert len(hits) > 0

    @pytest.mark.asyncio
    async def test_nested_vector_index_stats(self, async_db):
        """``index_stats`` reports NROWS indexed rows for the vector index."""
        table = await async_db.create_table("vt", _nested_vector_data())
        ivf_pq = IvfPq(num_partitions=2, num_sub_vectors=2)
        await table.create_index(
            "image.embedding", config=ivf_pq, name="image_emb_idx"
        )

        index_info = await table.index_stats("image_emb_idx")

        assert index_info is not None
        assert index_info.num_indexed_rows == NROWS

    @pytest.mark.asyncio
    async def test_nested_vector_append_optimize(self, tmp_path):
        """Append followed by ``optimize`` keeps the listed vector column."""
        zero_interval = timedelta(seconds=0)
        database = await lancedb.connect_async(
            tmp_path / "vec_opt_db", read_consistency_interval=zero_interval
        )
        table = await database.create_table("vt", _nested_vector_data())
        ivf_pq = IvfPq(num_partitions=2, num_sub_vectors=2)
        await table.create_index(
            "image.embedding", config=ivf_pq, name="image_emb_idx"
        )

        extra_rows = _nested_vector_data(nrows=4)
        await table.add(extra_rows)
        await table.optimize()

        listed = await _columns_by_name_async(table)
        assert listed["image_emb_idx"] == ["image.embedding"]
class TestNestedFTSIndexAsync:
    """Full-text-search index regression checks on ``payload.text`` (async API)."""

    @pytest.mark.asyncio
    async def test_nested_fts_index_full_path(self, async_db):
        """The FTS index is listed under the full ``payload.text`` path."""
        table = await async_db.create_table("ft", _nested_fts_data())
        await table.create_index(
            "payload.text", config=FTS(), name="payload_text_idx"
        )

        listed = await _columns_by_name_async(table)

        assert listed["payload_text_idx"] == ["payload.text"]

    @pytest.mark.asyncio
    async def test_nested_fts_search(self, async_db):
        """Searching for "alpha" returns only rows whose text is "alpha"."""
        table = await async_db.create_table("ft", _nested_fts_data())
        await table.create_index(
            "payload.text", config=FTS(), name="payload_text_idx"
        )

        query = table.query().nearest_to_text("alpha", columns="payload.text")
        hits = await query.limit(10).to_list()

        assert len(hits) > 0
        assert all(hit["payload"]["text"] == "alpha" for hit in hits)

    @pytest.mark.asyncio
    async def test_nested_fts_append_optimize(self, tmp_path):
        """Append followed by ``optimize`` keeps the listed FTS column."""
        zero_interval = timedelta(seconds=0)
        database = await lancedb.connect_async(
            tmp_path / "fts_opt_db", read_consistency_interval=zero_interval
        )
        table = await database.create_table("ft", _nested_fts_data())
        await table.create_index(
            "payload.text", config=FTS(), name="payload_text_idx"
        )

        extra_rows = _nested_fts_data(nrows=4)
        await table.add(extra_rows)
        await table.optimize()

        listed = await _columns_by_name_async(table)
        assert listed["payload_text_idx"] == ["payload.text"]