diff --git a/python/lancedb/fts.py b/python/lancedb/fts.py index f187be8d..eb5c37e3 100644 --- a/python/lancedb/fts.py +++ b/python/lancedb/fts.py @@ -103,10 +103,13 @@ def populate_index(index: tantivy.Index, table: LanceTable, fields: List[str]) - b = b.flatten() for i in range(b.num_rows): doc = tantivy.Document() - doc.add_integer("doc_id", row_id) for name in fields: - doc.add_text(name, b[name][i].as_py()) - writer.add_document(doc) + value = b[name][i].as_py() + if value is not None: + doc.add_text(name, value) + if not doc.is_empty: + doc.add_integer("doc_id", row_id) + writer.add_document(doc) row_id += 1 # commit changes writer.commit() diff --git a/python/tests/test_fts.py b/python/tests/test_fts.py index f09b44ef..baa07096 100644 --- a/python/tests/test_fts.py +++ b/python/tests/test_fts.py @@ -147,3 +147,18 @@ def test_search_index_with_filter(table): assert r["id"] == 1 assert rs == rs2 + + +def test_null_input(table): + table.add( + [ + { + "vector": np.random.randn(128), + "id": 101, + "text": None, + "text2": None, + "nested": {"text": None}, + } + ] + ) + table.create_fts_index("text")