mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 22:59:57 +00:00
chore(python): handle NaN input in fts ingestion (#763)
If the input text is None, Tantivy raises an error complaining that it cannot add a NoneType. We handle this upstream so that None values are never added to the document. If all of the indexed fields are None, the document is skipped entirely.
This commit is contained in:
@@ -103,10 +103,13 @@ def populate_index(index: tantivy.Index, table: LanceTable, fields: List[str]) -
|
||||
b = b.flatten()
|
||||
for i in range(b.num_rows):
|
||||
doc = tantivy.Document()
|
||||
doc.add_integer("doc_id", row_id)
|
||||
for name in fields:
|
||||
doc.add_text(name, b[name][i].as_py())
|
||||
writer.add_document(doc)
|
||||
value = b[name][i].as_py()
|
||||
if value is not None:
|
||||
doc.add_text(name, value)
|
||||
if not doc.is_empty:
|
||||
doc.add_integer("doc_id", row_id)
|
||||
writer.add_document(doc)
|
||||
row_id += 1
|
||||
# commit changes
|
||||
writer.commit()
|
||||
|
||||
@@ -147,3 +147,18 @@ def test_search_index_with_filter(table):
|
||||
assert r["id"] == 1
|
||||
|
||||
assert rs == rs2
|
||||
|
||||
|
||||
def test_null_input(table):
|
||||
table.add(
|
||||
[
|
||||
{
|
||||
"vector": np.random.randn(128),
|
||||
"id": 101,
|
||||
"text": None,
|
||||
"text2": None,
|
||||
"nested": {"text": None},
|
||||
}
|
||||
]
|
||||
)
|
||||
table.create_fts_index("text")
|
||||
|
||||
Reference in New Issue
Block a user