mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-09 13:22:58 +00:00
chore(python): handle NaN input in fts ingestion (#763)
If the input text is None, Tantivy raises an error complaining it cannot add a NoneType. We handle this upstream so None's are not added to the document. If all of the indexed fields are None then we skip this document.
This commit is contained in:
@@ -103,10 +103,13 @@ def populate_index(index: tantivy.Index, table: LanceTable, fields: List[str]) -
|
||||
b = b.flatten()
|
||||
for i in range(b.num_rows):
|
||||
doc = tantivy.Document()
|
||||
doc.add_integer("doc_id", row_id)
|
||||
for name in fields:
|
||||
doc.add_text(name, b[name][i].as_py())
|
||||
writer.add_document(doc)
|
||||
value = b[name][i].as_py()
|
||||
if value is not None:
|
||||
doc.add_text(name, value)
|
||||
if not doc.is_empty:
|
||||
doc.add_integer("doc_id", row_id)
|
||||
writer.add_document(doc)
|
||||
row_id += 1
|
||||
# commit changes
|
||||
writer.commit()
|
||||
|
||||
Reference in New Issue
Block a user