mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-07 04:12:59 +00:00
chore(python): handle NaN input in fts ingestion (#763)
If the input text is None, Tantivy raises an error complaining that it cannot add a NoneType. We now handle this upstream so that None values are never added to the document. If all of the indexed fields are None, the document is skipped entirely.
This commit is contained in:
@@ -103,10 +103,13 @@ def populate_index(index: tantivy.Index, table: LanceTable, fields: List[str]) -
|
|||||||
b = b.flatten()
|
b = b.flatten()
|
||||||
for i in range(b.num_rows):
|
for i in range(b.num_rows):
|
||||||
doc = tantivy.Document()
|
doc = tantivy.Document()
|
||||||
doc.add_integer("doc_id", row_id)
|
|
||||||
for name in fields:
|
for name in fields:
|
||||||
doc.add_text(name, b[name][i].as_py())
|
value = b[name][i].as_py()
|
||||||
writer.add_document(doc)
|
if value is not None:
|
||||||
|
doc.add_text(name, value)
|
||||||
|
if not doc.is_empty:
|
||||||
|
doc.add_integer("doc_id", row_id)
|
||||||
|
writer.add_document(doc)
|
||||||
row_id += 1
|
row_id += 1
|
||||||
# commit changes
|
# commit changes
|
||||||
writer.commit()
|
writer.commit()
|
||||||
|
|||||||
@@ -147,3 +147,18 @@ def test_search_index_with_filter(table):
|
|||||||
assert r["id"] == 1
|
assert r["id"] == 1
|
||||||
|
|
||||||
assert rs == rs2
|
assert rs == rs2
|
||||||
|
|
||||||
|
|
||||||
|
def test_null_input(table):
|
||||||
|
table.add(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"vector": np.random.randn(128),
|
||||||
|
"id": 101,
|
||||||
|
"text": None,
|
||||||
|
"text2": None,
|
||||||
|
"nested": {"text": None},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
table.create_fts_index("text")
|
||||||
|
|||||||
Reference in New Issue
Block a user