mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-08 21:02:58 +00:00
feat(python): Set heap size to get faster fts indexing performance (#762)
By default, tantivy-py uses a 128MB heap size. We change the default to 1GB and allow the user to customize it. Locally, this makes `test_fts.py` run 10x faster.
This commit is contained in:
@@ -709,7 +709,11 @@ class LanceTable(Table):
|
||||
self._dataset.create_scalar_index(column, index_type="BTREE", replace=replace)
|
||||
|
||||
def create_fts_index(
|
||||
self, field_names: Union[str, List[str]], *, replace: bool = False
|
||||
self,
|
||||
field_names: Union[str, List[str]],
|
||||
*,
|
||||
replace: bool = False,
|
||||
writer_heap_size: Optional[int] = 1024 * 1024 * 1024,
|
||||
):
|
||||
"""Create a full-text search index on the table.
|
||||
|
||||
@@ -724,6 +728,7 @@ class LanceTable(Table):
|
||||
If True, replace the existing index if it exists. Note that this is
|
||||
not yet an atomic operation; the index will be temporarily
|
||||
unavailable while the new index is being created.
|
||||
writer_heap_size: int, default 1GB
|
||||
"""
|
||||
from .fts import create_index, populate_index
|
||||
|
||||
@@ -740,7 +745,7 @@ class LanceTable(Table):
|
||||
fs.delete_dir(path)
|
||||
|
||||
index = create_index(self._get_fts_index_path(), field_names)
|
||||
populate_index(index, self, field_names)
|
||||
populate_index(index, self, field_names, writer_heap_size=writer_heap_size)
|
||||
register_event("create_fts_index")
|
||||
|
||||
def _get_fts_index_path(self):
|
||||
|
||||
Reference in New Issue
Block a user