diff --git a/python/python/lancedb/index.py b/python/python/lancedb/index.py index a1b06a29..598d761e 100644 --- a/python/python/lancedb/index.py +++ b/python/python/lancedb/index.py @@ -110,7 +110,16 @@ class FTS: remove_stop_words: bool = False, ascii_folding: bool = False, ): - self._inner = LanceDbIndex.fts(with_position=with_position) + self._inner = LanceDbIndex.fts( + with_position=with_position, + base_tokenizer=base_tokenizer, + language=language, + max_token_length=max_token_length, + lower_case=lower_case, + stem=stem, + remove_stop_words=remove_stop_words, + ascii_folding=ascii_folding, + ) class HnswPq: diff --git a/python/python/lancedb/remote/table.py b/python/python/lancedb/remote/table.py index 9fb743c2..f00b8022 100644 --- a/python/python/lancedb/remote/table.py +++ b/python/python/lancedb/remote/table.py @@ -138,9 +138,28 @@ class RemoteTable(Table): *, replace: bool = False, with_position: bool = True, + # tokenizer configs: + base_tokenizer: str = "simple", + language: str = "English", + max_token_length: Optional[int] = 40, + lower_case: bool = True, + stem: bool = False, + remove_stop_words: bool = False, + ascii_folding: bool = False, ): - config = FTS(with_position=with_position) - LOOP.run(self._table.create_index(column, config=config, replace=replace)) + config = FTS( + with_position=with_position, + base_tokenizer=base_tokenizer, + language=language, + max_token_length=max_token_length, + lower_case=lower_case, + stem=stem, + remove_stop_words=remove_stop_words, + ascii_folding=ascii_folding, + ) + self._loop.run_until_complete( + self._table.create_index(column, config=config, replace=replace) + ) def create_index( self,