From 4c6b728a3186f8222c4a429134265537ad5851f1 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Fri, 8 Nov 2024 18:47:00 +0800 Subject: [PATCH] feat: support FTS options on RemoteTable Signed-off-by: BubbleCal --- python/python/lancedb/index.py | 11 ++++++++++- python/python/lancedb/remote/table.py | 19 ++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/python/python/lancedb/index.py b/python/python/lancedb/index.py index a1b06a29..598d761e 100644 --- a/python/python/lancedb/index.py +++ b/python/python/lancedb/index.py @@ -110,7 +110,16 @@ class FTS: remove_stop_words: bool = False, ascii_folding: bool = False, ): - self._inner = LanceDbIndex.fts(with_position=with_position) + self._inner = LanceDbIndex.fts( + with_position=with_position, + base_tokenizer=base_tokenizer, + language=language, + max_token_length=max_token_length, + lower_case=lower_case, + stem=stem, + remove_stop_words=remove_stop_words, + ascii_folding=ascii_folding, + ) class HnswPq: diff --git a/python/python/lancedb/remote/table.py b/python/python/lancedb/remote/table.py index e2d88b98..a375d30c 100644 --- a/python/python/lancedb/remote/table.py +++ b/python/python/lancedb/remote/table.py @@ -131,8 +131,25 @@ class RemoteTable(Table): *, replace: bool = False, with_position: bool = True, + # tokenizer configs: + base_tokenizer: str = "simple", + language: str = "English", + max_token_length: Optional[int] = 40, + lower_case: bool = True, + stem: bool = False, + remove_stop_words: bool = False, + ascii_folding: bool = False, ): - config = FTS(with_position=with_position) + config = FTS( + with_position=with_position, + base_tokenizer=base_tokenizer, + language=language, + max_token_length=max_token_length, + lower_case=lower_case, + stem=stem, + remove_stop_words=remove_stop_words, + ascii_folding=ascii_folding, + ) self._loop.run_until_complete( self._table.create_index(column, config=config, replace=replace) )