mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 07:09:57 +00:00
feat!: upgrade lance to 0.19.1 (#1762)
BREAKING CHANGE: the default tokenizer no longer performs stemming or stop-word removal. Users who need that behavior must now enable those options explicitly. - upgrade lance to 0.19.1 - update the FTS docs - update the FTS API Upstream change notes: https://github.com/lancedb/lance/releases/tag/v0.19.1 --------- Signed-off-by: BubbleCal <bubble-cal@outlook.com> Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
@@ -106,12 +106,41 @@ impl Index {
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[staticmethod]
|
||||
pub fn fts(with_position: Option<bool>) -> Self {
|
||||
pub fn fts(
|
||||
with_position: Option<bool>,
|
||||
base_tokenizer: Option<String>,
|
||||
language: Option<String>,
|
||||
max_token_length: Option<usize>,
|
||||
lower_case: Option<bool>,
|
||||
stem: Option<bool>,
|
||||
remove_stop_words: Option<bool>,
|
||||
ascii_folding: Option<bool>,
|
||||
) -> Self {
|
||||
let mut opts = FtsIndexBuilder::default();
|
||||
if let Some(with_position) = with_position {
|
||||
opts = opts.with_position(with_position);
|
||||
}
|
||||
if let Some(base_tokenizer) = base_tokenizer {
|
||||
opts.tokenizer_configs = opts.tokenizer_configs.base_tokenizer(base_tokenizer);
|
||||
}
|
||||
if let Some(language) = language {
|
||||
opts.tokenizer_configs = opts.tokenizer_configs.language(&language).unwrap();
|
||||
}
|
||||
opts.tokenizer_configs = opts.tokenizer_configs.max_token_length(max_token_length);
|
||||
if let Some(lower_case) = lower_case {
|
||||
opts.tokenizer_configs = opts.tokenizer_configs.lower_case(lower_case);
|
||||
}
|
||||
if let Some(stem) = stem {
|
||||
opts.tokenizer_configs = opts.tokenizer_configs.stem(stem);
|
||||
}
|
||||
if let Some(remove_stop_words) = remove_stop_words {
|
||||
opts.tokenizer_configs = opts.tokenizer_configs.remove_stop_words(remove_stop_words);
|
||||
}
|
||||
if let Some(ascii_folding) = ascii_folding {
|
||||
opts.tokenizer_configs = opts.tokenizer_configs.ascii_folding(ascii_folding);
|
||||
}
|
||||
Self {
|
||||
inner: Mutex::new(Some(LanceDbIndex::FTS(opts))),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user