feat!: upgrade lance to 0.19.1 (#1762)

BREAKING CHANGE: default tokenizer no longer does stemming or stop-word
removal. Users who need either must now enable it explicitly.

- upgrade lance to 0.19.1
- update the FTS docs
- update the FTS API

Upstream change notes:
https://github.com/lancedb/lance/releases/tag/v0.19.1

---------

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
BubbleCal
2024-10-30 00:03:52 +08:00
committed by GitHub
parent b9802a0d23
commit 32fdcf97db
16 changed files with 459 additions and 166 deletions

View File

@@ -54,12 +54,14 @@ pub struct LabelListIndexBuilder {}
/// Builder holding the options for a full-text search (FTS) index.
///
/// Both fields are read when the `lance_index::scalar::InvertedIndexParams`
/// value is assembled for index creation.
#[derive(Debug, Clone)]
pub struct FtsIndexBuilder {
/// Forwarded as `InvertedIndexParams::with_position`; the `Default` impl
/// sets it to `true`.
// NOTE(review): presumably controls whether token positions are stored in
// the index — confirm against the lance documentation.
pub(crate) with_position: bool,
/// Tokenizer settings, forwarded as `InvertedIndexParams::tokenizer_config`.
/// The `Default` impl uses `TokenizerConfig::default()`.
// NOTE(review): per the lance 0.19.1 upgrade notes, the default tokenizer no
// longer performs stemming or stop-word removal; callers wanting either are
// expected to configure it here explicitly.
pub tokenizer_configs: TokenizerConfig,
}
impl Default for FtsIndexBuilder {
    /// Returns a builder with `with_position` enabled and the tokenizer set
    /// to `TokenizerConfig::default()`.
    fn default() -> Self {
        // Bind the tokenizer default first so the struct literal can use
        // field-init shorthand.
        let tokenizer_configs = TokenizerConfig::default();
        Self {
            with_position: true,
            tokenizer_configs,
        }
    }
}
@@ -72,4 +74,5 @@ impl FtsIndexBuilder {
}
}
use lance_index::scalar::inverted::TokenizerConfig;
pub use lance_index::scalar::FullTextSearchQuery;

View File

@@ -1568,6 +1568,7 @@ impl NativeTable {
let mut dataset = self.dataset.get_mut().await?;
let fts_params = lance_index::scalar::InvertedIndexParams {
with_position: fts_opts.with_position,
tokenizer_config: fts_opts.tokenizer_configs,
};
dataset
.create_index(
@@ -2002,7 +2003,7 @@ impl TableInternal for NativeTable {
self.dataset
.get_mut()
.await?
.add_columns(transforms, read_columns)
.add_columns(transforms, read_columns, None)
.await?;
Ok(())
}