mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-20 21:40:43 +00:00
feat(node): support FTS options in nodejs (#1934)
Closes #1790 --------- Signed-off-by: BubbleCal <bubble-cal@outlook.com>
This commit is contained in:
@@ -349,6 +349,52 @@ export interface FtsOptions {
|
||||
* which will make the index smaller and faster to build, but will not support phrase queries.
|
||||
*/
|
||||
withPosition?: boolean;
|
||||
|
||||
/**
|
||||
* The tokenizer to use when building the index.
|
||||
* The default is "simple".
|
||||
*
|
||||
* The following tokenizers are available:
|
||||
*
|
||||
* "simple" - Simple tokenizer. This tokenizer splits the text into tokens using whitespace and punctuation as delimiters.
|
||||
*
|
||||
* "whitespace" - Whitespace tokenizer. This tokenizer splits the text into tokens using whitespace as a delimiter.
|
||||
*
|
||||
* "raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token.
|
||||
*/
|
||||
baseTokenizer?: "simple" | "whitespace" | "raw";
|
||||
|
||||
/**
|
||||
* language for stemming and stop words
|
||||
* this is only used when `stem` or `removeStopWords` is true
|
||||
*/
|
||||
language?: string;
|
||||
|
||||
/**
|
||||
* maximum token length
|
||||
* tokens longer than this length will be ignored
|
||||
*/
|
||||
maxTokenLength?: number;
|
||||
|
||||
/**
|
||||
* whether to lowercase tokens
|
||||
*/
|
||||
lowercase?: boolean;
|
||||
|
||||
/**
|
||||
* whether to stem tokens
|
||||
*/
|
||||
stem?: boolean;
|
||||
|
||||
/**
|
||||
* whether to remove stop words
|
||||
*/
|
||||
removeStopWords?: boolean;
|
||||
|
||||
/**
|
||||
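* whether to apply ASCII folding, i.e. convert non-ASCII characters (such as accented letters) to their ASCII equivalents where one exists, e.g. "é" becomes "e"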
|
||||
*/
|
||||
asciiFolding?: boolean;
|
||||
}
|
||||
|
||||
export class Index {
|
||||
@@ -450,7 +496,18 @@ export class Index {
|
||||
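* Language-dependent processing (stemming, stop-word removal) is selected with the `language` option, and phrase queries are not supported when the index is built without positions (see `withPosition`).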
|
||||
*/
|
||||
static fts(options?: Partial<FtsOptions>) {
|
||||
return new Index(LanceDbIndex.fts(options?.withPosition));
|
||||
return new Index(
|
||||
LanceDbIndex.fts(
|
||||
options?.withPosition,
|
||||
options?.baseTokenizer,
|
||||
options?.language,
|
||||
options?.maxTokenLength,
|
||||
options?.lowercase,
|
||||
options?.stem,
|
||||
options?.removeStopWords,
|
||||
options?.asciiFolding,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user