From 285071e5c8ed6aa428c644c26f27ab5e880706eb Mon Sep 17 00:00:00 2001 From: QianZhu Date: Wed, 20 Nov 2024 21:07:30 -0800 Subject: [PATCH] docs: full-text search doc update (#1861) Co-authored-by: BubbleCal --- docs/src/fts.md | 44 ++++++++++++++++++++++++++++++++++++++--- docs/src/fts_tantivy.md | 4 +--- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/docs/src/fts.md b/docs/src/fts.md index aba637eb..8bf9ca02 100644 --- a/docs/src/fts.md +++ b/docs/src/fts.md @@ -114,12 +114,45 @@ table.create_fts_index("text", LanceDB full text search supports to filter the search results by a condition, both pre-filtering and post-filtering are supported. -This can be invoked via the familiar `where` syntax: - +This can be invoked via the familiar `where` syntax. + +With pre-filtering: === "Python" ```python - table.search("puppy").limit(10).where("meta='foo'").to_list() + table.search("puppy").limit(10).where("meta='foo'", prefilter=True).to_list() + ``` + +=== "TypeScript" + + ```typescript + await tbl + .search("puppy") + .select(["id", "doc"]) + .limit(10) + .where("meta='foo'") + .prefilter(true) + .toArray(); + ``` + +=== "Rust" + + ```rust + table + .query() + .full_text_search(FullTextSearchQuery::new("puppy".to_owned())) + .select(lancedb::query::Select::Columns(vec!["doc".to_owned()])) + .limit(10) + .only_if("meta='foo'") + .execute() + .await?; + ``` + +With post-filtering: +=== "Python" + + ```python + table.search("puppy").limit(10).where("meta='foo'", prefilter=False).to_list() ``` === "TypeScript" @@ -130,6 +163,7 @@ This can be invoked via the familiar `where` syntax: .select(["id", "doc"]) .limit(10) .where("meta='foo'") + .prefilter(false) .toArray(); ``` @@ -140,6 +174,7 @@ This can be invoked via the familiar `where` syntax: .query() .full_text_search(FullTextSearchQuery::new(words[0].to_owned())) .select(lancedb::query::Select::Columns(vec!["doc".to_owned()])) + .postfilter() .limit(10) .only_if("meta='foo'") .execute() @@ -189,3 +224,6 @@ 
This can make the query more efficient, especially when the table is large and t tbl.add(more_data).execute().await?; tbl.optimize(OptimizeAction::All).execute().await?; ``` +!!! note + + New data added after creating the FTS index will appear in search results while incremental indexing is still in progress, but with increased latency due to a flat search on the unindexed portion. LanceDB Cloud automates this merging process, minimizing the impact on search speed. \ No newline at end of file diff --git a/docs/src/fts_tantivy.md b/docs/src/fts_tantivy.md index 674f3c15..b41559dd 100644 --- a/docs/src/fts_tantivy.md +++ b/docs/src/fts_tantivy.md @@ -153,9 +153,7 @@ table.create_fts_index(["title", "content"], use_tantivy=True, writer_heap_size= ## Current limitations -1. Currently we do not yet support incremental writes. - If you add data after FTS index creation, it won't be reflected - in search results until you do a full reindex. +1. New data added after creating the FTS index will appear in search results, but with increased latency due to a flat search on the unindexed portion. Re-indexing with `create_fts_index` will reduce latency. LanceDB Cloud automates this merging process, minimizing the impact on search speed. 2. We currently only support local filesystem paths for the FTS index. This is a tantivy limitation. We've implemented an object store plugin