From b23d8abcdd928e66bf61b49195bad9652c81e370 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Mon, 18 Nov 2024 20:21:28 +0800 Subject: [PATCH] docs: introduce incremental indexing for FTS (#1789) don't merge it before https://github.com/lancedb/lancedb/pull/1769 merged --------- Signed-off-by: BubbleCal --- Cargo.toml | 2 +- docs/package-lock.json | 21 ++++++++++++--------- docs/src/fts.md | 29 +++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3ee7c0d2..abf6be41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ repository = "https://github.com/lancedb/lancedb" description = "Serverless, low-latency vector database for AI applications" keywords = ["lancedb", "lance", "database", "vector", "search"] categories = ["database-implementations"] -rust-version = "1.80.0" # TODO: lower this once we upgrade Lance again. +rust-version = "1.80.0" # TODO: lower this once we upgrade Lance again. [workspace.dependencies] lance = { "version" = "=0.19.3", "features" = [ diff --git a/docs/package-lock.json b/docs/package-lock.json index ead4a547..1baad851 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -19,7 +19,7 @@ }, "../node": { "name": "vectordb", - "version": "0.4.6", + "version": "0.12.0", "cpu": [ "x64", "arm64" @@ -31,9 +31,7 @@ "win32" ], "dependencies": { - "@apache-arrow/ts": "^14.0.2", "@neon-rs/load": "^0.0.74", - "apache-arrow": "^14.0.2", "axios": "^1.4.0" }, "devDependencies": { @@ -46,6 +44,7 @@ "@types/temp": "^0.9.1", "@types/uuid": "^9.0.3", "@typescript-eslint/eslint-plugin": "^5.59.1", + "apache-arrow-old": "npm:apache-arrow@13.0.0", "cargo-cp-artifact": "^0.1", "chai": "^4.3.7", "chai-as-promised": "^7.1.1", @@ -62,15 +61,19 @@ "ts-node-dev": "^2.0.0", "typedoc": "^0.24.7", "typedoc-plugin-markdown": "^3.15.3", - "typescript": "*", + "typescript": "^5.1.0", "uuid": "^9.0.0" }, "optionalDependencies": { - "@lancedb/vectordb-darwin-arm64": "0.4.6", - 
"@lancedb/vectordb-darwin-x64": "0.4.6", - "@lancedb/vectordb-linux-arm64-gnu": "0.4.6", - "@lancedb/vectordb-linux-x64-gnu": "0.4.6", - "@lancedb/vectordb-win32-x64-msvc": "0.4.6" + "@lancedb/vectordb-darwin-arm64": "0.12.0", + "@lancedb/vectordb-darwin-x64": "0.12.0", + "@lancedb/vectordb-linux-arm64-gnu": "0.12.0", + "@lancedb/vectordb-linux-x64-gnu": "0.12.0", + "@lancedb/vectordb-win32-x64-msvc": "0.12.0" + }, + "peerDependencies": { + "@apache-arrow/ts": "^14.0.2", + "apache-arrow": "^14.0.2" } }, "../node/node_modules/apache-arrow": { diff --git a/docs/src/fts.md b/docs/src/fts.md index 57838a9e..aba637eb 100644 --- a/docs/src/fts.md +++ b/docs/src/fts.md @@ -160,3 +160,32 @@ To search for a phrase, the index must be created with `with_position=True`: table.create_fts_index("text", use_tantivy=False, with_position=True) ``` This will allow you to search for phrases, but it will also significantly increase the index size and indexing time. + + +## Incremental indexing + +LanceDB supports incremental indexing, which means you can add new records to the table without reindexing the entire table. + +This can make the query more efficient, especially when the table is large and the new records are relatively small. + +=== "Python" + + ```python + table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}]) + table.optimize() + ``` + +=== "TypeScript" + + ```typescript + await tbl.add([{ vector: [3.1, 4.1], text: "Frodo was a happy puppy" }]); + await tbl.optimize(); + ``` + +=== "Rust" + + ```rust + let more_data: Box<dyn RecordBatchReader + Send> = create_some_records()?; + tbl.add(more_data).execute().await?; + tbl.optimize(OptimizeAction::All).execute().await?; + ```