feat: add prewarm_index function (#2342)

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **New Features**
  - Added the ability to prewarm (load into memory) table indexes via new
    methods in the Python, Node.js, and Rust APIs, potentially reducing
    cold-start query latency (see the usage sketch after this list).
- **Bug Fixes**
  - Ensured that prewarming an index does not interfere with subsequent search
    operations.
- **Tests**
  - Introduced new test cases to verify full-text search index creation,
    prewarming, and search functionality in both Python and Node.js.
- **Chores**
  - Updated dependencies for improved compatibility and performance.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
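
A minimal usage sketch of the new Node.js method, pieced together from the test and API changes in this diff. The database path, table name, and sample rows are illustrative; the `@lancedb/lancedb` import path is assumed, and `"text_idx"` follows the default `<column>_idx` naming used in the test below.

```ts
import { connect, Index } from "@lancedb/lancedb";

async function main() {
  // The database path and table contents here are placeholders.
  const db = await connect("/tmp/prewarm-demo");
  const table = await db.createTable("docs", [
    { text: "lance database", vector: [0.1, 0.2, 0.3] },
    { text: "lance search", vector: [0.4, 0.5, 0.6] },
  ]);

  // Build a full-text search index on the "text" column.
  await table.createIndex("text", { config: Index.fts() });

  // Load the index into memory up front; "text_idx" matches the default
  // index name used in the test added by this PR.
  await table.prewarmIndex("text_idx");

  // Later queries should skip the cold-start cost of loading the index.
  const results = await table.search("lance").toArray();
  console.log(results.length);
}

main().catch(console.error);
```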

---------

Co-authored-by: Lu Qiu <luqiujob@gmail.com>
Author: Weston Pace
Date: 2025-04-17 17:14:36 -05:00
Committed by: GitHub
Parent: ef3a2b5357
Commit: 26080ee4c1
14 changed files with 215 additions and 54 deletions

View File

@@ -1312,6 +1312,28 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(results2[0].text).toBe(data[1].text);
    });
    test("prewarm full text search index", async () => {
      const db = await connect(tmpDir.name);
      const data = [
        { text: ["lance database", "the", "search"], vector: [0.1, 0.2, 0.3] },
        { text: ["lance database"], vector: [0.4, 0.5, 0.6] },
        { text: ["lance", "search"], vector: [0.7, 0.8, 0.9] },
        { text: ["database", "search"], vector: [1.0, 1.1, 1.2] },
        { text: ["unrelated", "doc"], vector: [1.3, 1.4, 1.5] },
      ];
      const table = await db.createTable("test", data);
      await table.createIndex("text", {
        config: Index.fts(),
      });
      // For the moment, we just confirm we can call prewarmIndex without error
      // and still search it afterwards
      await table.prewarmIndex("text_idx");
      const results = await table.search("lance").toArray();
      expect(results.length).toBe(3);
    });
    test("full text index on list", async () => {
      const db = await connect(tmpDir.name);
      const data = [

View File

@@ -235,6 +235,17 @@ export abstract class Table {
   */
  abstract dropIndex(name: string): Promise<void>;
  /**
   * Prewarm an index in the table.
   *
   * @param name The name of the index.
   *
   * This will load the index into memory. This may reduce the cold-start time for
   * future queries. If the index does not fit in the cache then this call may be
   * wasteful.
   */
  abstract prewarmIndex(name: string): Promise<void>;
  /**
   * Create a {@link Query} Builder.
   *
@@ -565,6 +576,10 @@ export class LocalTable extends Table {
    await this.inner.dropIndex(name);
  }
  async prewarmIndex(name: string): Promise<void> {
    await this.inner.prewarmIndex(name);
  }
  query(): Query {
    return new Query(this.inner);
  }

View File

@@ -327,6 +327,7 @@ impl JsFullTextQuery {
    }
    #[napi(factory)]
    #[allow(clippy::use_self)] // NAPI doesn't allow Self here but clippy reports it
    pub fn boost_query(
        positive: &JsFullTextQuery,
        negative: &JsFullTextQuery,
@@ -349,11 +350,8 @@ impl JsFullTextQuery {
         boosts: Option<Vec<f64>>,
     ) -> napi::Result<Self> {
         let q = match boosts {
-            Some(boosts) => MultiMatchQuery::try_new_with_boosts(
-                query,
-                columns,
-                boosts.into_iter().map(|v| v as f32).collect(),
-            ),
+            Some(boosts) => MultiMatchQuery::try_new(query, columns)
+                .and_then(|q| q.try_with_boosts(boosts.into_iter().map(|v| v as f32).collect())),
             None => MultiMatchQuery::try_new(query, columns),
         }
         .map_err(|e| {

View File

@@ -132,6 +132,14 @@ impl Table {
            .default_error()
    }
    #[napi(catch_unwind)]
    pub async fn prewarm_index(&self, index_name: String) -> napi::Result<()> {
        self.inner_ref()?
            .prewarm_index(&index_name)
            .await
            .default_error()
    }
    #[napi(catch_unwind)]
    pub async fn update(
        &self,