feat: add new table API to wait for async indexing (#2338)

* Add new wait_for_index() table operation that polls until indices are
created/fully indexed
* Add an optional wait timeout parameter to all create_index operations
* Python and NodeJS interfaces

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

## Summary by CodeRabbit

- **New Features**
- Added optional waiting for index creation completion with configurable
timeout.
- Introduced methods to poll and wait for indices to be fully built
across sync and async tables.
  - Extended index creation APIs to accept a wait timeout parameter.
- **Bug Fixes**
- Added a new timeout error variant for improved error reporting on
index operations.
- **Tests**
- Added tests covering successful index readiness waiting, timeout
scenarios, and missing index cases.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Ryan Green
2025-04-21 08:41:21 -02:30
committed by GitHub
parent 4f07fea6df
commit 3ae90dde80
16 changed files with 582 additions and 33 deletions

View File

@@ -507,6 +507,15 @@ describe("When creating an index", () => {
expect(indices2.length).toBe(0);
});
it("should wait for index readiness", async () => {
// Create an index and then wait for it to be ready
await tbl.createIndex("vec");
const indices = await tbl.listIndices();
expect(indices.length).toBeGreaterThan(0);
const idxName = indices[0].name;
await expect(tbl.waitForIndex([idxName], 5)).resolves.toBeUndefined();
});
it("should search with distance range", async () => {
await tbl.createIndex("vec");
@@ -824,6 +833,7 @@ describe("When creating an index", () => {
// Only build index over v1
await tbl.createIndex("vec", {
config: Index.ivfPq({ numPartitions: 2, numSubVectors: 2 }),
waitTimeoutSeconds: 30,
});
const rst = await tbl

View File

@@ -681,4 +681,6 @@ export interface IndexOptions {
* The default is true
*/
replace?: boolean;
waitTimeoutSeconds?: number;
}

View File

@@ -246,6 +246,19 @@ export abstract class Table {
*/
abstract prewarmIndex(name: string): Promise<void>;
/**
* Waits for asynchronous indexing to complete on the table.
*
* @param indexNames The name of the indices to wait for
* @param timeoutSeconds The number of seconds to wait before timing out
*
* This will raise an error if the indices are not created and fully indexed within the timeout.
*/
abstract waitForIndex(
indexNames: string[],
timeoutSeconds: number,
): Promise<void>;
/**
* Create a {@link Query} Builder.
*
@@ -569,7 +582,12 @@ export class LocalTable extends Table {
// Bit of a hack to get around the fact that TS has no package-scope.
// biome-ignore lint/suspicious/noExplicitAny: skip
const nativeIndex = (options?.config as any)?.inner;
await this.inner.createIndex(nativeIndex, column, options?.replace);
await this.inner.createIndex(
nativeIndex,
column,
options?.replace,
options?.waitTimeoutSeconds,
);
}
async dropIndex(name: string): Promise<void> {
@@ -580,6 +598,13 @@ export class LocalTable extends Table {
await this.inner.prewarmIndex(name);
}
async waitForIndex(
indexNames: string[],
timeoutSeconds: number,
): Promise<void> {
await this.inner.waitForIndex(indexNames, timeoutSeconds);
}
query(): Query {
return new Query(this.inner);
}

View File

@@ -111,6 +111,7 @@ impl Table {
index: Option<&Index>,
column: String,
replace: Option<bool>,
wait_timeout_s: Option<i64>,
) -> napi::Result<()> {
let lancedb_index = if let Some(index) = index {
index.consume()?
@@ -121,6 +122,10 @@ impl Table {
if let Some(replace) = replace {
builder = builder.replace(replace);
}
if let Some(timeout) = wait_timeout_s {
builder =
builder.wait_timeout(std::time::Duration::from_secs(timeout.try_into().unwrap()));
}
builder.execute().await.default_error()
}
@@ -140,6 +145,18 @@ impl Table {
.default_error()
}
#[napi(catch_unwind)]
pub async fn wait_for_index(&self, index_names: Vec<String>, timeout_s: i64) -> Result<()> {
let timeout = std::time::Duration::from_secs(timeout_s.try_into().unwrap());
let index_names: Vec<&str> = index_names.iter().map(|s| s.as_str()).collect();
let slice: &[&str] = &index_names;
self.inner_ref()?
.wait_for_index(slice, timeout)
.await
.default_error()
}
#[napi(catch_unwind)]
pub async fn update(
&self,