feat: allow setting train=False and name on indices (#2586)

Enables two new parameters when building indices:

* `name`: Allows explicitly setting a name on the index. Default is
`{col_name}_idx`.
* `train` (default `True`): When set to `False`, an empty index will be
immediately created.

The upgrade of Lance means there are also additional behaviors from
cd76a993b8:

* When a scalar index is created on a Table, it will be kept around even
if all rows are deleted or updated.
* Scalar indices can be created on empty tables. They will default to
`train=False` if the table is empty.

---------

Co-authored-by: Weston Pace <weston.pace@gmail.com>
This commit is contained in:
Will Jones
2025-08-15 14:00:26 -07:00
committed by GitHub
parent 0c34ffb252
commit ad09234d59
14 changed files with 620 additions and 353 deletions

View File

@@ -857,6 +857,40 @@ describe("When creating an index", () => {
expect(stats).toBeUndefined();
});
test("should support name and train parameters", async () => {
// Test with custom name
await tbl.createIndex("vec", {
config: Index.ivfPq({ numPartitions: 4 }),
name: "my_custom_vector_index",
});
const indices = await tbl.listIndices();
expect(indices).toHaveLength(1);
expect(indices[0].name).toBe("my_custom_vector_index");
// Test scalar index with train=false
await tbl.createIndex("id", {
config: Index.btree(),
name: "btree_empty",
train: false,
});
const allIndices = await tbl.listIndices();
expect(allIndices).toHaveLength(2);
expect(allIndices.some((idx) => idx.name === "btree_empty")).toBe(true);
// Test with both name and train=true (use tags column)
await tbl.createIndex("tags", {
config: Index.labelList(),
name: "tags_trained",
train: true,
});
const finalIndices = await tbl.listIndices();
expect(finalIndices).toHaveLength(3);
expect(finalIndices.some((idx) => idx.name === "tags_trained")).toBe(true);
});
test("create ivf_flat with binary vectors", async () => {
const db = await connect(tmpDir.name);
const binarySchema = new Schema([

View File

@@ -700,5 +700,27 @@ export interface IndexOptions {
*/
replace?: boolean;
/**
* Timeout in seconds to wait for index creation to complete.
*
* If not specified, the method will return immediately after starting the index creation.
*/
waitTimeoutSeconds?: number;
/**
* Optional custom name for the index.
*
* If not provided, a default name will be generated based on the column name.
*/
name?: string;
/**
* Whether to train the index with existing data.
*
* If true (default), the index will be trained with existing data in the table.
* If false, the index will be created empty and populated as new data is added.
*
* Note: This option is only supported for scalar indices. Vector indices always train.
*/
train?: boolean;
}

View File

@@ -662,6 +662,8 @@ export class LocalTable extends Table {
column,
options?.replace,
options?.waitTimeoutSeconds,
options?.name,
options?.train,
);
}

View File

@@ -114,6 +114,8 @@ impl Table {
column: String,
replace: Option<bool>,
wait_timeout_s: Option<i64>,
name: Option<String>,
train: Option<bool>,
) -> napi::Result<()> {
let lancedb_index = if let Some(index) = index {
index.consume()?
@@ -128,6 +130,12 @@ impl Table {
builder =
builder.wait_timeout(std::time::Duration::from_secs(timeout.try_into().unwrap()));
}
if let Some(name) = name {
builder = builder.name(name);
}
if let Some(train) = train {
builder = builder.train(train);
}
builder.execute().await.default_error()
}