feat: add name parameter to remaining Python create index calls (#2617)

## Summary
This PR adds the missing `name` parameter to the `create_scalar_index` and
`create_fts_index` methods in the Python SDK. The parameter was inadvertently
left off these methods when it was added to `create_index` in PR #2586.

## Changes
- Add `name: Optional[str] = None` parameter to abstract
`Table.create_scalar_index` and `Table.create_fts_index` methods
- Update `LanceTable` implementation to accept and pass the `name`
parameter to the underlying Rust layer
- Update `RemoteTable` implementation to accept and pass the `name`
parameter
- Enhance tests to verify that custom index names work correctly for both
scalar and FTS indices
- When `name` is not provided, default names are generated (e.g.,
`{column}_idx`)

## Test plan
- [x] Added test cases for custom names in scalar index creation
- [x] Added test cases for custom names in FTS index creation  
- [x] Verified existing tests continue to pass
- [x] Code formatting and linting checks pass

This ensures API consistency across all index creation methods in the
LanceDB Python SDK.

Fixes #2616

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Will Jones
2025-08-27 14:02:48 -07:00
committed by GitHub
parent faf8973624
commit f6846004ca
5 changed files with 99 additions and 19 deletions

View File

@@ -115,6 +115,7 @@ class RemoteTable(Table):
*,
replace: bool = False,
wait_timeout: timedelta = None,
name: Optional[str] = None,
):
"""Creates a scalar index
Parameters
@@ -139,7 +140,11 @@ class RemoteTable(Table):
LOOP.run(
self._table.create_index(
column, config=config, replace=replace, wait_timeout=wait_timeout
column,
config=config,
replace=replace,
wait_timeout=wait_timeout,
name=name,
)
)
@@ -161,6 +166,7 @@ class RemoteTable(Table):
ngram_min_length: int = 3,
ngram_max_length: int = 3,
prefix_only: bool = False,
name: Optional[str] = None,
):
config = FTS(
with_position=with_position,
@@ -177,7 +183,11 @@ class RemoteTable(Table):
)
LOOP.run(
self._table.create_index(
column, config=config, replace=replace, wait_timeout=wait_timeout
column,
config=config,
replace=replace,
wait_timeout=wait_timeout,
name=name,
)
)

View File

@@ -783,6 +783,7 @@ class Table(ABC):
replace: bool = True,
index_type: ScalarIndexType = "BTREE",
wait_timeout: Optional[timedelta] = None,
name: Optional[str] = None,
):
"""Create a scalar index on a column.
@@ -797,6 +798,8 @@ class Table(ABC):
The type of index to create.
wait_timeout: timedelta, optional
The timeout to wait if indexing is asynchronous.
name: str, optional
The name of the index. If not provided, a default name will be generated.
Examples
--------
@@ -859,6 +862,7 @@ class Table(ABC):
ngram_max_length: int = 3,
prefix_only: bool = False,
wait_timeout: Optional[timedelta] = None,
name: Optional[str] = None,
):
"""Create a full-text search index on the table.
@@ -923,6 +927,8 @@ class Table(ABC):
Whether to only index the prefix of the token for ngram tokenizer.
wait_timeout: timedelta, optional
The timeout to wait if indexing is asynchronous.
name: str, optional
The name of the index. If not provided, a default name will be generated.
"""
raise NotImplementedError
@@ -2105,6 +2111,7 @@ class LanceTable(Table):
*,
replace: bool = True,
index_type: ScalarIndexType = "BTREE",
name: Optional[str] = None,
):
if index_type == "BTREE":
config = BTree()
@@ -2115,7 +2122,7 @@ class LanceTable(Table):
else:
raise ValueError(f"Unknown index type {index_type}")
return LOOP.run(
self._table.create_index(column, replace=replace, config=config)
self._table.create_index(column, replace=replace, config=config, name=name)
)
def create_fts_index(
@@ -2139,6 +2146,7 @@ class LanceTable(Table):
ngram_min_length: int = 3,
ngram_max_length: int = 3,
prefix_only: bool = False,
name: Optional[str] = None,
):
if not use_tantivy:
if not isinstance(field_names, str):
@@ -2176,6 +2184,7 @@ class LanceTable(Table):
field_names,
replace=replace,
config=config,
name=name,
)
)
return

View File

@@ -157,7 +157,16 @@ def test_create_index_with_stemming(tmp_path, table):
def test_create_inverted_index(table, use_tantivy, with_position):
if use_tantivy and not with_position:
pytest.skip("we don't support building a tantivy index without position")
table.create_fts_index("text", use_tantivy=use_tantivy, with_position=with_position)
table.create_fts_index(
"text",
use_tantivy=use_tantivy,
with_position=with_position,
name="custom_fts_index",
)
if not use_tantivy:
indices = table.list_indices()
fts_indices = [i for i in indices if i.index_type == "FTS"]
assert any(i.name == "custom_fts_index" for i in fts_indices)
def test_populate_index(tmp_path, table):

View File

@@ -271,12 +271,21 @@ def test_table_add_in_threadpool():
def test_table_create_indices():
# Track received index creation requests to validate name parameter
received_requests = []
def handler(request):
index_stats = dict(
index_type="IVF_PQ", num_indexed_rows=1000, num_unindexed_rows=0
)
if request.path == "/v1/table/test/create_index/":
# Capture the request body to validate name parameter
content_len = int(request.headers.get("Content-Length", 0))
if content_len > 0:
body = request.rfile.read(content_len)
body_data = json.loads(body)
received_requests.append(body_data)
request.send_response(200)
request.end_headers()
elif request.path == "/v1/table/test/create/?mode=create":
@@ -307,34 +316,34 @@ def test_table_create_indices():
dict(
indexes=[
{
"index_name": "id_idx",
"index_name": "custom_scalar_idx",
"columns": ["id"],
},
{
"index_name": "text_idx",
"index_name": "custom_fts_idx",
"columns": ["text"],
},
{
"index_name": "vector_idx",
"index_name": "custom_vector_idx",
"columns": ["vector"],
},
]
)
)
request.wfile.write(payload.encode())
elif request.path == "/v1/table/test/index/id_idx/stats/":
elif request.path == "/v1/table/test/index/custom_scalar_idx/stats/":
request.send_response(200)
request.send_header("Content-Type", "application/json")
request.end_headers()
payload = json.dumps(index_stats)
request.wfile.write(payload.encode())
elif request.path == "/v1/table/test/index/text_idx/stats/":
elif request.path == "/v1/table/test/index/custom_fts_idx/stats/":
request.send_response(200)
request.send_header("Content-Type", "application/json")
request.end_headers()
payload = json.dumps(index_stats)
request.wfile.write(payload.encode())
elif request.path == "/v1/table/test/index/vector_idx/stats/":
elif request.path == "/v1/table/test/index/custom_vector_idx/stats/":
request.send_response(200)
request.send_header("Content-Type", "application/json")
request.end_headers()
@@ -351,16 +360,49 @@ def test_table_create_indices():
# Parameters are well-tested through local and async tests.
# This is a smoke-test.
table = db.create_table("test", [{"id": 1}])
table.create_scalar_index("id", wait_timeout=timedelta(seconds=2))
table.create_fts_index("text", wait_timeout=timedelta(seconds=2))
table.create_index(
vector_column_name="vector", wait_timeout=timedelta(seconds=10)
# Test create_scalar_index with custom name
table.create_scalar_index(
"id", wait_timeout=timedelta(seconds=2), name="custom_scalar_idx"
)
table.wait_for_index(["id_idx"], timedelta(seconds=2))
table.wait_for_index(["text_idx", "vector_idx"], timedelta(seconds=2))
table.drop_index("vector_idx")
table.drop_index("id_idx")
table.drop_index("text_idx")
# Test create_fts_index with custom name
table.create_fts_index(
"text", wait_timeout=timedelta(seconds=2), name="custom_fts_idx"
)
# Test create_index with custom name
table.create_index(
vector_column_name="vector",
wait_timeout=timedelta(seconds=10),
name="custom_vector_idx",
)
# Validate that the name parameter was passed correctly in requests
assert len(received_requests) == 3
# Check scalar index request has custom name
scalar_req = received_requests[0]
assert "name" in scalar_req
assert scalar_req["name"] == "custom_scalar_idx"
# Check FTS index request has custom name
fts_req = received_requests[1]
assert "name" in fts_req
assert fts_req["name"] == "custom_fts_idx"
# Check vector index request has custom name
vector_req = received_requests[2]
assert "name" in vector_req
assert vector_req["name"] == "custom_vector_idx"
table.wait_for_index(["custom_scalar_idx"], timedelta(seconds=2))
table.wait_for_index(
["custom_fts_idx", "custom_vector_idx"], timedelta(seconds=2)
)
table.drop_index("custom_vector_idx")
table.drop_index("custom_scalar_idx")
table.drop_index("custom_fts_idx")
def test_table_wait_for_index_timeout():

View File

@@ -1274,11 +1274,13 @@ def test_create_scalar_index(mem_db: DBConnection):
"my_table",
data=test_data,
)
# Test with default name
table.create_scalar_index("x")
indices = table.list_indices()
assert len(indices) == 1
scalar_index = indices[0]
assert scalar_index.index_type == "BTree"
assert scalar_index.name == "x_idx" # Default name
# Confirm that prefiltering still works with the scalar index column
results = table.search().where("x = 'c'").to_arrow()
@@ -1292,6 +1294,14 @@ def test_create_scalar_index(mem_db: DBConnection):
indices = table.list_indices()
assert len(indices) == 0
# Test with custom name
table.create_scalar_index("y", name="custom_y_index")
indices = table.list_indices()
assert len(indices) == 1
scalar_index = indices[0]
assert scalar_index.index_type == "BTree"
assert scalar_index.name == "custom_y_index"
def test_empty_query(mem_db: DBConnection):
table = mem_db.create_table(