feat(python): unify sync create_index API to match async API (#2882)

## Summary

- Transitions `LanceTable` and `RemoteTable` to use the unified
`create_index()` API matching `AsyncTable`
- Deprecates `create_scalar_index()` and `create_fts_index()` with
deprecation warnings
- Adds detection logic to distinguish legacy vs new API calls
- Adds `@overload` decorators for type checker compatibility
- Adds `accelerator` parameter to IVF config classes for GPU support

**New API:**
```python
table.create_index("vec", config=IvfPq(distance_type="l2"))
table.create_index("col", config=BTree())
table.create_index("text_col", config=FTS(with_position=True))
```

**Legacy API (deprecated):**
```python
table.create_index("l2", vector_column_name="vec")  # emits DeprecationWarning
table.create_scalar_index("col", index_type="BTREE")  # deprecated; emits DeprecationWarning
table.create_fts_index("text_col")  # deprecated; emits DeprecationWarning
```

Fixes #2879

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Will Jones
2026-05-28 16:41:47 -07:00
committed by GitHub
parent ab982d7f65
commit d05fe8ec44
6 changed files with 780 additions and 166 deletions

View File

@@ -215,11 +215,12 @@ def test_reject_legacy_tantivy_index(table):
@pytest.mark.parametrize("with_position", [True, False])
def test_create_inverted_index(table, with_position):
table.create_fts_index(
"text",
with_position=with_position,
name="custom_fts_index",
)
with pytest.warns(DeprecationWarning, match="create_fts_index"):
table.create_fts_index(
"text",
with_position=with_position,
name="custom_fts_index",
)
indices = table.list_indices()
fts_indices = [i for i in indices if i.index_type == "FTS"]
assert any(i.name == "custom_fts_index" for i in fts_indices)

View File

@@ -436,22 +436,25 @@ def test_table_create_indices():
# This is a smoke-test.
table = db.create_table("test", [{"id": 1}])
# Test create_scalar_index with custom name
table.create_scalar_index(
"id", wait_timeout=timedelta(seconds=2), name="custom_scalar_idx"
)
# Test create_scalar_index with custom name (legacy method)
with pytest.warns(DeprecationWarning, match="create_scalar_index"):
table.create_scalar_index(
"id", wait_timeout=timedelta(seconds=2), name="custom_scalar_idx"
)
# Test create_fts_index with custom name
table.create_fts_index(
"text", wait_timeout=timedelta(seconds=2), name="custom_fts_idx"
)
# Test create_fts_index with custom name (legacy method)
with pytest.warns(DeprecationWarning, match="create_fts_index"):
table.create_fts_index(
"text", wait_timeout=timedelta(seconds=2), name="custom_fts_idx"
)
# Test create_index with custom name
table.create_index(
vector_column_name="vector",
wait_timeout=timedelta(seconds=10),
name="custom_vector_idx",
)
# Test create_index with custom name (legacy form: vector_column_name kwarg)
with pytest.warns(DeprecationWarning, match="create_index"):
table.create_index(
vector_column_name="vector",
wait_timeout=timedelta(seconds=10),
name="custom_vector_idx",
)
# Validate that the name parameter was passed correctly in requests
assert len(received_requests) == 3
@@ -480,6 +483,68 @@ def test_table_create_indices():
table.drop_index("custom_fts_idx")
def test_remote_create_index_new_api():
    """Smoke-test ``create_index(column, config=...)`` against a mocked remote table.

    The mock HTTP handler records the decoded JSON body of every request sent
    to the table's ``create_index`` endpoint. Four calls use the column-first
    form and must not raise ``DeprecationWarning``; one call uses the legacy
    keyword form and must emit it. The test then checks that exactly five
    index-creation requests reached the server.
    """
    from lancedb.index import BTree, FTS, IvfPq, IvfRq
    import warnings as _warnings

    index_requests = []

    def handler(request):
        route = request.path

        if route == "/v1/table/test/create_index/":
            # Capture the request payload; an absent body is recorded as {}.
            length = int(request.headers.get("Content-Length", 0))
            raw = b""
            if length > 0:
                raw = request.rfile.read(length)
            index_requests.append(json.loads(raw) if raw else {})
            request.send_response(200)
            request.end_headers()
            return

        if route == "/v1/table/test/create/?mode=create":
            request.send_response(200)
            request.send_header("Content-Type", "application/json")
            request.end_headers()
            request.wfile.write(b"{}")
            return

        if route == "/v1/table/test/describe/":
            description = {
                "version": 1,
                "schema": {
                    "fields": [
                        {"name": "id", "type": {"type": "int64"}, "nullable": False}
                    ]
                },
            }
            request.send_response(200)
            request.send_header("Content-Type", "application/json")
            request.end_headers()
            request.wfile.write(json.dumps(description).encode())
            return

        # Any other route is unknown to this mock server.
        request.send_response(404)
        request.end_headers()

    with mock_lancedb_connection(handler) as db:
        table = db.create_table("test", [{"id": 1}])

        # New API: column-first, config= kwarg. Should NOT emit DeprecationWarning,
        # so any DeprecationWarning is promoted to an error inside this block.
        with _warnings.catch_warnings():
            _warnings.simplefilter("error", DeprecationWarning)
            table.create_index("vector", config=IvfPq(distance_type="l2"))
            table.create_index("category", config=BTree())
            table.create_index("text", config=FTS())
            # IvfRq via new API
            table.create_index("vector", config=IvfRq(distance_type="l2"))

        # Legacy index_type="IVF_RQ" routes to IvfRq config under the hood.
        with pytest.warns(DeprecationWarning, match="create_index"):
            table.create_index(
                vector_column_name="vector",
                index_type="IVF_RQ",
                num_partitions=8,
            )

        # Four new-style calls plus one legacy call.
        assert len(index_requests) == 5
def test_table_wait_for_index_timeout():
def handler(request):
index_stats = dict(

View File

@@ -4,6 +4,7 @@
import os
import sys
import warnings
from datetime import date, datetime, timedelta
from time import sleep
from typing import List
@@ -11,7 +12,7 @@ from unittest.mock import patch
import lancedb
from lancedb.dependencies import _PANDAS_AVAILABLE
from lancedb.index import HnswFlat, HnswPq, HnswSq, IvfPq
from lancedb.index import BTree, FTS, HnswFlat, HnswPq, HnswSq, IvfPq
import numpy as np
import polars as pl
import pyarrow as pa
@@ -928,7 +929,12 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
num_bits=4,
)
mock_create_index.assert_called_with(
"vector", replace=True, config=expected_config, name=None, train=True
"vector",
replace=True,
config=expected_config,
wait_timeout=None,
name=None,
train=True,
)
# Test with target_partition_size
@@ -948,7 +954,12 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
target_partition_size=8192,
)
mock_create_index.assert_called_with(
"vector", replace=True, config=expected_config, name=None, train=True
"vector",
replace=True,
config=expected_config,
wait_timeout=None,
name=None,
train=True,
)
# target_partition_size has a default value,
@@ -967,7 +978,12 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
num_bits=4,
)
mock_create_index.assert_called_with(
"vector", replace=True, config=expected_config, name=None, train=True
"vector",
replace=True,
config=expected_config,
wait_timeout=None,
name=None,
train=True,
)
table.create_index(
@@ -978,7 +994,12 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
)
expected_config = HnswPq(distance_type="dot")
mock_create_index.assert_called_with(
"my_vector", replace=False, config=expected_config, name=None, train=True
"my_vector",
replace=False,
config=expected_config,
wait_timeout=None,
name=None,
train=True,
)
table.create_index(
@@ -993,7 +1014,12 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
distance_type="cosine", sample_rate=0.1, m=29, ef_construction=10
)
mock_create_index.assert_called_with(
"my_vector", replace=True, config=expected_config, name=None, train=True
"my_vector",
replace=True,
config=expected_config,
wait_timeout=None,
name=None,
train=True,
)
table.create_index(
@@ -1008,7 +1034,12 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
distance_type="cosine", sample_rate=0.1, m=29, ef_construction=10
)
mock_create_index.assert_called_with(
"my_vector", replace=True, config=expected_config, name=None, train=True
"my_vector",
replace=True,
config=expected_config,
wait_timeout=None,
name=None,
train=True,
)
@@ -1032,6 +1063,7 @@ def test_create_index_name_and_train_parameters(
"vector",
replace=True,
config=expected_config,
wait_timeout=None,
name="my_custom_index",
train=True,
)
@@ -1039,13 +1071,82 @@ def test_create_index_name_and_train_parameters(
# Test with train=False
table.create_index(vector_column_name="vector", train=False)
mock_create_index.assert_called_with(
"vector", replace=True, config=expected_config, name=None, train=False
"vector",
replace=True,
config=expected_config,
wait_timeout=None,
name=None,
train=False,
)
# Test with both name and train
table.create_index(vector_column_name="vector", name="my_index_name", train=True)
mock_create_index.assert_called_with(
"vector", replace=True, config=expected_config, name="my_index_name", train=True
"vector",
replace=True,
config=expected_config,
wait_timeout=None,
name="my_index_name",
train=True,
)
@patch("lancedb.table.AsyncTable.create_index")
def test_create_index_legacy_emits_deprecation_warning(
    mock_create_index, mem_db: DBConnection
):
    """Check that the legacy keyword form of ``create_index`` warns.

    ``AsyncTable.create_index`` is patched out, so no index is actually built;
    only the ``DeprecationWarning`` raised by the sync wrapper is checked.
    """
    rows = [{"vector": [3.1, 4.1]}, {"vector": [5.9, 26.5]}]
    table = mem_db.create_table("test", data=rows)

    with pytest.warns(DeprecationWarning, match="create_index"):
        table.create_index(
            metric="l2",
            num_partitions=8,
            vector_column_name="vector",
        )
@patch("lancedb.table.AsyncTable.create_index")
def test_create_index_new_api(mock_create_index, mem_db: DBConnection):
    """Check that ``create_index(column, config=...)`` forwards to ``AsyncTable``.

    ``AsyncTable.create_index`` is patched, so each call is checked only by the
    arguments the sync table passes through: the column name, the config object
    unchanged, and the default ``replace``/``wait_timeout``/``name``/``train``
    values. The vector call also runs with ``DeprecationWarning`` promoted to
    an error, so it fails if the new form warns.
    """
    rows = [
        {"vector": [3.1, 4.1], "category": "a", "text": "hello world"},
        {"vector": [5.9, 26.5], "category": "b", "text": "goodbye"},
    ]
    table = mem_db.create_table("test", data=rows)

    # Keyword arguments expected on every forwarded call when the caller
    # supplies only the column and the config.
    forwarded_defaults = {
        "replace": True,
        "wait_timeout": None,
        "name": None,
        "train": True,
    }

    # Vector index via new API should not warn
    with warnings.catch_warnings():
        warnings.simplefilter("error", DeprecationWarning)
        table.create_index("vector", config=IvfPq(distance_type="l2"))
    mock_create_index.assert_called_with(
        "vector", config=IvfPq(distance_type="l2"), **forwarded_defaults
    )

    # Scalar index via new API
    table.create_index("category", config=BTree())
    mock_create_index.assert_called_with(
        "category", config=BTree(), **forwarded_defaults
    )

    # FTS index via new API
    table.create_index("text", config=FTS(with_position=True))
    mock_create_index.assert_called_with(
        "text", config=FTS(with_position=True), **forwarded_defaults
    )
@@ -1861,8 +1962,9 @@ def test_create_scalar_index(mem_db: DBConnection):
"my_table",
data=test_data,
)
# Test with default name
table.create_scalar_index("x")
# Test with default name; confirm DeprecationWarning fires
with pytest.warns(DeprecationWarning, match="create_scalar_index"):
table.create_scalar_index("x")
indices = table.list_indices()
assert len(indices) == 1
scalar_index = indices[0]