diff --git a/python/python/lancedb/remote/table.py b/python/python/lancedb/remote/table.py index 96cb719b..ca8788a1 100644 --- a/python/python/lancedb/remote/table.py +++ b/python/python/lancedb/remote/table.py @@ -115,6 +115,7 @@ class RemoteTable(Table): *, replace: bool = False, wait_timeout: timedelta = None, + name: Optional[str] = None, ): """Creates a scalar index Parameters @@ -139,7 +140,11 @@ class RemoteTable(Table): LOOP.run( self._table.create_index( - column, config=config, replace=replace, wait_timeout=wait_timeout + column, + config=config, + replace=replace, + wait_timeout=wait_timeout, + name=name, ) ) @@ -161,6 +166,7 @@ class RemoteTable(Table): ngram_min_length: int = 3, ngram_max_length: int = 3, prefix_only: bool = False, + name: Optional[str] = None, ): config = FTS( with_position=with_position, @@ -177,7 +183,11 @@ class RemoteTable(Table): ) LOOP.run( self._table.create_index( - column, config=config, replace=replace, wait_timeout=wait_timeout + column, + config=config, + replace=replace, + wait_timeout=wait_timeout, + name=name, ) ) diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index 0f36d00f..b8290907 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -783,6 +783,7 @@ class Table(ABC): replace: bool = True, index_type: ScalarIndexType = "BTREE", wait_timeout: Optional[timedelta] = None, + name: Optional[str] = None, ): """Create a scalar index on a column. @@ -797,6 +798,8 @@ class Table(ABC): The type of index to create. wait_timeout: timedelta, optional The timeout to wait if indexing is asynchronous. + name: str, optional + The name of the index. If not provided, a default name will be generated. Examples -------- @@ -859,6 +862,7 @@ class Table(ABC): ngram_max_length: int = 3, prefix_only: bool = False, wait_timeout: Optional[timedelta] = None, + name: Optional[str] = None, ): """Create a full-text search index on the table. @@ -923,6 +927,8 @@ class Table(ABC): Whether to only index the prefix of the token for ngram tokenizer. wait_timeout: timedelta, optional The timeout to wait if indexing is asynchronous. + name: str, optional + The name of the index. If not provided, a default name will be generated. """ raise NotImplementedError @@ -2105,6 +2111,7 @@ class LanceTable(Table): *, replace: bool = True, index_type: ScalarIndexType = "BTREE", + name: Optional[str] = None, ): if index_type == "BTREE": config = BTree() @@ -2115,7 +2122,7 @@ class LanceTable(Table): else: raise ValueError(f"Unknown index type {index_type}") return LOOP.run( - self._table.create_index(column, replace=replace, config=config) + self._table.create_index(column, replace=replace, config=config, name=name) ) def create_fts_index( @@ -2139,6 +2146,7 @@ class LanceTable(Table): ngram_min_length: int = 3, ngram_max_length: int = 3, prefix_only: bool = False, + name: Optional[str] = None, ): if not use_tantivy: if not isinstance(field_names, str): @@ -2176,6 +2184,7 @@ class LanceTable(Table): field_names, replace=replace, config=config, + name=name, ) ) return diff --git a/python/python/tests/test_fts.py b/python/python/tests/test_fts.py index a6dc55ed..7f3bc1ee 100644 --- a/python/python/tests/test_fts.py +++ b/python/python/tests/test_fts.py @@ -157,7 +157,16 @@ def test_create_index_with_stemming(tmp_path, table): def test_create_inverted_index(table, use_tantivy, with_position): if use_tantivy and not with_position: pytest.skip("we don't support building a tantivy index without position") - table.create_fts_index("text", use_tantivy=use_tantivy, with_position=with_position) + table.create_fts_index( + "text", + use_tantivy=use_tantivy, + with_position=with_position, + name="custom_fts_index", + ) + if not use_tantivy: + indices = table.list_indices() + fts_indices = [i for i in indices if i.index_type == "FTS"] + assert any(i.name == "custom_fts_index" for i in fts_indices) def test_populate_index(tmp_path, table): diff --git a/python/python/tests/test_remote_db.py b/python/python/tests/test_remote_db.py index 5435e210..53321d12 100644 --- a/python/python/tests/test_remote_db.py +++ b/python/python/tests/test_remote_db.py @@ -271,12 +271,21 @@ def test_table_add_in_threadpool(): def test_table_create_indices(): + # Track received index creation requests to validate name parameter + received_requests = [] + def handler(request): index_stats = dict( index_type="IVF_PQ", num_indexed_rows=1000, num_unindexed_rows=0 ) if request.path == "/v1/table/test/create_index/": + # Capture the request body to validate name parameter + content_len = int(request.headers.get("Content-Length", 0)) + if content_len > 0: + body = request.rfile.read(content_len) + body_data = json.loads(body) + received_requests.append(body_data) request.send_response(200) request.end_headers() elif request.path == "/v1/table/test/create/?mode=create": @@ -307,34 +316,34 @@ def test_table_create_indices(): dict( indexes=[ { - "index_name": "id_idx", + "index_name": "custom_scalar_idx", "columns": ["id"], }, { - "index_name": "text_idx", + "index_name": "custom_fts_idx", "columns": ["text"], }, { - "index_name": "vector_idx", + "index_name": "custom_vector_idx", "columns": ["vector"], }, ] ) ) request.wfile.write(payload.encode()) - elif request.path == "/v1/table/test/index/id_idx/stats/": + elif request.path == "/v1/table/test/index/custom_scalar_idx/stats/": request.send_response(200) request.send_header("Content-Type", "application/json") request.end_headers() payload = json.dumps(index_stats) request.wfile.write(payload.encode()) - elif request.path == "/v1/table/test/index/text_idx/stats/": + elif request.path == "/v1/table/test/index/custom_fts_idx/stats/": request.send_response(200) request.send_header("Content-Type", "application/json") request.end_headers() payload = json.dumps(index_stats) request.wfile.write(payload.encode()) - elif request.path == "/v1/table/test/index/vector_idx/stats/": + elif request.path == "/v1/table/test/index/custom_vector_idx/stats/": request.send_response(200) request.send_header("Content-Type", "application/json") request.end_headers() @@ -351,16 +360,49 @@ def test_table_create_indices(): # Parameters are well-tested through local and async tests. # This is a smoke-test. table = db.create_table("test", [{"id": 1}]) - table.create_scalar_index("id", wait_timeout=timedelta(seconds=2)) - table.create_fts_index("text", wait_timeout=timedelta(seconds=2)) - table.create_index( - vector_column_name="vector", wait_timeout=timedelta(seconds=10) + + # Test create_scalar_index with custom name + table.create_scalar_index( + "id", wait_timeout=timedelta(seconds=2), name="custom_scalar_idx" ) - table.wait_for_index(["id_idx"], timedelta(seconds=2)) - table.wait_for_index(["text_idx", "vector_idx"], timedelta(seconds=2)) - table.drop_index("vector_idx") - table.drop_index("id_idx") - table.drop_index("text_idx") + + # Test create_fts_index with custom name + table.create_fts_index( + "text", wait_timeout=timedelta(seconds=2), name="custom_fts_idx" + ) + + # Test create_index with custom name + table.create_index( + vector_column_name="vector", + wait_timeout=timedelta(seconds=10), + name="custom_vector_idx", + ) + + # Validate that the name parameter was passed correctly in requests + assert len(received_requests) == 3 + + # Check scalar index request has custom name + scalar_req = received_requests[0] + assert "name" in scalar_req + assert scalar_req["name"] == "custom_scalar_idx" + + # Check FTS index request has custom name + fts_req = received_requests[1] + assert "name" in fts_req + assert fts_req["name"] == "custom_fts_idx" + + # Check vector index request has custom name + vector_req = received_requests[2] + assert "name" in vector_req + assert vector_req["name"] == "custom_vector_idx" + + table.wait_for_index(["custom_scalar_idx"], timedelta(seconds=2)) + table.wait_for_index( + ["custom_fts_idx", "custom_vector_idx"], timedelta(seconds=2) + ) + table.drop_index("custom_vector_idx") + table.drop_index("custom_scalar_idx") + table.drop_index("custom_fts_idx") def test_table_wait_for_index_timeout(): diff --git a/python/python/tests/test_table.py b/python/python/tests/test_table.py index c634cf12..8730002f 100644 --- a/python/python/tests/test_table.py +++ b/python/python/tests/test_table.py @@ -1274,11 +1274,13 @@ def test_create_scalar_index(mem_db: DBConnection): "my_table", data=test_data, ) + # Test with default name table.create_scalar_index("x") indices = table.list_indices() assert len(indices) == 1 scalar_index = indices[0] assert scalar_index.index_type == "BTree" + assert scalar_index.name == "x_idx" # Default name # Confirm that prefiltering still works with the scalar index column results = table.search().where("x = 'c'").to_arrow() @@ -1292,6 +1294,14 @@ def test_create_scalar_index(mem_db: DBConnection): indices = table.list_indices() assert len(indices) == 0 + # Test with custom name + table.create_scalar_index("y", name="custom_y_index") + indices = table.list_indices() + assert len(indices) == 1 + scalar_index = indices[0] + assert scalar_index.index_type == "BTree" + assert scalar_index.name == "custom_y_index" + def test_empty_query(mem_db: DBConnection): table = mem_db.create_table(