feat: support specifying num_partitions and num_bits (#2488)

This commit is contained in:
BubbleCal
2025-07-09 11:36:09 +08:00
committed by GitHub
parent b64252d4fd
commit cab36d94b2
2 changed files with 126 additions and 52 deletions

View File

@@ -18,7 +18,7 @@ from lancedb._lancedb import (
UpdateResult,
)
from lancedb.embeddings.base import EmbeddingFunctionConfig
from lancedb.index import FTS, BTree, Bitmap, HnswPq, HnswSq, IvfFlat, IvfPq, LabelList
from lancedb.index import FTS, BTree, Bitmap, HnswSq, IvfFlat, IvfPq, LabelList
from lancedb.remote.db import LOOP
import pyarrow as pa
@@ -186,6 +186,8 @@ class RemoteTable(Table):
accelerator: Optional[str] = None,
index_type="vector",
wait_timeout: Optional[timedelta] = None,
*,
num_bits: int = 8,
):
"""Create an index on the table.
Currently, the only parameters that matter are
@@ -220,11 +222,6 @@ class RemoteTable(Table):
>>> table.create_index("l2", "vector") # doctest: +SKIP
"""
if num_partitions is not None:
logging.warning(
"num_partitions is not supported on LanceDB cloud."
"This parameter will be tuned automatically."
)
if num_sub_vectors is not None:
logging.warning(
"num_sub_vectors is not supported on LanceDB cloud."
@@ -244,13 +241,21 @@ class RemoteTable(Table):
index_type = index_type.upper()
if index_type == "VECTOR" or index_type == "IVF_PQ":
config = IvfPq(distance_type=metric)
config = IvfPq(
distance_type=metric,
num_partitions=num_partitions,
num_sub_vectors=num_sub_vectors,
num_bits=num_bits,
)
elif index_type == "IVF_HNSW_PQ":
config = HnswPq(distance_type=metric)
raise ValueError(
"IVF_HNSW_PQ is not supported on LanceDB cloud."
"Please use IVF_HNSW_SQ instead."
)
elif index_type == "IVF_HNSW_SQ":
config = HnswSq(distance_type=metric)
config = HnswSq(distance_type=metric, num_partitions=num_partitions)
elif index_type == "IVF_FLAT":
config = IvfFlat(distance_type=metric)
config = IvfFlat(distance_type=metric, num_partitions=num_partitions)
else:
raise ValueError(
f"Unknown vector index type: {index_type}. Valid options are"