diff --git a/docs/src/ann_indexes.md b/docs/src/ann_indexes.md index 75de5c43..41f26985 100644 --- a/docs/src/ann_indexes.md +++ b/docs/src/ann_indexes.md @@ -41,7 +41,7 @@ Querying vector indexes is done via the [search](https://lancedb.github.io/lance There are a couple of parameters that can be used to fine-tune the search: - **limit** (default: 10): The amount of results that will be returned -- **metric** (default: "l2"): The distance metric to use. By default we use euclidean distance. We also support cosine distance. +- **metric** (default: "L2"): The distance metric to use. By default we use Euclidean distance. We also support cosine distance. - **nprobes** (default: 20): The number of probes used. A higher number makes search more accurate but also slower. - **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory. A higher number makes search more accurate but also slower. diff --git a/python/lancedb/query.py b/python/lancedb/query.py index 949e5ef7..1adb8ccb 100644 --- a/python/lancedb/query.py +++ b/python/lancedb/query.py @@ -24,7 +24,7 @@ class LanceQueryBuilder: """ def __init__(self, table: "lancedb.table.LanceTable", query: np.ndarray): - self._metric = "l2" + self._metric = "L2" self._nprobes = 20 self._refine_factor = None self._table = table diff --git a/python/lancedb/table.py b/python/lancedb/table.py index f798fb37..f633cce5 100644 --- a/python/lancedb/table.py +++ b/python/lancedb/table.py @@ -106,11 +106,14 @@ class LanceTable: def _dataset_uri(self) -> str: return os.path.join(self._conn.uri, f"{self.name}.lance") - def create_index(self, num_partitions=256, num_sub_vectors=96): + def create_index(self, metric="L2", num_partitions=256, num_sub_vectors=96): """Create an index on the table. Parameters ---------- + metric: str, default "L2" + The distance metric to use when creating the index. Valid values are "L2" or "cosine". + L2 is Euclidean distance. 
num_partitions: int The number of IVF partitions to use when creating the index. Default is 256. @@ -121,6 +124,7 @@ class LanceTable: self._dataset.create_index( column=VECTOR_COLUMN_NAME, index_type="IVF_PQ", + metric=metric, num_partitions=num_partitions, num_sub_vectors=num_sub_vectors, )