Merge branch 'main' into changhiskhan/improve-index-docs

This commit is contained in:
Chang She
2023-04-25 21:04:01 -07:00
committed by GitHub
6 changed files with 51 additions and 12 deletions

View File

@@ -24,6 +24,7 @@ class LanceQueryBuilder:
"""
def __init__(self, table: "lancedb.table.LanceTable", query: np.ndarray):
self._metric = "L2"
self._nprobes = 20
self._refine_factor = None
self._table = table
@@ -77,6 +78,21 @@ class LanceQueryBuilder:
self._where = where
return self
def metric(self, metric: str) -> LanceQueryBuilder:
"""Set the distance metric to use.
Parameters
----------
metric: str
The distance metric to use. By default "l2" is used.
Returns
-------
The LanceQueryBuilder object.
"""
self._metric = metric
return self
def nprobes(self, nprobes: int) -> LanceQueryBuilder:
"""Set the number of probes to use.
@@ -123,6 +139,7 @@ class LanceQueryBuilder:
"column": VECTOR_COLUMN_NAME,
"q": self._query,
"k": self._limit,
"metric": self._metric,
"nprobes": self._nprobes,
"refine_factor": self._refine_factor,
},

View File

@@ -106,11 +106,14 @@ class LanceTable:
def _dataset_uri(self) -> str:
return os.path.join(self._conn.uri, f"{self.name}.lance")
def create_index(self, num_partitions=256, num_sub_vectors=96):
def create_index(self, metric="L2", num_partitions=256, num_sub_vectors=96):
"""Create an index on the table.
Parameters
----------
metric: str, default "L2"
The distance metric to use when creating the index. Valid values are "L2" or "cosine".
L2 is euclidean distance.
num_partitions: int
The number of IVF partitions to use when creating the index.
Default is 256.
@@ -121,6 +124,7 @@ class LanceTable:
self._dataset.create_index(
column=VECTOR_COLUMN_NAME,
index_type="IVF_PQ",
metric=metric,
num_partitions=num_partitions,
num_sub_vectors=num_sub_vectors,
)