mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-08 21:02:58 +00:00
fix: metric type inconsistency (#2122)
PR fixes #2113 --------- Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
@@ -150,7 +150,7 @@ class HnswPq:
|
||||
Parameters
|
||||
----------
|
||||
|
||||
distance_type: str, default "L2"
|
||||
distance_type: str, default "l2"
|
||||
|
||||
The distance metric used to train the index.
|
||||
|
||||
@@ -158,18 +158,18 @@ class HnswPq:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
num_partitions, default sqrt(num_rows)
|
||||
|
||||
@@ -271,7 +271,7 @@ class HnswSq:
|
||||
Parameters
|
||||
----------
|
||||
|
||||
distance_type: str, default "L2"
|
||||
distance_type: str, default "l2"
|
||||
|
||||
The distance metric used to train the index.
|
||||
|
||||
@@ -279,18 +279,18 @@ class HnswSq:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
num_partitions, default sqrt(num_rows)
|
||||
|
||||
@@ -369,7 +369,7 @@ class IvfFlat:
|
||||
|
||||
Attributes
|
||||
----------
|
||||
distance_type: str, default "L2"
|
||||
distance_type: str, default "l2"
|
||||
The distance metric used to train the index
|
||||
|
||||
This is used when training the index to calculate the IVF partitions
|
||||
@@ -383,13 +383,13 @@ class IvfFlat:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
Note: the cosine distance is undefined when one (or both) of the vectors
|
||||
@@ -398,7 +398,7 @@ class IvfFlat:
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
"hamming" - Hamming distance. Hamming distance is a distance metric
|
||||
calculated as the number of positions at which the corresponding bits are
|
||||
@@ -475,7 +475,7 @@ class IvfPq:
|
||||
|
||||
Attributes
|
||||
----------
|
||||
distance_type: str, default "L2"
|
||||
distance_type: str, default "l2"
|
||||
The distance metric used to train the index
|
||||
|
||||
This is used when training the index to calculate the IVF partitions
|
||||
@@ -489,13 +489,13 @@ class IvfPq:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
Note: the cosine distance is undefined when one (or both) of the vectors
|
||||
@@ -504,7 +504,7 @@ class IvfPq:
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
num_partitions: int, default sqrt(num_rows)
|
||||
The number of IVF partitions to create.
|
||||
|
||||
|
||||
@@ -68,7 +68,7 @@ class Query(pydantic.BaseModel):
|
||||
metric : str
|
||||
the distance metric between a pair of vectors,
|
||||
|
||||
can support L2 (default), Cosine and Dot.
|
||||
can support l2 (default), Cosine and Dot.
|
||||
[metric definitions][search]
|
||||
columns : Optional[List[str]]
|
||||
which columns to return in the results
|
||||
@@ -113,7 +113,7 @@ class Query(pydantic.BaseModel):
|
||||
k: Optional[int] = None
|
||||
|
||||
# # metrics
|
||||
metric: str = "L2"
|
||||
metric: str = "l2"
|
||||
|
||||
# which columns to return in the results
|
||||
columns: Optional[Union[List[str], Dict[str, str]]] = None
|
||||
@@ -597,7 +597,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
if self._limit is None:
|
||||
self._limit = 10
|
||||
self._query = query
|
||||
self._distance_type = "L2"
|
||||
self._distance_type = "l2"
|
||||
self._nprobes = 20
|
||||
self._lower_bound = None
|
||||
self._upper_bound = None
|
||||
@@ -608,7 +608,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
self._str_query = str_query
|
||||
self._fast_search = fast_search
|
||||
|
||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
||||
def metric(self, metric: Literal["l2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
||||
"""Set the distance metric to use.
|
||||
|
||||
This is an alias for distance_type() and may be deprecated in the future.
|
||||
@@ -616,8 +616,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metric: "L2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "L2" is used.
|
||||
metric: "l2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "l2" is used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -627,7 +627,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
return self.distance_type(metric)
|
||||
|
||||
def distance_type(
|
||||
self, distance_type: Literal["L2", "cosine", "dot"]
|
||||
self, distance_type: Literal["l2", "cosine", "dot"]
|
||||
) -> "LanceVectorQueryBuilder":
|
||||
"""Set the distance metric to use.
|
||||
|
||||
@@ -641,8 +641,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
distance_type: "L2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "L2" is used.
|
||||
distance_type: "l2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "l2" is used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -1414,7 +1414,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
self._ef = ef
|
||||
return self
|
||||
|
||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
|
||||
def metric(self, metric: Literal["l2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
|
||||
"""Set the distance metric to use.
|
||||
|
||||
This is an alias for distance_type() and may be deprecated in the future.
|
||||
@@ -1422,8 +1422,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metric: "L2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "L2" is used.
|
||||
metric: "l2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "l2" is used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -1433,7 +1433,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
return self.distance_type(metric)
|
||||
|
||||
def distance_type(
|
||||
self, distance_type: Literal["L2", "cosine", "dot"]
|
||||
self, distance_type: Literal["l2", "cosine", "dot"]
|
||||
) -> "LanceHybridQueryBuilder":
|
||||
"""Set the distance metric to use.
|
||||
|
||||
@@ -1447,8 +1447,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
distance_type: "L2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "L2" is used.
|
||||
distance_type: "l2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "l2" is used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
@@ -154,7 +154,7 @@ class RemoteTable(Table):
|
||||
|
||||
def create_index(
|
||||
self,
|
||||
metric="L2",
|
||||
metric="l2",
|
||||
vector_column_name: str = VECTOR_COLUMN_NAME,
|
||||
index_cache_size: Optional[int] = None,
|
||||
num_partitions: Optional[int] = None,
|
||||
@@ -170,7 +170,7 @@ class RemoteTable(Table):
|
||||
Parameters
|
||||
----------
|
||||
metric : str
|
||||
The metric to use for the index. Default is "L2".
|
||||
The metric to use for the index. Default is "l2".
|
||||
vector_column_name : str
|
||||
The name of the vector column. Default is "vector".
|
||||
|
||||
@@ -193,7 +193,7 @@ class RemoteTable(Table):
|
||||
... table_name, # doctest: +SKIP
|
||||
... schema=schema, # doctest: +SKIP
|
||||
... )
|
||||
>>> table.create_index("L2", "vector") # doctest: +SKIP
|
||||
>>> table.create_index("l2", "vector") # doctest: +SKIP
|
||||
"""
|
||||
|
||||
if num_partitions is not None:
|
||||
|
||||
@@ -577,7 +577,7 @@ class Table(ABC):
|
||||
|
||||
def create_index(
|
||||
self,
|
||||
metric="L2",
|
||||
metric="l2",
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
vector_column_name: str = VECTOR_COLUMN_NAME,
|
||||
@@ -596,10 +596,10 @@ class Table(ABC):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metric: str, default "L2"
|
||||
metric: str, default "l2"
|
||||
The distance metric to use when creating the index.
|
||||
Valid values are "L2", "cosine", "dot", or "hamming".
|
||||
L2 is euclidean distance.
|
||||
Valid values are "l2", "cosine", "dot", or "hamming".
|
||||
l2 is euclidean distance.
|
||||
Hamming is available only for binary vectors.
|
||||
num_partitions: int, default 256
|
||||
The number of IVF partitions to use when creating the index.
|
||||
|
||||
@@ -452,7 +452,7 @@ def test_query_builder_with_metric(table):
|
||||
df_default = LanceVectorQueryBuilder(table, query, vector_column_name).to_pandas()
|
||||
df_l2 = (
|
||||
LanceVectorQueryBuilder(table, query, vector_column_name)
|
||||
.distance_type("L2")
|
||||
.distance_type("l2")
|
||||
.to_pandas()
|
||||
)
|
||||
tm.assert_frame_equal(df_default, df_l2)
|
||||
|
||||
@@ -480,7 +480,7 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
|
||||
)
|
||||
|
||||
table.create_index(
|
||||
metric="L2",
|
||||
metric="l2",
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
vector_column_name="vector",
|
||||
@@ -489,7 +489,7 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
|
||||
num_bits=4,
|
||||
)
|
||||
expected_config = IvfPq(
|
||||
distance_type="L2",
|
||||
distance_type="l2",
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
num_bits=4,
|
||||
@@ -1237,7 +1237,7 @@ def test_hybrid_search_metric_type(tmp_db: DBConnection):
|
||||
# This test uses an FTS index
|
||||
pytest.importorskip("lancedb.fts")
|
||||
|
||||
# Need to use nonnorm as the embedding function so L2 and dot results
|
||||
# Need to use nonnorm as the embedding function so l2 and dot results
|
||||
# are different
|
||||
table, _, _ = setup_hybrid_search_table(tmp_db, "nonnorm")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user