mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-22 21:09:58 +00:00
fix: metric type inconsistency (#2122)
PR fixes #2113 --------- Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
39
Cargo.lock
generated
39
Cargo.lock
generated
@@ -1095,16 +1095,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "blake3"
|
||||
version = "1.6.0"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1230237285e3e10cde447185e8975408ae24deaa67205ce684805c25bc0c7937"
|
||||
checksum = "675f87afced0413c9bb02843499dbbd3882a237645883f71a2b59644a6d2f753"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"arrayvec",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"constant_time_eq",
|
||||
"memmap2 0.9.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1145,9 +1144,9 @@ checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.21.0"
|
||||
version = "1.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3"
|
||||
checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae"
|
||||
dependencies = [
|
||||
"bytemuck_derive",
|
||||
]
|
||||
@@ -2039,7 +2038,7 @@ dependencies = [
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"paste",
|
||||
"petgraph",
|
||||
"petgraph 0.7.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2494,6 +2493,12 @@ dependencies = [
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fixedbitset"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
|
||||
|
||||
[[package]]
|
||||
name = "fixedbitset"
|
||||
version = "0.5.7"
|
||||
@@ -4990,13 +4995,23 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7"
|
||||
|
||||
[[package]]
|
||||
name = "petgraph"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
|
||||
dependencies = [
|
||||
"fixedbitset 0.4.2",
|
||||
"indexmap 2.7.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "petgraph"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
|
||||
dependencies = [
|
||||
"fixedbitset",
|
||||
"fixedbitset 0.5.7",
|
||||
"indexmap 2.7.1",
|
||||
]
|
||||
|
||||
@@ -5510,11 +5525,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
|
||||
dependencies = [
|
||||
"heck 0.5.0",
|
||||
"itertools 0.14.0",
|
||||
"itertools 0.13.0",
|
||||
"log",
|
||||
"multimap",
|
||||
"once_cell",
|
||||
"petgraph",
|
||||
"petgraph 0.6.5",
|
||||
"prettyplease",
|
||||
"prost",
|
||||
"prost-types",
|
||||
@@ -5530,7 +5545,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools 0.14.0",
|
||||
"itertools 0.13.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.98",
|
||||
@@ -6039,9 +6054,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "roaring"
|
||||
version = "0.10.10"
|
||||
version = "0.10.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a652edd001c53df0b3f96a36a8dc93fce6866988efc16808235653c6bcac8bf2"
|
||||
checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
|
||||
@@ -171,7 +171,7 @@ paths:
|
||||
distance_type:
|
||||
type: string
|
||||
description: |
|
||||
The distance metric to use for search. L2, Cosine, Dot and Hamming are supported. Default is L2.
|
||||
The distance metric to use for search. l2, Cosine, Dot and Hamming are supported. Default is l2.
|
||||
bypass_vector_index:
|
||||
type: boolean
|
||||
description: |
|
||||
@@ -450,7 +450,7 @@ paths:
|
||||
type: string
|
||||
nullable: false
|
||||
description: |
|
||||
The metric type to use for the index. L2, Cosine, Dot are supported.
|
||||
The metric type to use for the index. l2, Cosine, Dot are supported.
|
||||
index_type:
|
||||
type: string
|
||||
responses:
|
||||
|
||||
@@ -69,7 +69,7 @@ Lance supports `IVF_PQ` index type by default.
|
||||
|
||||
The following IVF_PQ paramters can be specified:
|
||||
|
||||
- **distance_type**: The distance metric to use. By default it uses euclidean distance "`L2`".
|
||||
- **distance_type**: The distance metric to use. By default it uses euclidean distance "`l2`".
|
||||
We also support "cosine" and "dot" distance as well.
|
||||
- **num_partitions**: The number of partitions in the index. The default is the square root
|
||||
of the number of rows.
|
||||
|
||||
@@ -59,7 +59,7 @@ Then the greedy search routine operates as follows:
|
||||
|
||||
There are three key parameters to set when constructing an HNSW index:
|
||||
|
||||
* `metric`: Use an `L2` euclidean distance metric. We also support `dot` and `cosine` distance.
|
||||
* `metric`: Use an `l2` euclidean distance metric. We also support `dot` and `cosine` distance.
|
||||
* `m`: The number of neighbors to select for each vector in the HNSW graph.
|
||||
* `ef_construction`: The number of candidates to evaluate during the construction of the HNSW graph.
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ We can combine the above concepts to understand how to build and query an IVF-PQ
|
||||
|
||||
There are three key parameters to set when constructing an IVF-PQ index:
|
||||
|
||||
* `metric`: Use an `L2` euclidean distance metric. We also support `dot` and `cosine` distance.
|
||||
* `metric`: Use an `l2` euclidean distance metric. We also support `dot` and `cosine` distance.
|
||||
* `num_partitions`: The number of partitions in the IVF portion of the index.
|
||||
* `num_sub_vectors`: The number of sub-vectors that will be created during Product Quantization (PQ).
|
||||
|
||||
@@ -56,7 +56,7 @@ In Python, the index can be created as follows:
|
||||
```python
|
||||
# Create and train the index for a 1536-dimensional vector
|
||||
# Make sure you have enough data in the table for an effective training step
|
||||
tbl.create_index(metric="L2", num_partitions=256, num_sub_vectors=96)
|
||||
tbl.create_index(metric="l2", num_partitions=256, num_sub_vectors=96)
|
||||
```
|
||||
!!! note
|
||||
`num_partitions`=256 and `num_sub_vectors`=96 does not work for every dataset. Those values needs to be adjusted for your particular dataset.
|
||||
|
||||
@@ -54,7 +54,7 @@ As mentioned, after creating embedding, each data point is represented as a vect
|
||||
|
||||
Points that are close to each other in vector space are considered similar (or appear in similar contexts), and points that are far away are considered dissimilar. To quantify this closeness, we use distance as a metric which can be measured in the following way -
|
||||
|
||||
1. **Euclidean Distance (L2)**: It calculates the straight-line distance between two points (vectors) in a multidimensional space.
|
||||
1. **Euclidean Distance (l2)**: It calculates the straight-line distance between two points (vectors) in a multidimensional space.
|
||||
2. **Cosine Similarity**: It measures the cosine of the angle between two vectors, providing a normalized measure of similarity based on their direction.
|
||||
3. **Dot product**: It is calculated as the sum of the products of their corresponding components. To measure relatedness it considers both the magnitude and direction of the vectors.
|
||||
|
||||
|
||||
@@ -108,7 +108,7 @@ This method creates a scalar(for non-vector cols) or a vector index on a table.
|
||||
|:---|:---|:---|:---|
|
||||
|`vector_col`|`Optional[str]`| Provide if you want to create index on a vector column. |`None`|
|
||||
|`col_name`|`Optional[str]`| Provide if you want to create index on a non-vector column. |`None`|
|
||||
|`metric`|`Optional[str]` |Provide the metric to use for vector index. choice of metrics: 'L2', 'dot', 'cosine'. |`L2`|
|
||||
|`metric`|`Optional[str]` |Provide the metric to use for vector index. choice of metrics: 'l2', 'dot', 'cosine'. |`l2`|
|
||||
|`num_partitions`|`Optional[int]`|Number of partitions to use for the index.|`256`|
|
||||
|`num_sub_vectors`|`Optional[int]` |Number of sub-vectors to use for the index.|`96`|
|
||||
|`index_cache_size`|`Optional[int]` |Size of the index cache.|`None`|
|
||||
|
||||
@@ -125,7 +125,7 @@ The exhaustive list of parameters for `LanceDBVectorStore` vector store are :
|
||||
```
|
||||
- **_table_exists(self, tbl_name: `Optional[str]` = `None`) -> `bool`** : Returns `True` if `tbl_name` exists in database.
|
||||
- __create_index(
|
||||
self, scalar: `Optional[bool]` = False, col_name: `Optional[str]` = None, num_partitions: `Optional[int]` = 256, num_sub_vectors: `Optional[int]` = 96, index_cache_size: `Optional[int]` = None, metric: `Optional[str]` = "L2",
|
||||
self, scalar: `Optional[bool]` = False, col_name: `Optional[str]` = None, num_partitions: `Optional[int]` = 256, num_sub_vectors: `Optional[int]` = 96, index_cache_size: `Optional[int]` = None, metric: `Optional[str]` = "l2",
|
||||
) -> `None`__ : Creates a scalar(for non-vector cols) or a vector index on a table.
|
||||
Make sure your vector column has enough data before creating an index on it.
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ Distance metrics type.
|
||||
|
||||
- [Cosine](MetricType.md#cosine)
|
||||
- [Dot](MetricType.md#dot)
|
||||
- [L2](MetricType.md#l2)
|
||||
- [l2](MetricType.md#l2)
|
||||
|
||||
## Enumeration Members
|
||||
|
||||
|
||||
@@ -85,7 +85,7 @@ ___
|
||||
|
||||
• `Optional` **metric\_type**: [`MetricType`](../enums/MetricType.md)
|
||||
|
||||
Metric type, L2 or Cosine
|
||||
Metric type, l2 or Cosine
|
||||
|
||||
#### Defined in
|
||||
|
||||
|
||||
@@ -24,18 +24,18 @@ The following distance types are available:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
***
|
||||
|
||||
|
||||
@@ -24,18 +24,18 @@ The following distance types are available:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
***
|
||||
|
||||
|
||||
@@ -31,13 +31,13 @@ The following distance types are available:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
Note: the cosine distance is undefined when one (or both) of the vectors
|
||||
@@ -46,7 +46,7 @@ never be returned from a vector search.
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
***
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ Currently, LanceDB supports the following metrics:
|
||||
|
||||
| Metric | Description |
|
||||
| --------- | --------------------------------------------------------------------------- |
|
||||
| `l2` | [Euclidean / L2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) |
|
||||
| `l2` | [Euclidean / l2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) |
|
||||
| `cosine` | [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity) |
|
||||
| `dot` | [Dot Production](https://en.wikipedia.org/wiki/Dot_product) |
|
||||
| `hamming` | [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) |
|
||||
|
||||
@@ -1299,7 +1299,7 @@ export interface IvfPQIndexConfig {
|
||||
index_name?: string
|
||||
|
||||
/**
|
||||
* Metric type, L2 or Cosine
|
||||
* Metric type, l2 or Cosine
|
||||
*/
|
||||
metric_type?: MetricType
|
||||
|
||||
|
||||
@@ -62,13 +62,13 @@ export interface IvfPqOptions {
|
||||
*
|
||||
* "l2" - Euclidean distance. This is a very common distance metric that
|
||||
* accounts for both magnitude and direction when determining the distance
|
||||
* between vectors. L2 distance has a range of [0, ∞).
|
||||
* between vectors. l2 distance has a range of [0, ∞).
|
||||
*
|
||||
* "cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
* calculated from the cosine similarity between two vectors. Cosine
|
||||
* similarity is a measure of similarity between two non-zero vectors of an
|
||||
* inner product space. It is defined to equal the cosine of the angle
|
||||
* between them. Unlike L2, the cosine distance is not affected by the
|
||||
* between them. Unlike l2, the cosine distance is not affected by the
|
||||
* magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
*
|
||||
* Note: the cosine distance is undefined when one (or both) of the vectors
|
||||
@@ -77,7 +77,7 @@ export interface IvfPqOptions {
|
||||
*
|
||||
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
* L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
*/
|
||||
distanceType?: "l2" | "cosine" | "dot";
|
||||
|
||||
@@ -125,18 +125,18 @@ export interface HnswPqOptions {
|
||||
*
|
||||
* "l2" - Euclidean distance. This is a very common distance metric that
|
||||
* accounts for both magnitude and direction when determining the distance
|
||||
* between vectors. L2 distance has a range of [0, ∞).
|
||||
* between vectors. l2 distance has a range of [0, ∞).
|
||||
*
|
||||
* "cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
* calculated from the cosine similarity between two vectors. Cosine
|
||||
* similarity is a measure of similarity between two non-zero vectors of an
|
||||
* inner product space. It is defined to equal the cosine of the angle
|
||||
* between them. Unlike L2, the cosine distance is not affected by the
|
||||
* between them. Unlike l2, the cosine distance is not affected by the
|
||||
* magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
*
|
||||
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
* L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
*/
|
||||
distanceType?: "l2" | "cosine" | "dot";
|
||||
|
||||
@@ -241,18 +241,18 @@ export interface HnswSqOptions {
|
||||
*
|
||||
* "l2" - Euclidean distance. This is a very common distance metric that
|
||||
* accounts for both magnitude and direction when determining the distance
|
||||
* between vectors. L2 distance has a range of [0, ∞).
|
||||
* between vectors. l2 distance has a range of [0, ∞).
|
||||
*
|
||||
* "cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
* calculated from the cosine similarity between two vectors. Cosine
|
||||
* similarity is a measure of similarity between two non-zero vectors of an
|
||||
* inner product space. It is defined to equal the cosine of the angle
|
||||
* between them. Unlike L2, the cosine distance is not affected by the
|
||||
* between them. Unlike l2, the cosine distance is not affected by the
|
||||
* magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
*
|
||||
* "dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
* distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
* L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
* l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
*/
|
||||
distanceType?: "l2" | "cosine" | "dot";
|
||||
|
||||
|
||||
@@ -150,7 +150,7 @@ class HnswPq:
|
||||
Parameters
|
||||
----------
|
||||
|
||||
distance_type: str, default "L2"
|
||||
distance_type: str, default "l2"
|
||||
|
||||
The distance metric used to train the index.
|
||||
|
||||
@@ -158,18 +158,18 @@ class HnswPq:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
num_partitions, default sqrt(num_rows)
|
||||
|
||||
@@ -271,7 +271,7 @@ class HnswSq:
|
||||
Parameters
|
||||
----------
|
||||
|
||||
distance_type: str, default "L2"
|
||||
distance_type: str, default "l2"
|
||||
|
||||
The distance metric used to train the index.
|
||||
|
||||
@@ -279,18 +279,18 @@ class HnswSq:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
num_partitions, default sqrt(num_rows)
|
||||
|
||||
@@ -369,7 +369,7 @@ class IvfFlat:
|
||||
|
||||
Attributes
|
||||
----------
|
||||
distance_type: str, default "L2"
|
||||
distance_type: str, default "l2"
|
||||
The distance metric used to train the index
|
||||
|
||||
This is used when training the index to calculate the IVF partitions
|
||||
@@ -383,13 +383,13 @@ class IvfFlat:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
Note: the cosine distance is undefined when one (or both) of the vectors
|
||||
@@ -398,7 +398,7 @@ class IvfFlat:
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
"hamming" - Hamming distance. Hamming distance is a distance metric
|
||||
calculated as the number of positions at which the corresponding bits are
|
||||
@@ -475,7 +475,7 @@ class IvfPq:
|
||||
|
||||
Attributes
|
||||
----------
|
||||
distance_type: str, default "L2"
|
||||
distance_type: str, default "l2"
|
||||
The distance metric used to train the index
|
||||
|
||||
This is used when training the index to calculate the IVF partitions
|
||||
@@ -489,13 +489,13 @@ class IvfPq:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
between vectors. l2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
between them. Unlike l2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
Note: the cosine distance is undefined when one (or both) of the vectors
|
||||
@@ -504,7 +504,7 @@ class IvfPq:
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
num_partitions: int, default sqrt(num_rows)
|
||||
The number of IVF partitions to create.
|
||||
|
||||
|
||||
@@ -68,7 +68,7 @@ class Query(pydantic.BaseModel):
|
||||
metric : str
|
||||
the distance metric between a pair of vectors,
|
||||
|
||||
can support L2 (default), Cosine and Dot.
|
||||
can support l2 (default), Cosine and Dot.
|
||||
[metric definitions][search]
|
||||
columns : Optional[List[str]]
|
||||
which columns to return in the results
|
||||
@@ -113,7 +113,7 @@ class Query(pydantic.BaseModel):
|
||||
k: Optional[int] = None
|
||||
|
||||
# # metrics
|
||||
metric: str = "L2"
|
||||
metric: str = "l2"
|
||||
|
||||
# which columns to return in the results
|
||||
columns: Optional[Union[List[str], Dict[str, str]]] = None
|
||||
@@ -597,7 +597,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
if self._limit is None:
|
||||
self._limit = 10
|
||||
self._query = query
|
||||
self._distance_type = "L2"
|
||||
self._distance_type = "l2"
|
||||
self._nprobes = 20
|
||||
self._lower_bound = None
|
||||
self._upper_bound = None
|
||||
@@ -608,7 +608,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
self._str_query = str_query
|
||||
self._fast_search = fast_search
|
||||
|
||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
||||
def metric(self, metric: Literal["l2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
||||
"""Set the distance metric to use.
|
||||
|
||||
This is an alias for distance_type() and may be deprecated in the future.
|
||||
@@ -616,8 +616,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metric: "L2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "L2" is used.
|
||||
metric: "l2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "l2" is used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -627,7 +627,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
return self.distance_type(metric)
|
||||
|
||||
def distance_type(
|
||||
self, distance_type: Literal["L2", "cosine", "dot"]
|
||||
self, distance_type: Literal["l2", "cosine", "dot"]
|
||||
) -> "LanceVectorQueryBuilder":
|
||||
"""Set the distance metric to use.
|
||||
|
||||
@@ -641,8 +641,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
distance_type: "L2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "L2" is used.
|
||||
distance_type: "l2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "l2" is used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -1414,7 +1414,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
self._ef = ef
|
||||
return self
|
||||
|
||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
|
||||
def metric(self, metric: Literal["l2", "cosine", "dot"]) -> LanceHybridQueryBuilder:
|
||||
"""Set the distance metric to use.
|
||||
|
||||
This is an alias for distance_type() and may be deprecated in the future.
|
||||
@@ -1422,8 +1422,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metric: "L2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "L2" is used.
|
||||
metric: "l2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "l2" is used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -1433,7 +1433,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
return self.distance_type(metric)
|
||||
|
||||
def distance_type(
|
||||
self, distance_type: Literal["L2", "cosine", "dot"]
|
||||
self, distance_type: Literal["l2", "cosine", "dot"]
|
||||
) -> "LanceHybridQueryBuilder":
|
||||
"""Set the distance metric to use.
|
||||
|
||||
@@ -1447,8 +1447,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
distance_type: "L2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "L2" is used.
|
||||
distance_type: "l2" or "cosine" or "dot"
|
||||
The distance metric to use. By default "l2" is used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
@@ -154,7 +154,7 @@ class RemoteTable(Table):
|
||||
|
||||
def create_index(
|
||||
self,
|
||||
metric="L2",
|
||||
metric="l2",
|
||||
vector_column_name: str = VECTOR_COLUMN_NAME,
|
||||
index_cache_size: Optional[int] = None,
|
||||
num_partitions: Optional[int] = None,
|
||||
@@ -170,7 +170,7 @@ class RemoteTable(Table):
|
||||
Parameters
|
||||
----------
|
||||
metric : str
|
||||
The metric to use for the index. Default is "L2".
|
||||
The metric to use for the index. Default is "l2".
|
||||
vector_column_name : str
|
||||
The name of the vector column. Default is "vector".
|
||||
|
||||
@@ -193,7 +193,7 @@ class RemoteTable(Table):
|
||||
... table_name, # doctest: +SKIP
|
||||
... schema=schema, # doctest: +SKIP
|
||||
... )
|
||||
>>> table.create_index("L2", "vector") # doctest: +SKIP
|
||||
>>> table.create_index("l2", "vector") # doctest: +SKIP
|
||||
"""
|
||||
|
||||
if num_partitions is not None:
|
||||
|
||||
@@ -577,7 +577,7 @@ class Table(ABC):
|
||||
|
||||
def create_index(
|
||||
self,
|
||||
metric="L2",
|
||||
metric="l2",
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
vector_column_name: str = VECTOR_COLUMN_NAME,
|
||||
@@ -596,10 +596,10 @@ class Table(ABC):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
metric: str, default "L2"
|
||||
metric: str, default "l2"
|
||||
The distance metric to use when creating the index.
|
||||
Valid values are "L2", "cosine", "dot", or "hamming".
|
||||
L2 is euclidean distance.
|
||||
Valid values are "l2", "cosine", "dot", or "hamming".
|
||||
l2 is euclidean distance.
|
||||
Hamming is available only for binary vectors.
|
||||
num_partitions: int, default 256
|
||||
The number of IVF partitions to use when creating the index.
|
||||
|
||||
@@ -452,7 +452,7 @@ def test_query_builder_with_metric(table):
|
||||
df_default = LanceVectorQueryBuilder(table, query, vector_column_name).to_pandas()
|
||||
df_l2 = (
|
||||
LanceVectorQueryBuilder(table, query, vector_column_name)
|
||||
.distance_type("L2")
|
||||
.distance_type("l2")
|
||||
.to_pandas()
|
||||
)
|
||||
tm.assert_frame_equal(df_default, df_l2)
|
||||
|
||||
@@ -480,7 +480,7 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
|
||||
)
|
||||
|
||||
table.create_index(
|
||||
metric="L2",
|
||||
metric="l2",
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
vector_column_name="vector",
|
||||
@@ -489,7 +489,7 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
|
||||
num_bits=4,
|
||||
)
|
||||
expected_config = IvfPq(
|
||||
distance_type="L2",
|
||||
distance_type="l2",
|
||||
num_partitions=256,
|
||||
num_sub_vectors=96,
|
||||
num_bits=4,
|
||||
@@ -1237,7 +1237,7 @@ def test_hybrid_search_metric_type(tmp_db: DBConnection):
|
||||
# This test uses an FTS index
|
||||
pytest.importorskip("lancedb.fts")
|
||||
|
||||
# Need to use nonnorm as the embedding function so L2 and dot results
|
||||
# Need to use nonnorm as the embedding function so l2 and dot results
|
||||
# are different
|
||||
table, _, _ = setup_hybrid_search_table(tmp_db, "nonnorm")
|
||||
|
||||
|
||||
@@ -103,7 +103,7 @@ async fn create_index(table: &Table) -> Result<()> {
|
||||
// we are creating an index that my have better recall than the
|
||||
// default but is also larger and slower.
|
||||
IvfPqIndexBuilder::default()
|
||||
// This overrides the default distance type of L2
|
||||
// This overrides the default distance type of l2
|
||||
.distance_type(DistanceType::Cosine)
|
||||
// With 1000 rows this have been ~31 by default
|
||||
.num_partitions(50)
|
||||
|
||||
@@ -224,13 +224,13 @@ pub use table::Table;
|
||||
pub enum DistanceType {
|
||||
/// Euclidean distance. This is a very common distance metric that
|
||||
/// accounts for both magnitude and direction when determining the distance
|
||||
/// between vectors. L2 distance has a range of [0, ∞).
|
||||
/// between vectors. l2 distance has a range of [0, ∞).
|
||||
L2,
|
||||
/// Cosine distance. Cosine distance is a distance metric
|
||||
/// calculated from the cosine similarity between two vectors. Cosine
|
||||
/// similarity is a measure of similarity between two non-zero vectors of an
|
||||
/// inner product space. It is defined to equal the cosine of the angle
|
||||
/// between them. Unlike L2, the cosine distance is not affected by the
|
||||
/// between them. Unlike l2, the cosine distance is not affected by the
|
||||
/// magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
///
|
||||
/// Note: the cosine distance is undefined when one (or both) of the vectors
|
||||
@@ -239,7 +239,7 @@ pub enum DistanceType {
|
||||
Cosine,
|
||||
/// Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
/// distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
/// L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
/// l2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
Dot,
|
||||
/// Hamming distance. Hamming distance is a distance metric that measures
|
||||
/// the number of positions at which the corresponding elements are different.
|
||||
|
||||
Reference in New Issue
Block a user