diff --git a/Cargo.lock b/Cargo.lock index ad29ea93..390523a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1095,16 +1095,15 @@ dependencies = [ [[package]] name = "blake3" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1230237285e3e10cde447185e8975408ae24deaa67205ce684805c25bc0c7937" +checksum = "675f87afced0413c9bb02843499dbbd3882a237645883f71a2b59644a6d2f753" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", - "memmap2 0.9.5", ] [[package]] @@ -1145,9 +1144,9 @@ checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "bytemuck" -version = "1.21.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" +checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae" dependencies = [ "bytemuck_derive", ] @@ -2039,7 +2038,7 @@ dependencies = [ "itertools 0.14.0", "log", "paste", - "petgraph", + "petgraph 0.7.1", ] [[package]] @@ -2494,6 +2493,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -4990,13 +4995,23 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset 0.4.2", + "indexmap 2.7.1", +] + [[package]] name = "petgraph" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "fixedbitset", + "fixedbitset 0.5.7", "indexmap 2.7.1", ] @@ -5510,11 +5525,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck 0.5.0", - "itertools 0.14.0", + "itertools 0.13.0", "log", "multimap", "once_cell", - "petgraph", + "petgraph 0.6.5", "prettyplease", "prost", "prost-types", @@ -5530,7 +5545,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.98", @@ -6039,9 +6054,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.10" +version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a652edd001c53df0b3f96a36a8dc93fce6866988efc16808235653c6bcac8bf2" +checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661" dependencies = [ "bytemuck", "byteorder", diff --git a/docs/openapi.yml b/docs/openapi.yml index ed4b83c3..b07e47e0 100644 --- a/docs/openapi.yml +++ b/docs/openapi.yml @@ -171,7 +171,7 @@ paths: distance_type: type: string description: | - The distance metric to use for search. L2, Cosine, Dot and Hamming are supported. Default is L2. + The distance metric to use for search. l2, Cosine, Dot and Hamming are supported. Default is l2. bypass_vector_index: type: boolean description: | @@ -450,7 +450,7 @@ paths: type: string nullable: false description: | - The metric type to use for the index. L2, Cosine, Dot are supported. + The metric type to use for the index. l2, Cosine, Dot are supported. index_type: type: string responses: diff --git a/docs/src/ann_indexes.md b/docs/src/ann_indexes.md index 710ec8e3..b323ad05 100644 --- a/docs/src/ann_indexes.md +++ b/docs/src/ann_indexes.md @@ -69,7 +69,7 @@ Lance supports `IVF_PQ` index type by default. The following IVF_PQ paramters can be specified: -- **distance_type**: The distance metric to use. By default it uses euclidean distance "`L2`". +- **distance_type**: The distance metric to use. By default it uses euclidean distance "`l2`". We also support "cosine" and "dot" distance as well. - **num_partitions**: The number of partitions in the index. The default is the square root of the number of rows. diff --git a/docs/src/concepts/index_hnsw.md b/docs/src/concepts/index_hnsw.md index 93e1f1b7..ca74b2ed 100644 --- a/docs/src/concepts/index_hnsw.md +++ b/docs/src/concepts/index_hnsw.md @@ -59,7 +59,7 @@ Then the greedy search routine operates as follows: There are three key parameters to set when constructing an HNSW index: -* `metric`: Use an `L2` euclidean distance metric. We also support `dot` and `cosine` distance. +* `metric`: Use an `l2` euclidean distance metric. We also support `dot` and `cosine` distance. * `m`: The number of neighbors to select for each vector in the HNSW graph. * `ef_construction`: The number of candidates to evaluate during the construction of the HNSW graph. diff --git a/docs/src/concepts/index_ivfpq.md b/docs/src/concepts/index_ivfpq.md index 7220d2c8..0a9de0d4 100644 --- a/docs/src/concepts/index_ivfpq.md +++ b/docs/src/concepts/index_ivfpq.md @@ -47,7 +47,7 @@ We can combine the above concepts to understand how to build and query an IVF-PQ There are three key parameters to set when constructing an IVF-PQ index: -* `metric`: Use an `L2` euclidean distance metric. We also support `dot` and `cosine` distance. +* `metric`: Use an `l2` euclidean distance metric. We also support `dot` and `cosine` distance. * `num_partitions`: The number of partitions in the IVF portion of the index. * `num_sub_vectors`: The number of sub-vectors that will be created during Product Quantization (PQ). @@ -56,7 +56,7 @@ In Python, the index can be created as follows: ```python # Create and train the index for a 1536-dimensional vector # Make sure you have enough data in the table for an effective training step -tbl.create_index(metric="L2", num_partitions=256, num_sub_vectors=96) +tbl.create_index(metric="l2", num_partitions=256, num_sub_vectors=96) ``` !!! note `num_partitions`=256 and `num_sub_vectors`=96 does not work for every dataset. Those values needs to be adjusted for your particular dataset. diff --git a/docs/src/embeddings/understanding_embeddings.md b/docs/src/embeddings/understanding_embeddings.md index 211296c6..bd12db1b 100644 --- a/docs/src/embeddings/understanding_embeddings.md +++ b/docs/src/embeddings/understanding_embeddings.md @@ -54,7 +54,7 @@ As mentioned, after creating embedding, each data point is represented as a vect Points that are close to each other in vector space are considered similar (or appear in similar contexts), and points that are far away are considered dissimilar. To quantify this closeness, we use distance as a metric which can be measured in the following way - -1. **Euclidean Distance (L2)**: It calculates the straight-line distance between two points (vectors) in a multidimensional space. +1. **Euclidean Distance (l2)**: It calculates the straight-line distance between two points (vectors) in a multidimensional space. 2. **Cosine Similarity**: It measures the cosine of the angle between two vectors, providing a normalized measure of similarity based on their direction. 3. **Dot product**: It is calculated as the sum of the products of their corresponding components. To measure relatedness it considers both the magnitude and direction of the vectors. diff --git a/docs/src/integrations/langchain.md b/docs/src/integrations/langchain.md index 157428ba..b0a6196c 100644 --- a/docs/src/integrations/langchain.md +++ b/docs/src/integrations/langchain.md @@ -108,7 +108,7 @@ This method creates a scalar(for non-vector cols) or a vector index on a table. |:---|:---|:---|:---| |`vector_col`|`Optional[str]`| Provide if you want to create index on a vector column. |`None`| |`col_name`|`Optional[str]`| Provide if you want to create index on a non-vector column. |`None`| -|`metric`|`Optional[str]` |Provide the metric to use for vector index. choice of metrics: 'L2', 'dot', 'cosine'. |`L2`| +|`metric`|`Optional[str]` |Provide the metric to use for vector index. choice of metrics: 'l2', 'dot', 'cosine'. |`l2`| |`num_partitions`|`Optional[int]`|Number of partitions to use for the index.|`256`| |`num_sub_vectors`|`Optional[int]` |Number of sub-vectors to use for the index.|`96`| |`index_cache_size`|`Optional[int]` |Size of the index cache.|`None`| diff --git a/docs/src/integrations/llamaIndex.md b/docs/src/integrations/llamaIndex.md index 210388ac..e608b83f 100644 --- a/docs/src/integrations/llamaIndex.md +++ b/docs/src/integrations/llamaIndex.md @@ -125,7 +125,7 @@ The exhaustive list of parameters for `LanceDBVectorStore` vector store are : ``` - **_table_exists(self, tbl_name: `Optional[str]` = `None`) -> `bool`** : Returns `True` if `tbl_name` exists in database. - __create_index( - self, scalar: `Optional[bool]` = False, col_name: `Optional[str]` = None, num_partitions: `Optional[int]` = 256, num_sub_vectors: `Optional[int]` = 96, index_cache_size: `Optional[int]` = None, metric: `Optional[str]` = "L2", + self, scalar: `Optional[bool]` = False, col_name: `Optional[str]` = None, num_partitions: `Optional[int]` = 256, num_sub_vectors: `Optional[int]` = 96, index_cache_size: `Optional[int]` = None, metric: `Optional[str]` = "l2", ) -> `None`__ : Creates a scalar(for non-vector cols) or a vector index on a table. Make sure your vector column has enough data before creating an index on it. diff --git a/docs/src/javascript/enums/MetricType.md b/docs/src/javascript/enums/MetricType.md index 9f9f0977..8c1300b2 100644 --- a/docs/src/javascript/enums/MetricType.md +++ b/docs/src/javascript/enums/MetricType.md @@ -10,7 +10,7 @@ Distance metrics type. - [Cosine](MetricType.md#cosine) - [Dot](MetricType.md#dot) -- [L2](MetricType.md#l2) +- [l2](MetricType.md#l2) ## Enumeration Members diff --git a/docs/src/javascript/interfaces/IvfPQIndexConfig.md b/docs/src/javascript/interfaces/IvfPQIndexConfig.md index 9211dda3..526787cc 100644 --- a/docs/src/javascript/interfaces/IvfPQIndexConfig.md +++ b/docs/src/javascript/interfaces/IvfPQIndexConfig.md @@ -85,7 +85,7 @@ ___ • `Optional` **metric\_type**: [`MetricType`](../enums/MetricType.md) -Metric type, L2 or Cosine +Metric type, l2 or Cosine #### Defined in diff --git a/docs/src/js/interfaces/HnswPqOptions.md b/docs/src/js/interfaces/HnswPqOptions.md index 4dde12b1..65e6ea0f 100644 --- a/docs/src/js/interfaces/HnswPqOptions.md +++ b/docs/src/js/interfaces/HnswPqOptions.md @@ -24,18 +24,18 @@ The following distance types are available: "l2" - Euclidean distance. This is a very common distance metric that accounts for both magnitude and direction when determining the distance -between vectors. L2 distance has a range of [0, ∞). +between vectors. l2 distance has a range of [0, ∞). "cosine" - Cosine distance. Cosine distance is a distance metric calculated from the cosine similarity between two vectors. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space. It is defined to equal the cosine of the angle -between them. Unlike L2, the cosine distance is not affected by the +between them. Unlike l2, the cosine distance is not affected by the magnitude of the vectors. Cosine distance has a range of [0, 2]. "dot" - Dot product. Dot distance is the dot product of two vectors. Dot distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their -L2 norm is 1), then dot distance is equivalent to the cosine distance. +l2 norm is 1), then dot distance is equivalent to the cosine distance. *** diff --git a/docs/src/js/interfaces/HnswSqOptions.md b/docs/src/js/interfaces/HnswSqOptions.md index e365388c..585737a5 100644 --- a/docs/src/js/interfaces/HnswSqOptions.md +++ b/docs/src/js/interfaces/HnswSqOptions.md @@ -24,18 +24,18 @@ The following distance types are available: "l2" - Euclidean distance. This is a very common distance metric that accounts for both magnitude and direction when determining the distance -between vectors. L2 distance has a range of [0, ∞). +between vectors. l2 distance has a range of [0, ∞). "cosine" - Cosine distance. Cosine distance is a distance metric calculated from the cosine similarity between two vectors. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space. It is defined to equal the cosine of the angle -between them. Unlike L2, the cosine distance is not affected by the +between them. Unlike l2, the cosine distance is not affected by the magnitude of the vectors. Cosine distance has a range of [0, 2]. "dot" - Dot product. Dot distance is the dot product of two vectors. Dot distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their -L2 norm is 1), then dot distance is equivalent to the cosine distance. +l2 norm is 1), then dot distance is equivalent to the cosine distance. *** diff --git a/docs/src/js/interfaces/IvfPqOptions.md b/docs/src/js/interfaces/IvfPqOptions.md index a2b1bda1..7b47c8e5 100644 --- a/docs/src/js/interfaces/IvfPqOptions.md +++ b/docs/src/js/interfaces/IvfPqOptions.md @@ -31,13 +31,13 @@ The following distance types are available: "l2" - Euclidean distance. This is a very common distance metric that accounts for both magnitude and direction when determining the distance -between vectors. L2 distance has a range of [0, ∞). +between vectors. l2 distance has a range of [0, ∞). "cosine" - Cosine distance. Cosine distance is a distance metric calculated from the cosine similarity between two vectors. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space. It is defined to equal the cosine of the angle -between them. Unlike L2, the cosine distance is not affected by the +between them. Unlike l2, the cosine distance is not affected by the magnitude of the vectors. Cosine distance has a range of [0, 2]. Note: the cosine distance is undefined when one (or both) of the vectors @@ -46,7 +46,7 @@ never be returned from a vector search. "dot" - Dot product. Dot distance is the dot product of two vectors. Dot distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their -L2 norm is 1), then dot distance is equivalent to the cosine distance. +l2 norm is 1), then dot distance is equivalent to the cosine distance. *** diff --git a/docs/src/search.md b/docs/src/search.md index 3806f5c0..7703a2bd 100644 --- a/docs/src/search.md +++ b/docs/src/search.md @@ -15,7 +15,7 @@ Currently, LanceDB supports the following metrics: | Metric | Description | | --------- | --------------------------------------------------------------------------- | -| `l2` | [Euclidean / L2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) | +| `l2` | [Euclidean / l2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) | | `cosine` | [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity) | | `dot` | [Dot Production](https://en.wikipedia.org/wiki/Dot_product) | | `hamming` | [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) | diff --git a/node/src/index.ts b/node/src/index.ts index 705bbac1..ccac74e5 100644 --- a/node/src/index.ts +++ b/node/src/index.ts @@ -1299,7 +1299,7 @@ export interface IvfPQIndexConfig { index_name?: string /** - * Metric type, L2 or Cosine + * Metric type, l2 or Cosine */ metric_type?: MetricType diff --git a/nodejs/lancedb/indices.ts b/nodejs/lancedb/indices.ts index 7c27e42e..1f3af0d6 100644 --- a/nodejs/lancedb/indices.ts +++ b/nodejs/lancedb/indices.ts @@ -62,13 +62,13 @@ export interface IvfPqOptions { * * "l2" - Euclidean distance. This is a very common distance metric that * accounts for both magnitude and direction when determining the distance - * between vectors. L2 distance has a range of [0, ∞). + * between vectors. l2 distance has a range of [0, ∞). * * "cosine" - Cosine distance. Cosine distance is a distance metric * calculated from the cosine similarity between two vectors. Cosine * similarity is a measure of similarity between two non-zero vectors of an * inner product space. It is defined to equal the cosine of the angle - * between them. Unlike L2, the cosine distance is not affected by the + * between them. Unlike l2, the cosine distance is not affected by the * magnitude of the vectors. Cosine distance has a range of [0, 2]. * * Note: the cosine distance is undefined when one (or both) of the vectors @@ -77,7 +77,7 @@ export interface IvfPqOptions { * * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their - * L2 norm is 1), then dot distance is equivalent to the cosine distance. + * l2 norm is 1), then dot distance is equivalent to the cosine distance. */ distanceType?: "l2" | "cosine" | "dot"; @@ -125,18 +125,18 @@ export interface HnswPqOptions { * * "l2" - Euclidean distance. This is a very common distance metric that * accounts for both magnitude and direction when determining the distance - * between vectors. L2 distance has a range of [0, ∞). + * between vectors. l2 distance has a range of [0, ∞). * * "cosine" - Cosine distance. Cosine distance is a distance metric * calculated from the cosine similarity between two vectors. Cosine * similarity is a measure of similarity between two non-zero vectors of an * inner product space. It is defined to equal the cosine of the angle - * between them. Unlike L2, the cosine distance is not affected by the + * between them. Unlike l2, the cosine distance is not affected by the * magnitude of the vectors. Cosine distance has a range of [0, 2]. * * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their - * L2 norm is 1), then dot distance is equivalent to the cosine distance. + * l2 norm is 1), then dot distance is equivalent to the cosine distance. */ distanceType?: "l2" | "cosine" | "dot"; @@ -241,18 +241,18 @@ export interface HnswSqOptions { * * "l2" - Euclidean distance. This is a very common distance metric that * accounts for both magnitude and direction when determining the distance - * between vectors. L2 distance has a range of [0, ∞). + * between vectors. l2 distance has a range of [0, ∞). * * "cosine" - Cosine distance. Cosine distance is a distance metric * calculated from the cosine similarity between two vectors. Cosine * similarity is a measure of similarity between two non-zero vectors of an * inner product space. It is defined to equal the cosine of the angle - * between them. Unlike L2, the cosine distance is not affected by the + * between them. Unlike l2, the cosine distance is not affected by the * magnitude of the vectors. Cosine distance has a range of [0, 2]. * * "dot" - Dot product. Dot distance is the dot product of two vectors. Dot * distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their - * L2 norm is 1), then dot distance is equivalent to the cosine distance. + * l2 norm is 1), then dot distance is equivalent to the cosine distance. */ distanceType?: "l2" | "cosine" | "dot"; diff --git a/python/python/lancedb/index.py b/python/python/lancedb/index.py index 6d855f52..fc665f93 100644 --- a/python/python/lancedb/index.py +++ b/python/python/lancedb/index.py @@ -150,7 +150,7 @@ class HnswPq: Parameters ---------- - distance_type: str, default "L2" + distance_type: str, default "l2" The distance metric used to train the index. @@ -158,18 +158,18 @@ class HnswPq: "l2" - Euclidean distance. This is a very common distance metric that accounts for both magnitude and direction when determining the distance - between vectors. L2 distance has a range of [0, ∞). + between vectors. l2 distance has a range of [0, ∞). "cosine" - Cosine distance. Cosine distance is a distance metric calculated from the cosine similarity between two vectors. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space. It is defined to equal the cosine of the angle - between them. Unlike L2, the cosine distance is not affected by the + between them. Unlike l2, the cosine distance is not affected by the magnitude of the vectors. Cosine distance has a range of [0, 2]. "dot" - Dot product. Dot distance is the dot product of two vectors. Dot distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their - L2 norm is 1), then dot distance is equivalent to the cosine distance. + l2 norm is 1), then dot distance is equivalent to the cosine distance. num_partitions, default sqrt(num_rows) @@ -271,7 +271,7 @@ class HnswSq: Parameters ---------- - distance_type: str, default "L2" + distance_type: str, default "l2" The distance metric used to train the index. @@ -279,18 +279,18 @@ class HnswSq: "l2" - Euclidean distance. This is a very common distance metric that accounts for both magnitude and direction when determining the distance - between vectors. L2 distance has a range of [0, ∞). + between vectors. l2 distance has a range of [0, ∞). "cosine" - Cosine distance. Cosine distance is a distance metric calculated from the cosine similarity between two vectors. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space. It is defined to equal the cosine of the angle - between them. Unlike L2, the cosine distance is not affected by the + between them. Unlike l2, the cosine distance is not affected by the magnitude of the vectors. Cosine distance has a range of [0, 2]. "dot" - Dot product. Dot distance is the dot product of two vectors. Dot distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their - L2 norm is 1), then dot distance is equivalent to the cosine distance. + l2 norm is 1), then dot distance is equivalent to the cosine distance. num_partitions, default sqrt(num_rows) @@ -369,7 +369,7 @@ class IvfFlat: Attributes ---------- - distance_type: str, default "L2" + distance_type: str, default "l2" The distance metric used to train the index This is used when training the index to calculate the IVF partitions @@ -383,13 +383,13 @@ class IvfFlat: "l2" - Euclidean distance. This is a very common distance metric that accounts for both magnitude and direction when determining the distance - between vectors. L2 distance has a range of [0, ∞). + between vectors. l2 distance has a range of [0, ∞). "cosine" - Cosine distance. Cosine distance is a distance metric calculated from the cosine similarity between two vectors. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space. It is defined to equal the cosine of the angle - between them. Unlike L2, the cosine distance is not affected by the + between them. Unlike l2, the cosine distance is not affected by the magnitude of the vectors. Cosine distance has a range of [0, 2]. Note: the cosine distance is undefined when one (or both) of the vectors @@ -398,7 +398,7 @@ class IvfFlat: "dot" - Dot product. Dot distance is the dot product of two vectors. Dot distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their - L2 norm is 1), then dot distance is equivalent to the cosine distance. + l2 norm is 1), then dot distance is equivalent to the cosine distance. "hamming" - Hamming distance. Hamming distance is a distance metric calculated as the number of positions at which the corresponding bits are @@ -475,7 +475,7 @@ class IvfPq: Attributes ---------- - distance_type: str, default "L2" + distance_type: str, default "l2" The distance metric used to train the index This is used when training the index to calculate the IVF partitions @@ -489,13 +489,13 @@ class IvfPq: "l2" - Euclidean distance. This is a very common distance metric that accounts for both magnitude and direction when determining the distance - between vectors. L2 distance has a range of [0, ∞). + between vectors. l2 distance has a range of [0, ∞). "cosine" - Cosine distance. Cosine distance is a distance metric calculated from the cosine similarity between two vectors. Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space. It is defined to equal the cosine of the angle - between them. Unlike L2, the cosine distance is not affected by the + between them. Unlike l2, the cosine distance is not affected by the magnitude of the vectors. Cosine distance has a range of [0, 2]. Note: the cosine distance is undefined when one (or both) of the vectors @@ -504,7 +504,7 @@ class IvfPq: "dot" - Dot product. Dot distance is the dot product of two vectors. Dot distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their - L2 norm is 1), then dot distance is equivalent to the cosine distance. + l2 norm is 1), then dot distance is equivalent to the cosine distance. num_partitions: int, default sqrt(num_rows) The number of IVF partitions to create. diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index 4e589dc4..413d26a3 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -68,7 +68,7 @@ class Query(pydantic.BaseModel): metric : str the distance metric between a pair of vectors, - can support L2 (default), Cosine and Dot. + can support l2 (default), Cosine and Dot. [metric definitions][search] columns : Optional[List[str]] which columns to return in the results @@ -113,7 +113,7 @@ class Query(pydantic.BaseModel): k: Optional[int] = None # # metrics - metric: str = "L2" + metric: str = "l2" # which columns to return in the results columns: Optional[Union[List[str], Dict[str, str]]] = None @@ -597,7 +597,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): if self._limit is None: self._limit = 10 self._query = query - self._distance_type = "L2" + self._distance_type = "l2" self._nprobes = 20 self._lower_bound = None self._upper_bound = None @@ -608,7 +608,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): self._str_query = str_query self._fast_search = fast_search - def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder: + def metric(self, metric: Literal["l2", "cosine", "dot"]) -> LanceVectorQueryBuilder: """Set the distance metric to use. This is an alias for distance_type() and may be deprecated in the future. @@ -616,8 +616,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): Parameters ---------- - metric: "L2" or "cosine" or "dot" - The distance metric to use. By default "L2" is used. + metric: "l2" or "cosine" or "dot" + The distance metric to use. By default "l2" is used. Returns ------- @@ -627,7 +627,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): return self.distance_type(metric) def distance_type( - self, distance_type: Literal["L2", "cosine", "dot"] + self, distance_type: Literal["l2", "cosine", "dot"] ) -> "LanceVectorQueryBuilder": """Set the distance metric to use. @@ -641,8 +641,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): Parameters ---------- - distance_type: "L2" or "cosine" or "dot" - The distance metric to use. By default "L2" is used. + distance_type: "l2" or "cosine" or "dot" + The distance metric to use. By default "l2" is used. Returns ------- @@ -1414,7 +1414,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder): self._ef = ef return self - def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceHybridQueryBuilder: + def metric(self, metric: Literal["l2", "cosine", "dot"]) -> LanceHybridQueryBuilder: """Set the distance metric to use. This is an alias for distance_type() and may be deprecated in the future. @@ -1422,8 +1422,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder): Parameters ---------- - metric: "L2" or "cosine" or "dot" - The distance metric to use. By default "L2" is used. + metric: "l2" or "cosine" or "dot" + The distance metric to use. By default "l2" is used. Returns ------- @@ -1433,7 +1433,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder): return self.distance_type(metric) def distance_type( - self, distance_type: Literal["L2", "cosine", "dot"] + self, distance_type: Literal["l2", "cosine", "dot"] ) -> "LanceHybridQueryBuilder": """Set the distance metric to use. @@ -1447,8 +1447,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder): Parameters ---------- - distance_type: "L2" or "cosine" or "dot" - The distance metric to use. By default "L2" is used. + distance_type: "l2" or "cosine" or "dot" + The distance metric to use. By default "l2" is used. Returns ------- diff --git a/python/python/lancedb/remote/table.py b/python/python/lancedb/remote/table.py index 688ee8de..64fe5973 100644 --- a/python/python/lancedb/remote/table.py +++ b/python/python/lancedb/remote/table.py @@ -154,7 +154,7 @@ class RemoteTable(Table): def create_index( self, - metric="L2", + metric="l2", vector_column_name: str = VECTOR_COLUMN_NAME, index_cache_size: Optional[int] = None, num_partitions: Optional[int] = None, @@ -170,7 +170,7 @@ class RemoteTable(Table): Parameters ---------- metric : str - The metric to use for the index. Default is "L2". + The metric to use for the index. Default is "l2". vector_column_name : str The name of the vector column. Default is "vector". @@ -193,7 +193,7 @@ class RemoteTable(Table): ... table_name, # doctest: +SKIP ... schema=schema, # doctest: +SKIP ... ) - >>> table.create_index("L2", "vector") # doctest: +SKIP + >>> table.create_index("l2", "vector") # doctest: +SKIP """ if num_partitions is not None: diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index a4d7c40d..8b5f44c9 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -577,7 +577,7 @@ class Table(ABC): def create_index( self, - metric="L2", + metric="l2", num_partitions=256, num_sub_vectors=96, vector_column_name: str = VECTOR_COLUMN_NAME, @@ -596,10 +596,10 @@ class Table(ABC): Parameters ---------- - metric: str, default "L2" + metric: str, default "l2" The distance metric to use when creating the index. - Valid values are "L2", "cosine", "dot", or "hamming". - L2 is euclidean distance. + Valid values are "l2", "cosine", "dot", or "hamming". + l2 is euclidean distance. Hamming is available only for binary vectors. num_partitions: int, default 256 The number of IVF partitions to use when creating the index. diff --git a/python/python/tests/test_query.py b/python/python/tests/test_query.py index f98ba7ad..54c6c69b 100644 --- a/python/python/tests/test_query.py +++ b/python/python/tests/test_query.py @@ -452,7 +452,7 @@ def test_query_builder_with_metric(table): df_default = LanceVectorQueryBuilder(table, query, vector_column_name).to_pandas() df_l2 = ( LanceVectorQueryBuilder(table, query, vector_column_name) - .distance_type("L2") + .distance_type("l2") .to_pandas() ) tm.assert_frame_equal(df_default, df_l2) diff --git a/python/python/tests/test_table.py b/python/python/tests/test_table.py index 0d0d0bed..d1da31ca 100644 --- a/python/python/tests/test_table.py +++ b/python/python/tests/test_table.py @@ -480,7 +480,7 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection): ) table.create_index( - metric="L2", + metric="l2", num_partitions=256, num_sub_vectors=96, vector_column_name="vector", @@ -489,7 +489,7 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection): num_bits=4, ) expected_config = IvfPq( - distance_type="L2", + distance_type="l2", num_partitions=256, num_sub_vectors=96, num_bits=4, @@ -1237,7 +1237,7 @@ def test_hybrid_search_metric_type(tmp_db: DBConnection): # This test uses an FTS index pytest.importorskip("lancedb.fts") - # Need to use nonnorm as the embedding function so L2 and dot results + # Need to use nonnorm as the embedding function so l2 and dot results # are different table, _, _ = setup_hybrid_search_table(tmp_db, "nonnorm") diff --git a/rust/lancedb/examples/ivf_pq.rs b/rust/lancedb/examples/ivf_pq.rs index a92f9486..9ce561c1 100644 --- a/rust/lancedb/examples/ivf_pq.rs +++ b/rust/lancedb/examples/ivf_pq.rs @@ -103,7 +103,7 @@ async fn create_index(table: &Table) -> Result<()> { // we are creating an index that my have better recall than the // default but is also larger and slower. IvfPqIndexBuilder::default() - // This overrides the default distance type of L2 + // This overrides the default distance type of l2 .distance_type(DistanceType::Cosine) // With 1000 rows this have been ~31 by default .num_partitions(50) diff --git a/rust/lancedb/src/lib.rs b/rust/lancedb/src/lib.rs index eefa951d..b19b7fc0 100644 --- a/rust/lancedb/src/lib.rs +++ b/rust/lancedb/src/lib.rs @@ -224,13 +224,13 @@ pub use table::Table; pub enum DistanceType { /// Euclidean distance. This is a very common distance metric that /// accounts for both magnitude and direction when determining the distance - /// between vectors. L2 distance has a range of [0, ∞). + /// between vectors. l2 distance has a range of [0, ∞). L2, /// Cosine distance. Cosine distance is a distance metric /// calculated from the cosine similarity between two vectors. Cosine /// similarity is a measure of similarity between two non-zero vectors of an /// inner product space. It is defined to equal the cosine of the angle - /// between them. Unlike L2, the cosine distance is not affected by the + /// between them. Unlike l2, the cosine distance is not affected by the /// magnitude of the vectors. Cosine distance has a range of [0, 2]. /// /// Note: the cosine distance is undefined when one (or both) of the vectors @@ -239,7 +239,7 @@ pub enum DistanceType { Cosine, /// Dot product. Dot distance is the dot product of two vectors. Dot /// distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their - /// L2 norm is 1), then dot distance is equivalent to the cosine distance. + /// l2 norm is 1), then dot distance is equivalent to the cosine distance. Dot, /// Hamming distance. Hamming distance is a distance metric that measures /// the number of positions at which the corresponding elements are different.