diff --git a/docs/src/js/classes/Index.md b/docs/src/js/classes/Index.md index 0ec245b2..372a1ac4 100644 --- a/docs/src/js/classes/Index.md +++ b/docs/src/js/classes/Index.md @@ -194,6 +194,37 @@ currently is also a memory intensive operation. *** +### ivfRq() + +```ts +static ivfRq(options?): Index +``` + +Create an IvfRq index + +IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization +and organizes them into IVF partitions. + +The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits. +The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff +between index size (and thus search speed) and index accuracy. + +The partitioning process is called IVF and the `num_partitions` parameter controls how +many groups to create. + +Note that training an IVF RQ index on a large dataset is a slow operation and +currently is also a memory intensive operation. + +#### Parameters + +* **options?**: `Partial`<[`IvfRqOptions`](../interfaces/IvfRqOptions.md)> + +#### Returns + +[`Index`](Index.md) + +*** + ### labelList() ```ts diff --git a/docs/src/js/globals.md b/docs/src/js/globals.md index 3f38cb4c..757e47e9 100644 --- a/docs/src/js/globals.md +++ b/docs/src/js/globals.md @@ -68,6 +68,7 @@ - [IndexStatistics](interfaces/IndexStatistics.md) - [IvfFlatOptions](interfaces/IvfFlatOptions.md) - [IvfPqOptions](interfaces/IvfPqOptions.md) +- [IvfRqOptions](interfaces/IvfRqOptions.md) - [MergeResult](interfaces/MergeResult.md) - [OpenTableOptions](interfaces/OpenTableOptions.md) - [OptimizeOptions](interfaces/OptimizeOptions.md) diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index 540f74b3..51e3a048 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -861,6 +861,15 @@ describe("When creating an index", () => { }); }); + it("should be able to create IVF_RQ", async () => { + await tbl.createIndex("vec", { + config: Index.ivfRq({ + 
numPartitions: 10, + numBits: 1, + }), + }); + }); + it("should allow me to replace (or not) an existing index", async () => { await tbl.createIndex("id"); // Default is replace=true diff --git a/nodejs/lancedb/index.ts b/nodejs/lancedb/index.ts index 5b45cf8d..57069221 100644 --- a/nodejs/lancedb/index.ts +++ b/nodejs/lancedb/index.ts @@ -85,6 +85,7 @@ export { Index, IndexOptions, IvfPqOptions, + IvfRqOptions, IvfFlatOptions, HnswPqOptions, HnswSqOptions, diff --git a/nodejs/lancedb/indices.ts b/nodejs/lancedb/indices.ts index 6c0fc3fe..b15106c2 100644 --- a/nodejs/lancedb/indices.ts +++ b/nodejs/lancedb/indices.ts @@ -112,6 +112,77 @@ export interface IvfPqOptions { sampleRate?: number; } +export interface IvfRqOptions { + /** + * The number of IVF partitions to create. + * + * This value should generally scale with the number of rows in the dataset. + * By default the number of partitions is the square root of the number of + * rows. + * + * If this value is too large then the first part of the search (picking the + * right partition) will be slow. If this value is too small then the second + * part of the search (searching within a partition) will be slow. + */ + numPartitions?: number; + + /** + * Number of bits per dimension for RabitQ quantization. + * + * This value controls how much each dimension is compressed. The more + * bits, the more accurate the index will be, but the slower the search. Typical values + * are small integers; the default is 1 bit per dimension. + */ + numBits?: number; + + /** + * Distance type to use to build the index. + * + * Default value is "l2". + * + * This is used when training the index to calculate the IVF partitions + * (vectors are grouped in partitions with similar vectors according to this + * distance type) and during quantization. + * + * The distance type used to train an index MUST match the distance type used + * to search the index. Failure to do so will yield inaccurate results. 
+ * + * The following distance types are available: + * + * "l2" - Euclidean distance. + * "cosine" - Cosine distance. + * "dot" - Dot product. + */ + distanceType?: "l2" | "cosine" | "dot"; + + /** + * Max iterations to train IVF kmeans. + * + * When training an IVF index we use kmeans to calculate the partitions. This parameter + * controls how many iterations of kmeans to run. + * + * The default value is 50. + */ + maxIterations?: number; + + /** + * The number of vectors, per partition, to sample when training IVF kmeans. + * + * When an IVF index is trained, we need to calculate partitions. These are groups + * of vectors that are similar to each other. To do this we use an algorithm called kmeans. + * + * Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a + * random sample of the data. This parameter controls the size of the sample. The total + * number of vectors used to train the index is `sample_rate * num_partitions`. + * + * Increasing this value might improve the quality of the index but in most cases the + * default should be sufficient. + * + * The default value is 256. + */ + sampleRate?: number; +} + /** * Options to create an `HNSW_PQ` index */ @@ -523,6 +594,35 @@ export class Index { options?.distanceType, options?.numPartitions, options?.numSubVectors, + options?.numBits, + options?.maxIterations, + options?.sampleRate, + ), + ); + } + + /** + * Create an IvfRq index + * + * IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization + * and organizes them into IVF partitions. + * + * The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits. + * The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff + * between index size (and thus search speed) and index accuracy. + * + * The partitioning process is called IVF and the `num_partitions` parameter controls how + * many groups to create. 
+ * + * Note that training an IVF RQ index on a large dataset is a slow operation and + * currently is also a memory intensive operation. + */ + static ivfRq(options?: Partial) { + return new Index( + LanceDbIndex.ivfRq( + options?.distanceType, + options?.numPartitions, + options?.numBits, options?.maxIterations, options?.sampleRate, ), diff --git a/nodejs/src/index.rs b/nodejs/src/index.rs index 37b775f6..bf8b280a 100644 --- a/nodejs/src/index.rs +++ b/nodejs/src/index.rs @@ -6,6 +6,7 @@ use std::sync::Mutex; use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder}; use lancedb::index::vector::{ IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder, + IvfRqIndexBuilder, }; use lancedb::index::Index as LanceDbIndex; use napi_derive::napi; @@ -65,6 +66,36 @@ impl Index { }) } + #[napi(factory)] + pub fn ivf_rq( + distance_type: Option, + num_partitions: Option, + num_bits: Option, + max_iterations: Option, + sample_rate: Option, + ) -> napi::Result { + let mut ivf_rq_builder = IvfRqIndexBuilder::default(); + if let Some(distance_type) = distance_type { + let distance_type = parse_distance_type(distance_type)?; + ivf_rq_builder = ivf_rq_builder.distance_type(distance_type); + } + if let Some(num_partitions) = num_partitions { + ivf_rq_builder = ivf_rq_builder.num_partitions(num_partitions); + } + if let Some(num_bits) = num_bits { + ivf_rq_builder = ivf_rq_builder.num_bits(num_bits); + } + if let Some(max_iterations) = max_iterations { + ivf_rq_builder = ivf_rq_builder.max_iterations(max_iterations); + } + if let Some(sample_rate) = sample_rate { + ivf_rq_builder = ivf_rq_builder.sample_rate(sample_rate); + } + Ok(Self { + inner: Mutex::new(Some(LanceDbIndex::IvfRq(ivf_rq_builder))), + }) + } + #[napi(factory)] pub fn ivf_flat( distance_type: Option, diff --git a/python/python/lancedb/index.py b/python/python/lancedb/index.py index 0ddb29c9..020e8e05 100644 --- a/python/python/lancedb/index.py +++ 
b/python/python/lancedb/index.py @@ -605,9 +605,53 @@ class IvfPq: target_partition_size: Optional[int] = None +@dataclass +class IvfRq: + """Describes an IVF RQ Index + + IVF-RQ (RabitQ Quantization) stores a compressed copy of each vector using + RabitQ quantization and organizes them into IVF partitions. Parameters + largely mirror IVF-PQ for consistency. + + Attributes + ---------- + distance_type: str, default "l2" + Distance metric used to train the index and for quantization. + + The following distance types are available: + + "l2" - Euclidean distance. + "cosine" - Cosine distance. + "dot" - Dot product. + + num_partitions: int, default sqrt(num_rows) + Number of IVF partitions to create. + + num_bits: int, default 1 + Number of bits to encode each dimension. + + max_iterations: int, default 50 + Max iterations to train kmeans when computing IVF partitions. + + sample_rate: int, default 256 + Controls the number of training vectors: sample_rate * num_partitions. + + target_partition_size: int, default is 8192 + Target size of each partition. 
+ """ + + distance_type: Literal["l2", "cosine", "dot"] = "l2" + num_partitions: Optional[int] = None + num_bits: int = 1 + max_iterations: int = 50 + sample_rate: int = 256 + target_partition_size: Optional[int] = None + + __all__ = [ "BTree", "IvfPq", + "IvfRq", "IvfFlat", "HnswPq", "HnswSq", diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index 45133567..7ee0bf01 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -44,7 +44,7 @@ import numpy as np from .common import DATA, VEC, VECTOR_COLUMN_NAME from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry -from .index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS +from .index import BTree, IvfFlat, IvfPq, Bitmap, IvfRq, LabelList, HnswPq, HnswSq, FTS from .merge import LanceMergeInsertBuilder from .pydantic import LanceModel, model_to_dict from .query import ( @@ -1991,7 +1991,7 @@ class LanceTable(Table): index_cache_size: Optional[int] = None, num_bits: int = 8, index_type: Literal[ - "IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ" + "IVF_FLAT", "IVF_PQ", "IVF_RQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ" ] = "IVF_PQ", max_iterations: int = 50, sample_rate: int = 256, @@ -2039,6 +2039,15 @@ class LanceTable(Table): sample_rate=sample_rate, target_partition_size=target_partition_size, ) + elif index_type == "IVF_RQ": + config = IvfRq( + distance_type=metric, + num_partitions=num_partitions, + num_bits=num_bits, + max_iterations=max_iterations, + sample_rate=sample_rate, + target_partition_size=target_partition_size, + ) elif index_type == "IVF_HNSW_PQ": config = HnswPq( distance_type=metric, @@ -3330,7 +3339,7 @@ class AsyncTable: *, replace: Optional[bool] = None, config: Optional[ - Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS] + Union[IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS] ] = None, wait_timeout: Optional[timedelta] = None, name: Optional[str] = None, @@ -3369,11 
+3378,12 @@ class AsyncTable: """ if config is not None: if not isinstance( - config, (IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS) + config, + (IvfFlat, IvfPq, IvfRq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS), ): raise TypeError( - "config must be an instance of IvfPq, HnswPq, HnswSq, BTree," - " Bitmap, LabelList, or FTS" + "config must be an instance of IvfPq, IvfRq, HnswPq, HnswSq, BTree," + " Bitmap, LabelList, or FTS, but got " + str(type(config)) ) try: await self._inner.create_index( diff --git a/python/python/lancedb/types.py b/python/python/lancedb/types.py index 61df76c4..6ca72de2 100644 --- a/python/python/lancedb/types.py +++ b/python/python/lancedb/types.py @@ -18,10 +18,17 @@ AddMode = Literal["append", "overwrite"] CreateMode = Literal["create", "overwrite"] # Index type literals -VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ"] +VectorIndexType = Literal["IVF_FLAT", "IVF_PQ", "IVF_HNSW_SQ", "IVF_HNSW_PQ", "IVF_RQ"] ScalarIndexType = Literal["BTREE", "BITMAP", "LABEL_LIST"] IndexType = Literal[ - "IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST" + "IVF_PQ", + "IVF_HNSW_PQ", + "IVF_HNSW_SQ", + "FTS", + "BTREE", + "BITMAP", + "LABEL_LIST", + "IVF_RQ", ] # Tokenizer literals diff --git a/python/python/tests/test_index.py b/python/python/tests/test_index.py index 51e7c378..4c594edf 100644 --- a/python/python/tests/test_index.py +++ b/python/python/tests/test_index.py @@ -8,7 +8,17 @@ import pyarrow as pa import pytest import pytest_asyncio from lancedb import AsyncConnection, AsyncTable, connect_async -from lancedb.index import BTree, IvfFlat, IvfPq, Bitmap, LabelList, HnswPq, HnswSq, FTS +from lancedb.index import ( + BTree, + IvfFlat, + IvfPq, + IvfRq, + Bitmap, + LabelList, + HnswPq, + HnswSq, + FTS, +) @pytest_asyncio.fixture @@ -195,6 +205,16 @@ async def test_create_4bit_ivfpq_index(some_table: AsyncTable): assert stats.loss >= 0.0 +@pytest.mark.asyncio +async def 
test_create_ivfrq_index(some_table: AsyncTable): + await some_table.create_index("vector", config=IvfRq(num_bits=1)) + indices = await some_table.list_indices() + assert len(indices) == 1 + assert indices[0].index_type == "IvfRq" + assert indices[0].columns == ["vector"] + assert indices[0].name == "vector_idx" + + @pytest.mark.asyncio async def test_create_hnswpq_index(some_table: AsyncTable): await some_table.create_index("vector", config=HnswPq(num_partitions=10)) diff --git a/python/src/index.rs b/python/src/index.rs index e0eb927a..82f61c2e 100644 --- a/python/src/index.rs +++ b/python/src/index.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors -use lancedb::index::vector::IvfFlatIndexBuilder; +use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder}; use lancedb::index::{ scalar::{BTreeIndexBuilder, FtsIndexBuilder}, vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder}, @@ -87,6 +87,22 @@ pub fn extract_index_params(source: &Option>) -> PyResult { + let params = source.extract::()?; + let distance_type = parse_distance_type(params.distance_type)?; + let mut ivf_rq_builder = IvfRqIndexBuilder::default() + .distance_type(distance_type) + .max_iterations(params.max_iterations) + .sample_rate(params.sample_rate) + .num_bits(params.num_bits); + if let Some(num_partitions) = params.num_partitions { + ivf_rq_builder = ivf_rq_builder.num_partitions(num_partitions); + } + if let Some(target_partition_size) = params.target_partition_size { + ivf_rq_builder = ivf_rq_builder.target_partition_size(target_partition_size); + } + Ok(LanceDbIndex::IvfRq(ivf_rq_builder)) + }, "HnswPq" => { let params = source.extract::()?; let distance_type = parse_distance_type(params.distance_type)?; @@ -170,6 +186,16 @@ struct IvfPqParams { target_partition_size: Option, } +#[derive(FromPyObject)] +struct IvfRqParams { + distance_type: String, + num_partitions: Option, + num_bits: u32, + 
max_iterations: u32, + sample_rate: u32, + target_partition_size: Option, +} + #[derive(FromPyObject)] struct IvfHnswPqParams { distance_type: String, diff --git a/rust/lancedb/src/index.rs b/rust/lancedb/src/index.rs index c3938645..83814ffa 100644 --- a/rust/lancedb/src/index.rs +++ b/rust/lancedb/src/index.rs @@ -8,6 +8,7 @@ use std::sync::Arc; use std::time::Duration; use vector::IvfFlatIndexBuilder; +use crate::index::vector::IvfRqIndexBuilder; use crate::{table::BaseTable, DistanceType, Error, Result}; use self::{ @@ -53,6 +54,9 @@ pub enum Index { /// IVF index with Product Quantization IvfPq(IvfPqIndexBuilder), + /// IVF index with RabitQ Quantization + IvfRq(IvfRqIndexBuilder), + /// IVF-HNSW index with Product Quantization /// It is a variant of the HNSW algorithm that uses product quantization to compress the vectors. IvfHnswPq(IvfHnswPqIndexBuilder), @@ -275,6 +279,8 @@ pub enum IndexType { IvfFlat, #[serde(alias = "IVF_PQ")] IvfPq, + #[serde(alias = "IVF_RQ")] + IvfRq, #[serde(alias = "IVF_HNSW_PQ")] IvfHnswPq, #[serde(alias = "IVF_HNSW_SQ")] @@ -296,6 +302,7 @@ impl std::fmt::Display for IndexType { match self { Self::IvfFlat => write!(f, "IVF_FLAT"), Self::IvfPq => write!(f, "IVF_PQ"), + Self::IvfRq => write!(f, "IVF_RQ"), Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"), Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"), Self::BTree => write!(f, "BTREE"), @@ -317,6 +324,7 @@ impl std::str::FromStr for IndexType { "FTS" | "INVERTED" => Ok(Self::FTS), "IVF_FLAT" => Ok(Self::IvfFlat), "IVF_PQ" => Ok(Self::IvfPq), + "IVF_RQ" => Ok(Self::IvfRq), "IVF_HNSW_PQ" => Ok(Self::IvfHnswPq), "IVF_HNSW_SQ" => Ok(Self::IvfHnswSq), _ => Err(Error::InvalidInput { diff --git a/rust/lancedb/src/index/vector.rs b/rust/lancedb/src/index/vector.rs index bf16ec6c..d23c7cc1 100644 --- a/rust/lancedb/src/index/vector.rs +++ b/rust/lancedb/src/index/vector.rs @@ -291,6 +291,52 @@ pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 { } } +/// Builder for an IVF RQ index. 
+/// +/// This index stores a compressed (quantized) copy of every vector. Each dimension +/// is quantized into a small number of bits. +/// The `num_bits` parameter controls this process, providing a tradeoff +/// between index size (and thus search speed) and index accuracy. +/// +/// The partitioning process is called IVF and the `num_partitions` parameter controls how +/// many groups to create. +/// +/// Note that training an IVF RQ index on a large dataset is a slow operation and +/// currently is also a memory intensive operation. +#[derive(Debug, Clone)] +pub struct IvfRqIndexBuilder { + // IVF + pub(crate) distance_type: DistanceType, + pub(crate) num_partitions: Option, + pub(crate) num_bits: Option, + pub(crate) sample_rate: u32, + pub(crate) max_iterations: u32, + pub(crate) target_partition_size: Option, +} + +impl Default for IvfRqIndexBuilder { + fn default() -> Self { + Self { + distance_type: DistanceType::L2, + num_partitions: None, + num_bits: None, + sample_rate: 256, + max_iterations: 50, + target_partition_size: None, + } + } +} + +impl IvfRqIndexBuilder { + impl_distance_type_setter!(); + impl_ivf_params_setter!(); + + pub fn num_bits(mut self, num_bits: u32) -> Self { + self.num_bits = Some(num_bits); + self + } +} + /// Builder for an IVF HNSW PQ index. /// /// This index is a combination of IVF and HNSW. 
diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index fa2d5108..60c601f1 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -1843,6 +1843,18 @@ impl NativeTable { ); Ok(Box::new(lance_idx_params)) } + Index::IvfRq(index) => { + Self::validate_index_type(field, "IVF RQ", supported_vector_data_type)?; + let num_partitions = self + .get_num_partitions(index.num_partitions, false, None) + .await?; + let lance_idx_params = VectorIndexParams::ivf_rq( + num_partitions as usize, + index.num_bits.unwrap_or(1) as u8, + index.distance_type.into(), + ); + Ok(Box::new(lance_idx_params)) + } Index::IvfHnswPq(index) => { Self::validate_index_type(field, "IVF HNSW PQ", supported_vector_data_type)?; let dim = Self::get_vector_dimension(field)?; @@ -1912,9 +1924,11 @@ impl NativeTable { Index::Bitmap(_) => IndexType::Bitmap, Index::LabelList(_) => IndexType::LabelList, Index::FTS(_) => IndexType::Inverted, - Index::IvfFlat(_) | Index::IvfPq(_) | Index::IvfHnswPq(_) | Index::IvfHnswSq(_) => { - IndexType::Vector - } + Index::IvfFlat(_) + | Index::IvfPq(_) + | Index::IvfRq(_) + | Index::IvfHnswPq(_) + | Index::IvfHnswSq(_) => IndexType::Vector, } }