From 3324e7d5255591776e28ad2b9341b5343feb10c1 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Tue, 10 Dec 2024 10:36:03 +0800 Subject: [PATCH] feat: support 4bit PQ (#1916) --- docs/src/ann_indexes.md | 7 ++++--- nodejs/__test__/table.test.ts | 9 +++++++++ nodejs/lancedb/indices.ts | 10 ++++++++++ nodejs/src/index.rs | 4 ++++ python/python/lancedb/index.py | 16 ++++++++++++++++ python/python/lancedb/table.py | 8 ++++++++ python/python/tests/test_index.py | 23 +++++++++++++++++++++++ python/python/tests/test_table.py | 1 + python/src/index.rs | 13 +++++++++++-- rust/lancedb/src/index/vector.rs | 8 ++++++++ 10 files changed, 94 insertions(+), 5 deletions(-) diff --git a/docs/src/ann_indexes.md b/docs/src/ann_indexes.md index 2d80c48e..8d293fa4 100644 --- a/docs/src/ann_indexes.md +++ b/docs/src/ann_indexes.md @@ -83,6 +83,7 @@ The following IVF_PQ paramters can be specified: - **num_sub_vectors**: The number of sub-vectors (M) that will be created during Product Quantization (PQ). For D dimensional vector, it will be divided into `M` subvectors with dimension `D/M`, each of which is replaced by a single PQ code. The default is the dimension of the vector divided by 16. +- **num_bits**: The number of bits used to encode each sub-vector. Only 4 and 8 are supported. A higher number of bits makes the index more accurate, but also makes search slower. The default is 8. !!! note @@ -142,11 +143,11 @@ There are a couple of parameters that can be used to fine-tune the search: - **nprobes** (default: 20): The number of probes used. A higher number makes search more accurate but also slower.
Most of the time, setting nprobes to cover 5-15% of the dataset should achieve high recall with low latency.
- _For example_, For a dataset of 1 million vectors divided into 256 partitions, `nprobes` should be set to ~20-40. This value can be adjusted to achieve the optimal balance between search latency and search quality.
- + - **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory.
A higher number makes search more accurate but also slower. If you find the recall is less than ideal, try refine_factor=10 to start.
- _For example_, For a dataset of 1 million vectors divided into 256 partitions, setting the `refine_factor` to 200 will initially retrieve the top 4,000 candidates (top k * refine_factor) from all searched partitions. These candidates are then reranked to determine the final top 20 results.
-!!! note +!!! note Both `nprobes` and `refine_factor` are only applicable if an ANN index is present. If specified on a table without an ANN index, those parameters are ignored. @@ -288,4 +289,4 @@ less space distortion, and thus yields better accuracy. However, a higher `num_s `m` determines the number of connections a new node establishes with its closest neighbors upon entering the graph. Typically, `m` falls within the range of 5 to 48. Lower `m` values are suitable for low-dimensional data or scenarios where recall is less critical. Conversely, higher `m` values are beneficial for high-dimensional data or when high recall is required. In essence, a larger `m` results in a denser graph with increased connectivity, but at the expense of higher memory consumption. -`ef_construction` balances build speed and accuracy. Higher values increase accuracy but slow down the build process. A typical range is 150 to 300. For good search results, a minimum value of 100 is recommended. In most cases, setting this value above 500 offers no additional benefit. Ensure that `ef_construction` is always set to a value equal to or greater than `ef` in the search phase \ No newline at end of file +`ef_construction` balances build speed and accuracy. Higher values increase accuracy but slow down the build process. A typical range is 150 to 300. For good search results, a minimum value of 100 is recommended. In most cases, setting this value above 500 offers no additional benefit. 
Ensure that `ef_construction` is always set to a value equal to or greater than `ef` in the search phase diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index 4e8faa29..44368827 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -567,6 +567,15 @@ describe("When creating an index", () => { // TODO: Verify parameters when we can load index config as part of list indices }); + it("should be able to create 4bit IVF_PQ", async () => { + await tbl.createIndex("vec", { + config: Index.ivfPq({ + numPartitions: 10, + numBits: 4, + }), + }); + }); + it("should allow me to replace (or not) an existing index", async () => { await tbl.createIndex("id"); // Default is replace=true diff --git a/nodejs/lancedb/indices.ts b/nodejs/lancedb/indices.ts index e7ce10b9..9001b732 100644 --- a/nodejs/lancedb/indices.ts +++ b/nodejs/lancedb/indices.ts @@ -47,6 +47,16 @@ export interface IvfPqOptions { */ numSubVectors?: number; + /** + * Number of bits per sub-vector. + * + * This value controls how much each subvector is compressed. The more bits the more + * accurate the index will be but the slower search. The default is 8 bits. + * + * The number of bits must be 4 or 8. + */ + numBits?: number; + /** * Distance type to use to build the index. 
* diff --git a/nodejs/src/index.rs b/nodejs/src/index.rs index c828f20c..99bfdb45 100644 --- a/nodejs/src/index.rs +++ b/nodejs/src/index.rs @@ -45,6 +45,7 @@ impl Index { distance_type: Option, num_partitions: Option, num_sub_vectors: Option, + num_bits: Option, max_iterations: Option, sample_rate: Option, ) -> napi::Result { @@ -59,6 +60,9 @@ impl Index { if let Some(num_sub_vectors) = num_sub_vectors { ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors); } + if let Some(num_bits) = num_bits { + ivf_pq_builder = ivf_pq_builder.num_bits(num_bits); + } if let Some(max_iterations) = max_iterations { ivf_pq_builder = ivf_pq_builder.max_iterations(max_iterations); } diff --git a/python/python/lancedb/index.py b/python/python/lancedb/index.py index 598d761e..55fa0e82 100644 --- a/python/python/lancedb/index.py +++ b/python/python/lancedb/index.py @@ -178,6 +178,12 @@ class HnswPq: If the dimension is not visible by 8 then we use 1 subvector. This is not ideal and will likely result in poor performance. + num_bits: int, default 8 + Number of bits to encode each sub-vector. + + This value controls how much the sub-vectors are compressed. The more bits + the more accurate the index but the slower search. Only 4 and 8 are supported. + max_iterations, default 50 Max iterations to train kmeans. 
@@ -232,6 +238,7 @@ class HnswPq: distance_type: Optional[str] = None, num_partitions: Optional[int] = None, num_sub_vectors: Optional[int] = None, + num_bits: Optional[int] = None, max_iterations: Optional[int] = None, sample_rate: Optional[int] = None, m: Optional[int] = None, @@ -241,6 +248,7 @@ class HnswPq: distance_type=distance_type, num_partitions=num_partitions, num_sub_vectors=num_sub_vectors, + num_bits=num_bits, max_iterations=max_iterations, sample_rate=sample_rate, m=m, @@ -387,6 +395,7 @@ class IvfPq: distance_type: Optional[str] = None, num_partitions: Optional[int] = None, num_sub_vectors: Optional[int] = None, + num_bits: Optional[int] = None, max_iterations: Optional[int] = None, sample_rate: Optional[int] = None, ): @@ -449,6 +458,12 @@ class IvfPq: If the dimension is not visible by 8 then we use 1 subvector. This is not ideal and will likely result in poor performance. + num_bits: int, default 8 + Number of bits to encode each sub-vector. + + This value controls how much the sub-vectors are compressed. The more bits + the more accurate the index but the slower search. The default is 8 + bits. Only 4 and 8 are supported. max_iterations: int, default 50 Max iteration to train kmeans. @@ -482,6 +497,7 @@ class IvfPq: distance_type=distance_type, num_partitions=num_partitions, num_sub_vectors=num_sub_vectors, + num_bits=num_bits, max_iterations=max_iterations, sample_rate=sample_rate, ) diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index 07c4f17e..3a82f9fa 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -413,6 +413,8 @@ class Table(ABC): replace: bool = True, accelerator: Optional[str] = None, index_cache_size: Optional[int] = None, + *, + num_bits: int = 8, ): """Create an index on the table. @@ -439,6 +441,9 @@ class Table(ABC): Only support "cuda" for now. index_cache_size : int, optional The size of the index cache in number of entries. Default value is 256. 
+ num_bits: int + The number of bits to encode sub-vectors. Only used with the IVF_PQ index. + Only 4 and 8 are supported. """ raise NotImplementedError @@ -1430,6 +1435,8 @@ class LanceTable(Table): accelerator: Optional[str] = None, index_cache_size: Optional[int] = None, index_type="IVF_PQ", + *, + num_bits: int = 8, ): """Create an index on the table.""" self._dataset_mut.create_index( @@ -1441,6 +1448,7 @@ class LanceTable(Table): replace=replace, accelerator=accelerator, index_cache_size=index_cache_size, + num_bits=num_bits, ) def create_scalar_index( diff --git a/python/python/tests/test_index.py b/python/python/tests/test_index.py index 3268179b..4c0caf7e 100644 --- a/python/python/tests/test_index.py +++ b/python/python/tests/test_index.py @@ -108,6 +108,29 @@ async def test_create_vector_index(some_table: AsyncTable): assert stats.num_indices == 1 +@pytest.mark.asyncio +async def test_create_4bit_ivfpq_index(some_table: AsyncTable): + # Can create + await some_table.create_index("vector", config=IvfPq(num_bits=4)) + # Can recreate if replace=True + await some_table.create_index("vector", config=IvfPq(num_bits=4), replace=True) + # Can't recreate if replace=False + with pytest.raises(RuntimeError, match="already exists"): + await some_table.create_index("vector", replace=False) + indices = await some_table.list_indices() + assert len(indices) == 1 + assert indices[0].index_type == "IvfPq" + assert indices[0].columns == ["vector"] + assert indices[0].name == "vector_idx" + + stats = await some_table.index_stats("vector_idx") + assert stats.index_type == "IVF_PQ" + assert stats.distance_type == "l2" + assert stats.num_indexed_rows == await some_table.count_rows() + assert stats.num_unindexed_rows == 0 + assert stats.num_indices == 1 + + @pytest.mark.asyncio async def test_create_hnswpq_index(some_table: AsyncTable): await some_table.create_index("vector", config=HnswPq(num_partitions=10)) diff --git a/python/python/tests/test_table.py 
b/python/python/tests/test_table.py index d1b44c50..7a8bb552 100644 --- a/python/python/tests/test_table.py +++ b/python/python/tests/test_table.py @@ -530,6 +530,7 @@ def test_create_index_method(): replace=True, accelerator=None, index_cache_size=256, + num_bits=8, ) diff --git a/python/src/index.rs b/python/src/index.rs index fd09d847..1e9ff260 100644 --- a/python/src/index.rs +++ b/python/src/index.rs @@ -47,12 +47,13 @@ impl Index { #[pymethods] impl Index { - #[pyo3(signature = (distance_type=None, num_partitions=None, num_sub_vectors=None, max_iterations=None, sample_rate=None))] + #[pyo3(signature = (distance_type=None, num_partitions=None, num_sub_vectors=None,num_bits=None, max_iterations=None, sample_rate=None))] #[staticmethod] pub fn ivf_pq( distance_type: Option, num_partitions: Option, num_sub_vectors: Option, + num_bits: Option, max_iterations: Option, sample_rate: Option, ) -> PyResult { @@ -75,6 +76,9 @@ impl Index { if let Some(num_sub_vectors) = num_sub_vectors { ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors); } + if let Some(num_bits) = num_bits { + ivf_pq_builder = ivf_pq_builder.num_bits(num_bits); + } if let Some(max_iterations) = max_iterations { ivf_pq_builder = ivf_pq_builder.max_iterations(max_iterations); } @@ -148,12 +152,14 @@ impl Index { } } - #[pyo3(signature = (distance_type=None, num_partitions=None, num_sub_vectors=None, max_iterations=None, sample_rate=None, m=None, ef_construction=None))] + #[pyo3(signature = (distance_type=None, num_partitions=None, num_sub_vectors=None,num_bits=None, max_iterations=None, sample_rate=None, m=None, ef_construction=None))] #[staticmethod] + #[allow(clippy::too_many_arguments)] pub fn hnsw_pq( distance_type: Option, num_partitions: Option, num_sub_vectors: Option, + num_bits: Option, max_iterations: Option, sample_rate: Option, m: Option, @@ -170,6 +176,9 @@ impl Index { if let Some(num_sub_vectors) = num_sub_vectors { hnsw_pq_builder = 
hnsw_pq_builder.num_sub_vectors(num_sub_vectors); } + if let Some(num_bits) = num_bits { + hnsw_pq_builder = hnsw_pq_builder.num_bits(num_bits); + } if let Some(max_iterations) = max_iterations { hnsw_pq_builder = hnsw_pq_builder.max_iterations(max_iterations); } diff --git a/rust/lancedb/src/index/vector.rs b/rust/lancedb/src/index/vector.rs index ddbbc7e8..f338026c 100644 --- a/rust/lancedb/src/index/vector.rs +++ b/rust/lancedb/src/index/vector.rs @@ -132,6 +132,10 @@ macro_rules! impl_pq_params_setter { self.num_sub_vectors = Some(num_sub_vectors); self } + pub fn num_bits(mut self, num_bits: u32) -> Self { + self.num_bits = Some(num_bits); + self + } }; } @@ -189,6 +193,7 @@ pub struct IvfPqIndexBuilder { // PQ pub(crate) num_sub_vectors: Option, + pub(crate) num_bits: Option, } impl Default for IvfPqIndexBuilder { @@ -197,6 +202,7 @@ impl Default for IvfPqIndexBuilder { distance_type: DistanceType::L2, num_partitions: None, num_sub_vectors: None, + num_bits: None, sample_rate: 256, max_iterations: 50, } @@ -256,6 +262,7 @@ pub struct IvfHnswPqIndexBuilder { // PQ pub(crate) num_sub_vectors: Option, + pub(crate) num_bits: Option, } impl Default for IvfHnswPqIndexBuilder { @@ -264,6 +271,7 @@ impl Default for IvfHnswPqIndexBuilder { distance_type: DistanceType::L2, num_partitions: None, num_sub_vectors: None, + num_bits: None, sample_rate: 256, max_iterations: 50, m: 20,