From 3324e7d5255591776e28ad2b9341b5343feb10c1 Mon Sep 17 00:00:00 2001
From: BubbleCal <bubble-cal@outlook.com>
Date: Tue, 10 Dec 2024 10:36:03 +0800
Subject: [PATCH] feat: support 4bit PQ (#1916)

---
 docs/src/ann_indexes.md           |  7 ++++---
 nodejs/__test__/table.test.ts     |  9 +++++++++
 nodejs/lancedb/indices.ts         | 10 ++++++++++
 nodejs/src/index.rs               |  4 ++++
 python/python/lancedb/index.py    | 16 ++++++++++++++++
 python/python/lancedb/table.py    |  8 ++++++++
 python/python/tests/test_index.py | 23 +++++++++++++++++++++++
 python/python/tests/test_table.py |  1 +
 python/src/index.rs               | 13 +++++++++++--
 rust/lancedb/src/index/vector.rs  |  8 ++++++++
 10 files changed, 94 insertions(+), 5 deletions(-)
diff --git a/docs/src/ann_indexes.md b/docs/src/ann_indexes.md
index 2d80c48e..8d293fa4 100644
--- a/docs/src/ann_indexes.md
+++ b/docs/src/ann_indexes.md
@@ -83,6 +83,7 @@ The following IVF_PQ paramters can be specified:
 - **num_sub_vectors**: The number of sub-vectors (M) that will be created during Product Quantization (PQ).
   For D dimensional vector, it will be divided into `M` subvectors with dimension `D/M`, each of which is replaced by
   a single PQ code. The default is the dimension of the vector divided by 16.
+- **num_bits**: The number of bits used to encode each sub-vector. Only 4 and 8 are supported. The higher the number of bits, the higher the accuracy of the index, but the slower the search. The default is 8.
 
 !!! note
 
@@ -142,11 +143,11 @@ There are a couple of parameters that can be used to fine-tune the search:
 - **nprobes** (default: 20): The number of probes used. A higher number makes search more accurate but also slower.<br/>
   Most of the time, setting nprobes to cover 5-15% of the dataset should achieve high recall with low latency.<br/>
     - _For example_, For a dataset of 1 million vectors divided into 256 partitions, `nprobes` should be set to ~20-40. This value can be adjusted to achieve the optimal balance between search latency and search quality. <br/>
-  
+
 - **refine_factor** (default: None): Refine the results by reading extra elements and re-ranking them in memory.<br/>
   A higher number makes search more accurate but also slower. If you find the recall is less than ideal, try refine_factor=10 to start.<br/>
     - _For example_, For a dataset of 1 million vectors divided into 256 partitions, setting the `refine_factor` to 200 will initially retrieve the top 4,000 candidates (top k * refine_factor) from all searched partitions. These candidates are then reranked to determine the final top 20 results.<br/>
-!!! note 
+!!! note
     Both `nprobes` and `refine_factor` are only applicable if an ANN index is present. If specified on a table without an ANN index, those parameters are ignored.
 
 
@@ -288,4 +289,4 @@ less space distortion, and thus yields better accuracy. However, a higher `num_s
 
 `m` determines the number of connections a new node establishes with its closest neighbors upon entering the graph. Typically, `m` falls within the range of 5 to 48. Lower `m` values are suitable for low-dimensional data or scenarios where recall is less critical. Conversely, higher `m` values are beneficial for high-dimensional data or when high recall is required. In essence, a larger `m` results in a denser graph with increased connectivity, but at the expense of higher memory consumption.
 
-`ef_construction` balances build speed and accuracy. Higher values increase accuracy but slow down the build process. A typical range is 150 to 300. For good search results, a minimum value of 100 is recommended. In most cases, setting this value above 500 offers no additional benefit. Ensure that `ef_construction` is always set to a value equal to or greater than `ef` in the search phase
\ No newline at end of file
+`ef_construction` balances build speed and accuracy. Higher values increase accuracy but slow down the build process. A typical range is 150 to 300. For good search results, a minimum value of 100 is recommended. In most cases, setting this value above 500 offers no additional benefit. Ensure that `ef_construction` is always set to a value equal to or greater than `ef` in the search phase
diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts
index 4e8faa29..44368827 100644
--- a/nodejs/__test__/table.test.ts
+++ b/nodejs/__test__/table.test.ts
@@ -567,6 +567,15 @@ describe("When creating an index", () => {
     // TODO: Verify parameters when we can load index config as part of list indices
   });
 
+  it("should be able to create 4bit IVF_PQ", async () => {
+    await tbl.createIndex("vec", {
+      config: Index.ivfPq({
+        numPartitions: 10,
+        numBits: 4,
+      }),
+    });
+  });
+
   it("should allow me to replace (or not) an existing index", async () => {
     await tbl.createIndex("id");
     // Default is replace=true
diff --git a/nodejs/lancedb/indices.ts b/nodejs/lancedb/indices.ts
index e7ce10b9..9001b732 100644
--- a/nodejs/lancedb/indices.ts
+++ b/nodejs/lancedb/indices.ts
@@ -47,6 +47,16 @@ export interface IvfPqOptions {
    */
   numSubVectors?: number;
 
+  /**
+   * Number of bits per sub-vector.
+   *
+   * This value controls how much each subvector is compressed.  The more bits the more
+   * accurate the index will be, but the slower the search.  The default is 8 bits.
+   *
+   * The number of bits must be 4 or 8.
+   */
+  numBits?: number;
+
   /**
    * Distance type to use to build the index.
    *
diff --git a/nodejs/src/index.rs b/nodejs/src/index.rs
index c828f20c..99bfdb45 100644
--- a/nodejs/src/index.rs
+++ b/nodejs/src/index.rs
@@ -45,6 +45,7 @@ impl Index {
         distance_type: Option<String>,
         num_partitions: Option<u32>,
         num_sub_vectors: Option<u32>,
+        num_bits: Option<u32>,
         max_iterations: Option<u32>,
         sample_rate: Option<u32>,
     ) -> napi::Result<Self> {
@@ -59,6 +60,9 @@ impl Index {
         if let Some(num_sub_vectors) = num_sub_vectors {
             ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
         }
+        if let Some(num_bits) = num_bits {
+            ivf_pq_builder = ivf_pq_builder.num_bits(num_bits);
+        }
         if let Some(max_iterations) = max_iterations {
             ivf_pq_builder = ivf_pq_builder.max_iterations(max_iterations);
         }
diff --git a/python/python/lancedb/index.py b/python/python/lancedb/index.py
index 598d761e..55fa0e82 100644
--- a/python/python/lancedb/index.py
+++ b/python/python/lancedb/index.py
@@ -178,6 +178,12 @@ class HnswPq:
         If the dimension is not visible by 8 then we use 1 subvector.  This is not
         ideal and will likely result in poor performance.
 
+    num_bits: int, default 8
+        Number of bits to encode each sub-vector.
+
+        This value controls how much the sub-vectors are compressed.  More bits
+        give a more accurate index but a slower search. Only 4 and 8 are supported.
+
     max_iterations, default 50
 
         Max iterations to train kmeans.
@@ -232,6 +238,7 @@ class HnswPq:
         distance_type: Optional[str] = None,
         num_partitions: Optional[int] = None,
         num_sub_vectors: Optional[int] = None,
+        num_bits: Optional[int] = None,
         max_iterations: Optional[int] = None,
         sample_rate: Optional[int] = None,
         m: Optional[int] = None,
@@ -241,6 +248,7 @@ class HnswPq:
             distance_type=distance_type,
             num_partitions=num_partitions,
             num_sub_vectors=num_sub_vectors,
+            num_bits=num_bits,
             max_iterations=max_iterations,
             sample_rate=sample_rate,
             m=m,
@@ -387,6 +395,7 @@ class IvfPq:
         distance_type: Optional[str] = None,
         num_partitions: Optional[int] = None,
         num_sub_vectors: Optional[int] = None,
+        num_bits: Optional[int] = None,
         max_iterations: Optional[int] = None,
         sample_rate: Optional[int] = None,
     ):
@@ -449,6 +458,12 @@ class IvfPq:
 
             If the dimension is not visible by 8 then we use 1 subvector.  This is not
             ideal and will likely result in poor performance.
+        num_bits: int, default 8
+            Number of bits to encode each sub-vector.
+
+            This value controls how much the sub-vectors are compressed.  The more bits
+            the more accurate the index, but the slower the search.  The default is 8
+            bits.  Only 4 and 8 are supported.
         max_iterations: int, default 50
             Max iteration to train kmeans.
 
@@ -482,6 +497,7 @@ class IvfPq:
             distance_type=distance_type,
             num_partitions=num_partitions,
             num_sub_vectors=num_sub_vectors,
+            num_bits=num_bits,
             max_iterations=max_iterations,
             sample_rate=sample_rate,
         )
diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py
index 07c4f17e..3a82f9fa 100644
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -413,6 +413,8 @@ class Table(ABC):
         replace: bool = True,
         accelerator: Optional[str] = None,
         index_cache_size: Optional[int] = None,
+        *,
+        num_bits: int = 8,
     ):
         """Create an index on the table.
 
@@ -439,6 +441,9 @@ class Table(ABC):
             Only support "cuda" for now.
         index_cache_size : int, optional
             The size of the index cache in number of entries. Default value is 256.
+        num_bits: int
+            The number of bits to encode sub-vectors. Only used with the IVF_PQ index.
+            Only 4 and 8 are supported.
         """
         raise NotImplementedError
 
@@ -1430,6 +1435,8 @@ class LanceTable(Table):
         accelerator: Optional[str] = None,
         index_cache_size: Optional[int] = None,
         index_type="IVF_PQ",
+        *,
+        num_bits: int = 8,
     ):
         """Create an index on the table."""
         self._dataset_mut.create_index(
@@ -1441,6 +1448,7 @@ class LanceTable(Table):
             replace=replace,
             accelerator=accelerator,
             index_cache_size=index_cache_size,
+            num_bits=num_bits,
         )
 
     def create_scalar_index(
diff --git a/python/python/tests/test_index.py b/python/python/tests/test_index.py
index 3268179b..4c0caf7e 100644
--- a/python/python/tests/test_index.py
+++ b/python/python/tests/test_index.py
@@ -108,6 +108,29 @@ async def test_create_vector_index(some_table: AsyncTable):
     assert stats.num_indices == 1
 
 
+@pytest.mark.asyncio
+async def test_create_4bit_ivfpq_index(some_table: AsyncTable):
+    # Can create
+    await some_table.create_index("vector", config=IvfPq(num_bits=4))
+    # Can recreate if replace=True
+    await some_table.create_index("vector", config=IvfPq(num_bits=4), replace=True)
+    # Can't recreate if replace=False
+    with pytest.raises(RuntimeError, match="already exists"):
+        await some_table.create_index("vector", replace=False)
+    indices = await some_table.list_indices()
+    assert len(indices) == 1
+    assert indices[0].index_type == "IvfPq"
+    assert indices[0].columns == ["vector"]
+    assert indices[0].name == "vector_idx"
+
+    stats = await some_table.index_stats("vector_idx")
+    assert stats.index_type == "IVF_PQ"
+    assert stats.distance_type == "l2"
+    assert stats.num_indexed_rows == await some_table.count_rows()
+    assert stats.num_unindexed_rows == 0
+    assert stats.num_indices == 1
+
+
 @pytest.mark.asyncio
 async def test_create_hnswpq_index(some_table: AsyncTable):
     await some_table.create_index("vector", config=HnswPq(num_partitions=10))
diff --git a/python/python/tests/test_table.py b/python/python/tests/test_table.py
index d1b44c50..7a8bb552 100644
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -530,6 +530,7 @@ def test_create_index_method():
             replace=True,
             accelerator=None,
             index_cache_size=256,
+            num_bits=8,
         )
 
 
diff --git a/python/src/index.rs b/python/src/index.rs
index fd09d847..1e9ff260 100644
--- a/python/src/index.rs
+++ b/python/src/index.rs
@@ -47,12 +47,13 @@ impl Index {
 
 #[pymethods]
 impl Index {
-    #[pyo3(signature = (distance_type=None, num_partitions=None, num_sub_vectors=None, max_iterations=None, sample_rate=None))]
+    #[pyo3(signature = (distance_type=None, num_partitions=None, num_sub_vectors=None,num_bits=None, max_iterations=None, sample_rate=None))]
     #[staticmethod]
     pub fn ivf_pq(
         distance_type: Option<String>,
         num_partitions: Option<u32>,
         num_sub_vectors: Option<u32>,
+        num_bits: Option<u32>,
         max_iterations: Option<u32>,
         sample_rate: Option<u32>,
     ) -> PyResult<Self> {
@@ -75,6 +76,9 @@ impl Index {
         if let Some(num_sub_vectors) = num_sub_vectors {
             ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
         }
+        if let Some(num_bits) = num_bits {
+            ivf_pq_builder = ivf_pq_builder.num_bits(num_bits);
+        }
         if let Some(max_iterations) = max_iterations {
             ivf_pq_builder = ivf_pq_builder.max_iterations(max_iterations);
         }
@@ -148,12 +152,14 @@ impl Index {
         }
     }
 
-    #[pyo3(signature = (distance_type=None, num_partitions=None, num_sub_vectors=None, max_iterations=None, sample_rate=None, m=None, ef_construction=None))]
+    #[pyo3(signature = (distance_type=None, num_partitions=None, num_sub_vectors=None,num_bits=None, max_iterations=None, sample_rate=None, m=None, ef_construction=None))]
     #[staticmethod]
+    #[allow(clippy::too_many_arguments)]
     pub fn hnsw_pq(
         distance_type: Option<String>,
         num_partitions: Option<u32>,
         num_sub_vectors: Option<u32>,
+        num_bits: Option<u32>,
         max_iterations: Option<u32>,
         sample_rate: Option<u32>,
         m: Option<u32>,
@@ -170,6 +176,9 @@ impl Index {
         if let Some(num_sub_vectors) = num_sub_vectors {
             hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
         }
+        if let Some(num_bits) = num_bits {
+            hnsw_pq_builder = hnsw_pq_builder.num_bits(num_bits);
+        }
         if let Some(max_iterations) = max_iterations {
             hnsw_pq_builder = hnsw_pq_builder.max_iterations(max_iterations);
         }
diff --git a/rust/lancedb/src/index/vector.rs b/rust/lancedb/src/index/vector.rs
index ddbbc7e8..f338026c 100644
--- a/rust/lancedb/src/index/vector.rs
+++ b/rust/lancedb/src/index/vector.rs
@@ -132,6 +132,10 @@ macro_rules! impl_pq_params_setter {
             self.num_sub_vectors = Some(num_sub_vectors);
             self
         }
+        pub fn num_bits(mut self, num_bits: u32) -> Self {
+            self.num_bits = Some(num_bits);
+            self
+        }
     };
 }
 
@@ -189,6 +193,7 @@ pub struct IvfPqIndexBuilder {
 
     // PQ
     pub(crate) num_sub_vectors: Option<u32>,
+    pub(crate) num_bits: Option<u32>,
 }
 
 impl Default for IvfPqIndexBuilder {
@@ -197,6 +202,7 @@ impl Default for IvfPqIndexBuilder {
             distance_type: DistanceType::L2,
             num_partitions: None,
             num_sub_vectors: None,
+            num_bits: None,
             sample_rate: 256,
             max_iterations: 50,
         }
@@ -256,6 +262,7 @@ pub struct IvfHnswPqIndexBuilder {
 
     // PQ
     pub(crate) num_sub_vectors: Option<u32>,
+    pub(crate) num_bits: Option<u32>,
 }
 
 impl Default for IvfHnswPqIndexBuilder {
@@ -264,6 +271,7 @@ impl Default for IvfHnswPqIndexBuilder {
             distance_type: DistanceType::L2,
             num_partitions: None,
             num_sub_vectors: None,
+            num_bits: None,
             sample_rate: 256,
             max_iterations: 50,
             m: 20,