From a250d8e7dfd377524d42e0780466ad4a247d635a Mon Sep 17 00:00:00 2001 From: Prashanth Rao <35005448+prrao87@users.noreply.github.com> Date: Sun, 23 Nov 2025 21:35:19 -0800 Subject: [PATCH] docs: improve docstring for RabitQ in Python (#2808) This PR improves the docstring for `IVF_RQ` (RabitQ) in Python. The earlier version referred to it as "residual quantization", which is confusing to future readers of the code. In contrast, the TypeScript and Rust codebases defined `IVF_RQ` as RabitQ. So now the three languages use comments that are consistent with one another. --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- python/python/lancedb/index.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/python/python/lancedb/index.py b/python/python/lancedb/index.py index 020e8e05..27202c01 100644 --- a/python/python/lancedb/index.py +++ b/python/python/lancedb/index.py @@ -609,9 +609,19 @@ class IvfPq: class IvfRq: """Describes an IVF RQ Index - IVF-RQ (Residual Quantization) stores a compressed copy of each vector using - residual quantization and organizes them into IVF partitions. Parameters - largely mirror IVF-PQ for consistency. + IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization + and organizes them into IVF partitions. + + The compression scheme is called RabitQ quantization. Each dimension is + quantized into a small number of bits. The `num_bits` parameter controls + this process, providing a tradeoff between + index size (and thus search speed) and index accuracy. + + The partitioning process is called IVF and the `num_partitions` parameter + controls how many groups to create. + + Note that training an IVF RQ index on a large dataset is a slow operation + and currently is also a memory-intensive operation. Attributes ---------- @@ -628,7 +638,7 @@ class IvfRq: Number of IVF partitions to create. 
num_bits: int, default 1 - Number of bits to encode each dimension. + Number of bits to encode each dimension in the RabitQ codebook. max_iterations: int, default 50 Max iterations to train kmeans when computing IVF partitions.