diff --git a/rust/lancedb/src/index/vector.rs b/rust/lancedb/src/index/vector.rs index f287ce2f..ddbbc7e8 100644 --- a/rust/lancedb/src/index/vector.rs +++ b/rust/lancedb/src/index/vector.rs @@ -214,6 +214,11 @@ pub(crate) fn suggested_num_partitions(rows: usize) -> u32 { max(1, num_partitions) } +pub(crate) fn suggested_num_partitions_for_hnsw(rows: usize, dim: u32) -> u32 { + let num_partitions = (((rows as u64) * (dim as u64)) / (256 * 5_000_000)) as u32; + max(1, num_partitions) +} + pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 { if dim % 16 == 0 { // Should be more aggressive than this default. diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index 86fd1d1b..109ebcd4 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -54,7 +54,8 @@ use crate::embeddings::{EmbeddingDefinition, EmbeddingRegistry, MaybeEmbedded, M use crate::error::{Error, Result}; use crate::index::scalar::FtsIndexBuilder; use crate::index::vector::{ - IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder, VectorIndex, + suggested_num_partitions_for_hnsw, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, + IvfPqIndexBuilder, VectorIndex, }; use crate::index::IndexConfig; use crate::index::IndexStatistics; @@ -1440,11 +1441,19 @@ impl NativeTable { }); } - let num_partitions = if let Some(n) = index.num_partitions { + let num_partitions: u32 = if let Some(n) = index.num_partitions { n } else { - suggested_num_partitions(self.count_rows(None).await?) + match field.data_type() { + arrow_schema::DataType::FixedSizeList(_, n) => Ok::<u32, Error>( + suggested_num_partitions_for_hnsw(self.count_rows(None).await?, *n as u32), + ), + _ => Err(Error::Schema { + message: format!("Column '{}' is not a FixedSizeList", field.name()), + }), + }? 
}; + let num_sub_vectors: u32 = if let Some(n) = index.num_sub_vectors { n } else { @@ -1503,10 +1512,17 @@ impl NativeTable { }); } - let num_partitions = if let Some(n) = index.num_partitions { + let num_partitions: u32 = if let Some(n) = index.num_partitions { n } else { - suggested_num_partitions(self.count_rows(None).await?) + match field.data_type() { + arrow_schema::DataType::FixedSizeList(_, n) => Ok::<u32, Error>( + suggested_num_partitions_for_hnsw(self.count_rows(None).await?, *n as u32), + ), + _ => Err(Error::Schema { + message: format!("Column '{}' is not a FixedSizeList", field.name()), + }), + }? }; let mut dataset = self.dataset.get_mut().await?;