mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-07 04:12:59 +00:00
fix: hnsw default partitions (#1667)
PR fixes #1662 --------- Co-authored-by: Will Jones <willjones127@gmail.com>
This commit is contained in:
@@ -214,6 +214,11 @@ pub(crate) fn suggested_num_partitions(rows: usize) -> u32 {
|
||||
max(1, num_partitions)
|
||||
}
|
||||
|
||||
/// Suggests a default number of partitions for an IVF-HNSW index.
///
/// The suggestion is `rows * dim / (256 * 5_000_000)`, rounded down, and is
/// never smaller than 1.
///
/// * `rows` - number of rows in the table being indexed.
/// * `dim` - length of the fixed-size-list vector column being indexed.
///
/// The arithmetic saturates instead of wrapping or truncating: extremely large
/// inputs yield `u32::MAX` rather than an arbitrary small value.
pub(crate) fn suggested_num_partitions_for_hnsw(rows: usize, dim: u32) -> u32 {
    // Number of (row x dimension) elements that map to a single partition.
    const ELEMENTS_PER_PARTITION: u64 = 256 * 5_000_000;

    // `rows * dim` can exceed u64 for pathological inputs; saturate rather
    // than panic (debug builds) or wrap (release builds).
    let total_elements = (rows as u64).saturating_mul(u64::from(dim));

    // Clamp before narrowing so the `as u32` cast can never truncate.
    let num_partitions =
        (total_elements / ELEMENTS_PER_PARTITION).min(u64::from(u32::MAX)) as u32;

    num_partitions.max(1)
}
|
||||
|
||||
pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
|
||||
if dim % 16 == 0 {
|
||||
// Should be more aggressive than this default.
|
||||
|
||||
@@ -54,7 +54,8 @@ use crate::embeddings::{EmbeddingDefinition, EmbeddingRegistry, MaybeEmbedded, M
|
||||
use crate::error::{Error, Result};
|
||||
use crate::index::scalar::FtsIndexBuilder;
|
||||
use crate::index::vector::{
|
||||
IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder, VectorIndex,
|
||||
suggested_num_partitions_for_hnsw, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder,
|
||||
IvfPqIndexBuilder, VectorIndex,
|
||||
};
|
||||
use crate::index::IndexConfig;
|
||||
use crate::index::IndexStatistics;
|
||||
@@ -1440,11 +1441,19 @@ impl NativeTable {
|
||||
});
|
||||
}
|
||||
|
||||
let num_partitions = if let Some(n) = index.num_partitions {
|
||||
let num_partitions: u32 = if let Some(n) = index.num_partitions {
|
||||
n
|
||||
} else {
|
||||
suggested_num_partitions(self.count_rows(None).await?)
|
||||
match field.data_type() {
|
||||
arrow_schema::DataType::FixedSizeList(_, n) => Ok::<u32, Error>(
|
||||
suggested_num_partitions_for_hnsw(self.count_rows(None).await?, *n as u32),
|
||||
),
|
||||
_ => Err(Error::Schema {
|
||||
message: format!("Column '{}' is not a FixedSizeList", field.name()),
|
||||
}),
|
||||
}?
|
||||
};
|
||||
|
||||
let num_sub_vectors: u32 = if let Some(n) = index.num_sub_vectors {
|
||||
n
|
||||
} else {
|
||||
@@ -1503,10 +1512,17 @@ impl NativeTable {
|
||||
});
|
||||
}
|
||||
|
||||
let num_partitions = if let Some(n) = index.num_partitions {
|
||||
let num_partitions: u32 = if let Some(n) = index.num_partitions {
|
||||
n
|
||||
} else {
|
||||
suggested_num_partitions(self.count_rows(None).await?)
|
||||
match field.data_type() {
|
||||
arrow_schema::DataType::FixedSizeList(_, n) => Ok::<u32, Error>(
|
||||
suggested_num_partitions_for_hnsw(self.count_rows(None).await?, *n as u32),
|
||||
),
|
||||
_ => Err(Error::Schema {
|
||||
message: format!("Column '{}' is not a FixedSizeList", field.name()),
|
||||
}),
|
||||
}?
|
||||
};
|
||||
|
||||
let mut dataset = self.dataset.get_mut().await?;
|
||||
|
||||
Reference in New Issue
Block a user