feat: support specifying ef search param (#1844)

Signed-off-by: BubbleCal <bubble-cal@outlook.com>
This commit is contained in:
BubbleCal
2024-11-19 23:12:25 +08:00
committed by GitHub
parent f2e3989831
commit b2f88f0b29
10 changed files with 165 additions and 0 deletions

View File

@@ -704,6 +704,9 @@ pub struct VectorQuery {
// IVF PQ - ANN search.
pub(crate) query_vector: Vec<Arc<dyn Array>>,
pub(crate) nprobes: usize,
// The number of candidates to return during the refine step for HNSW.
// When `None`, the default of 1.5 * limit is used.
pub(crate) ef: Option<usize>,
pub(crate) refine_factor: Option<u32>,
pub(crate) distance_type: Option<DistanceType>,
/// Default is true. Set to false to enforce a brute force search.
@@ -717,6 +720,7 @@ impl VectorQuery {
column: None,
query_vector: Vec::new(),
nprobes: 20,
ef: None,
refine_factor: None,
distance_type: None,
use_index: true,
@@ -776,6 +780,18 @@ impl VectorQuery {
self
}
/// Set the number of candidates to return during the refine step for HNSW
///
/// This argument is only used when the vector column has an HNSW index.
/// If there is no index then this value is ignored.
///
/// Increasing this value will increase the recall of your query but will
/// also increase the latency of your query. The default value is 1.5*limit.
pub fn ef(mut self, ef: usize) -> Self {
self.ef = Some(ef);
self
}
/// A multiplier to control how many additional rows are taken during the refine step
///
/// This argument is only used when the vector column has an IVF PQ index.

View File

@@ -196,6 +196,7 @@ impl<S: HttpSend> RemoteTable<S> {
body["prefilter"] = query.base.prefilter.into();
body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
body["nprobes"] = query.nprobes.into();
body["ef"] = query.ef.into();
body["refine_factor"] = query.refine_factor.into();
if let Some(vector_column) = query.column.as_ref() {
body["vector_column"] = serde_json::Value::String(vector_column.clone());
@@ -1121,6 +1122,7 @@ mod tests {
"prefilter": true,
"distance_type": "l2",
"nprobes": 20,
"ef": Option::<usize>::None,
"refine_factor": null,
});
// Pass vector separately to make sure it matches f32 precision.
@@ -1166,6 +1168,7 @@ mod tests {
"bypass_vector_index": true,
"columns": ["a", "b"],
"nprobes": 12,
"ef": Option::<usize>::None,
"refine_factor": 2,
});
// Pass vector separately to make sure it matches f32 precision.

View File

@@ -1904,6 +1904,9 @@ impl TableInternal for NativeTable {
query.base.offset.map(|offset| offset as i64),
)?;
scanner.nprobs(query.nprobes);
if let Some(ef) = query.ef {
scanner.ef(ef);
}
scanner.use_index(query.use_index);
scanner.prefilter(query.base.prefilter);
match query.base.select {