1use std::fmt::{Display, Formatter};
16
17use common_error::ext::BoxedError;
18use common_recordbatch::OrderOption;
19use datafusion_expr::expr::Expr;
20pub use datatypes::schema::{VectorDistanceMetric, VectorIndexEngineType};
22use strum::Display;
23
24use crate::storage::{ColumnId, SequenceNumber};
25
/// Parameters of a vector search that can be attached to a [`ScanRequest`]
/// through its `vector_search` field.
#[derive(Debug, Clone, PartialEq)]
pub struct VectorSearchRequest {
    /// Id of the column the search targets.
    // NOTE(review): presumably the vector-typed column backed by a vector index — confirm.
    pub column_id: ColumnId,
    /// The query vector, as `f32` components.
    pub query_vector: Vec<f32>,
    /// Number of results requested.
    // NOTE(review): presumably the "k" of a k-nearest-neighbours search — confirm.
    pub k: usize,
    /// Distance metric to use for the search.
    pub metric: VectorDistanceMetric,
}
38
/// Result of [`VectorIndexEngine::search`]: matched keys and their distances.
#[derive(Debug, Clone, PartialEq)]
pub struct VectorSearchMatches {
    /// Keys of the matched vectors (the `u64` keys passed to [`VectorIndexEngine::add`]).
    pub keys: Vec<u64>,
    /// Distances reported for the matches.
    // NOTE(review): presumably parallel to `keys` (`distances[i]` belongs to `keys[i]`)
    // and measured from the query vector — confirm against the engine implementations.
    pub distances: Vec<f32>,
}
47
/// Abstraction over a vector index implementation.
///
/// Implementors must be `Send + Sync`. Fallible operations report failures as
/// [`BoxedError`].
pub trait VectorIndexEngine: Send + Sync {
    /// Adds `vector` to the index under `key`.
    fn add(&mut self, key: u64, vector: &[f32]) -> Result<(), BoxedError>;

    /// Searches the index with `query` and returns the matches.
    // NOTE(review): `k` is presumably the maximum number of matches to return — confirm.
    fn search(&self, query: &[f32], k: usize) -> Result<VectorSearchMatches, BoxedError>;

    /// Returns the length of the serialized index.
    // NOTE(review): presumably the number of bytes `save_to_buffer` needs — confirm.
    fn serialized_length(&self) -> usize;

    /// Serializes the index into `buffer`.
    // NOTE(review): callers presumably size `buffer` using `serialized_length` — confirm.
    fn save_to_buffer(&self, buffer: &mut [u8]) -> Result<(), BoxedError>;

    /// Reserves space in the index for `capacity` entries.
    // NOTE(review): unclear from here whether `capacity` is a total or an additional
    // amount — confirm against the implementations.
    fn reserve(&mut self, capacity: usize) -> Result<(), BoxedError>;

    /// Returns the current size of the index.
    // NOTE(review): presumably the number of vectors stored — confirm.
    fn size(&self) -> usize;

    /// Returns the current capacity of the index.
    fn capacity(&self) -> usize;

    /// Returns the memory usage of the index.
    // NOTE(review): presumably in bytes — confirm.
    fn memory_usage(&self) -> usize;
}
77
/// Row selector that can be set on a [`ScanRequest`] via `series_row_selector`.
///
/// `Display` is derived through `strum`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display)]
pub enum TimeSeriesRowSelector {
    /// Select the last row.
    // NOTE(review): presumably the last row of each time series — confirm.
    LastRow,
}
84
/// Distribution hint that can be set on a [`ScanRequest`] via `distribution`.
///
/// `Display` is derived through `strum`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display)]
pub enum TimeSeriesDistribution {
    /// Time-windowed distribution.
    // NOTE(review): presumably data is organised/returned by time window first — confirm.
    TimeWindowed,
    /// Per-series distribution.
    // NOTE(review): presumably data is organised/returned by time series first — confirm.
    PerSeries,
}
95
/// Describes a scan: what to read, how to filter it and optional hints.
///
/// Every field has a default (`None`, empty or `false`), so a request is usually
/// built with struct-update syntax on top of `Default::default()`.
#[derive(Default, Clone, Debug, PartialEq)]
pub struct ScanRequest {
    /// Optional projection, given as `usize` indices.
    // NOTE(review): presumably column indices, with `None` meaning "all columns" — confirm.
    pub projection: Option<Vec<usize>>,
    /// Filter expressions for the scan; empty means no filter.
    pub filters: Vec<Expr>,
    /// Optional ordering requested for the output.
    pub output_ordering: Option<Vec<OrderOption>>,
    /// Optional limit.
    // NOTE(review): presumably the maximum number of rows to return — confirm.
    pub limit: Option<usize>,
    /// Optional row selector applied to time series.
    pub series_row_selector: Option<TimeSeriesRowSelector>,
    /// Optional upper sequence bound for memtables.
    ///
    /// The `Display` impl renders this field under the label `sequence`.
    // NOTE(review): inclusive/exclusive semantics are not visible here — confirm.
    pub memtable_max_sequence: Option<SequenceNumber>,
    /// Optional lower sequence bound for memtables.
    // NOTE(review): inclusive/exclusive semantics are not visible here — confirm.
    pub memtable_min_sequence: Option<SequenceNumber>,
    /// Optional lower sequence bound for SST files.
    // NOTE(review): inclusive/exclusive semantics are not visible here — confirm.
    pub sst_min_sequence: Option<SequenceNumber>,
    /// Snapshot flag for the scan.
    // NOTE(review): presumably binds the scan to a snapshot when it starts — confirm.
    pub snapshot_on_scan: bool,
    /// Optional hint about how the time-series data should be distributed.
    pub distribution: Option<TimeSeriesDistribution>,
    /// Optional vector search to perform as part of the scan.
    pub vector_search: Option<VectorSearchRequest>,
    /// Flat-format flag.
    // NOTE(review): presumably forces the reader to use the flat format — confirm.
    pub force_flat_format: bool,
}
134
135impl Display for ScanRequest {
136 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
137 enum Delimiter {
138 None,
139 Init,
140 }
141
142 impl Delimiter {
143 fn as_str(&mut self) -> &str {
144 match self {
145 Delimiter::None => {
146 *self = Delimiter::Init;
147 ""
148 }
149 Delimiter::Init => ", ",
150 }
151 }
152 }
153
154 let mut delimiter = Delimiter::None;
155
156 write!(f, "ScanRequest {{ ")?;
157 if let Some(projection) = &self.projection {
158 write!(f, "{}projection: {:?}", delimiter.as_str(), projection)?;
159 }
160 if !self.filters.is_empty() {
161 write!(
162 f,
163 "{}filters: [{}]",
164 delimiter.as_str(),
165 self.filters
166 .iter()
167 .map(|f| f.to_string())
168 .collect::<Vec<_>>()
169 .join(", ")
170 )?;
171 }
172 if let Some(output_ordering) = &self.output_ordering {
173 write!(
174 f,
175 "{}output_ordering: {:?}",
176 delimiter.as_str(),
177 output_ordering
178 )?;
179 }
180 if let Some(limit) = &self.limit {
181 write!(f, "{}limit: {}", delimiter.as_str(), limit)?;
182 }
183 if let Some(series_row_selector) = &self.series_row_selector {
184 write!(
185 f,
186 "{}series_row_selector: {}",
187 delimiter.as_str(),
188 series_row_selector
189 )?;
190 }
191 if let Some(sequence) = &self.memtable_max_sequence {
192 write!(f, "{}sequence: {}", delimiter.as_str(), sequence)?;
193 }
194 if let Some(sst_min_sequence) = &self.sst_min_sequence {
195 write!(
196 f,
197 "{}sst_min_sequence: {}",
198 delimiter.as_str(),
199 sst_min_sequence
200 )?;
201 }
202 if self.snapshot_on_scan {
203 write!(
204 f,
205 "{}snapshot_on_scan: {}",
206 delimiter.as_str(),
207 self.snapshot_on_scan
208 )?;
209 }
210 if let Some(distribution) = &self.distribution {
211 write!(f, "{}distribution: {}", delimiter.as_str(), distribution)?;
212 }
213 if let Some(vector_search) = &self.vector_search {
214 write!(
215 f,
216 "{}vector_search: column_id={}, k={}, metric={}",
217 delimiter.as_str(),
218 vector_search.column_id,
219 vector_search.k,
220 vector_search.metric
221 )?;
222 }
223 if self.force_flat_format {
224 write!(
225 f,
226 "{}force_flat_format: {}",
227 delimiter.as_str(),
228 self.force_flat_format
229 )?;
230 }
231 write!(f, " }}")
232 }
233}
234
#[cfg(test)]
mod tests {
    use datafusion_expr::{Operator, binary_expr, col, lit};

    use super::*;

    /// Checks the `Display` rendering of [`ScanRequest`] for several field combinations.
    #[test]
    fn test_display_scan_request() {
        // The two filters shared by several cases: `i > 1` and `s = "x"`.
        let sample_filters = || {
            vec![
                binary_expr(col("i"), Operator::Gt, lit(1)),
                binary_expr(col("s"), Operator::Eq, lit("x")),
            ]
        };

        // Each entry pairs a request with the exact text it must render to.
        let cases = [
            // Nothing set: only the surrounding braces are printed.
            (ScanRequest::default(), "ScanRequest { }"),
            // Projection, filters and limit together.
            (
                ScanRequest {
                    projection: Some(vec![1, 2]),
                    filters: sample_filters(),
                    limit: Some(10),
                    ..Default::default()
                },
                r#"ScanRequest { projection: [1, 2], filters: [i > Int32(1), s = Utf8("x")], limit: 10 }"#,
            ),
            // Filters first: no leading separator.
            (
                ScanRequest {
                    filters: sample_filters(),
                    limit: Some(10),
                    ..Default::default()
                },
                r#"ScanRequest { filters: [i > Int32(1), s = Utf8("x")], limit: 10 }"#,
            ),
            // Skipping the filters keeps a single separator between the rest.
            (
                ScanRequest {
                    projection: Some(vec![1, 2]),
                    limit: Some(10),
                    ..Default::default()
                },
                "ScanRequest { projection: [1, 2], limit: 10 }",
            ),
            // Boolean flags are printed only when set.
            (
                ScanRequest {
                    force_flat_format: true,
                    ..Default::default()
                },
                "ScanRequest { force_flat_format: true }",
            ),
            (
                ScanRequest {
                    snapshot_on_scan: true,
                    ..Default::default()
                },
                "ScanRequest { snapshot_on_scan: true }",
            ),
        ];

        for (request, expected) in cases {
            assert_eq!(request.to_string(), expected);
        }
    }
}