1use std::collections::HashMap;
16use std::fmt::{Display, Formatter};
17
18use common_error::ext::BoxedError;
19use common_recordbatch::OrderOption;
20use datafusion_expr::expr::Expr;
21pub use datatypes::schema::{VectorDistanceMetric, VectorIndexEngineType};
23use datatypes::types::json_type::JsonNativeType;
24use itertools::Itertools;
25use strum::Display;
26
27use crate::storage::{ColumnId, ProjectionInput, SequenceNumber};
28
/// Parameters of a vector search; carried by [`ScanRequest`] in its `vector_search` field.
#[derive(Debug, Clone, PartialEq)]
pub struct VectorSearchRequest {
    /// Id of the column the search refers to.
    // NOTE(review): presumably the vector column being searched — confirm with the scan code.
    pub column_id: ColumnId,
    /// The query vector, as `f32` components.
    pub query_vector: Vec<f32>,
    /// The `k` of the search.
    // NOTE(review): presumably the number of nearest neighbours to return — confirm.
    pub k: usize,
    /// Distance metric associated with the search.
    pub metric: VectorDistanceMetric,
}
41
/// Matches produced by [`VectorIndexEngine::search`].
// NOTE(review): `keys` and `distances` look like parallel vectors (entry `i` of each describing
// the same match); nothing in this file enforces equal lengths — confirm with implementors.
#[derive(Debug, Clone, PartialEq)]
pub struct VectorSearchMatches {
    /// Keys of the matched vectors (same key type as [`VectorIndexEngine::add`] takes).
    pub keys: Vec<u64>,
    /// Distances reported for the matches.
    pub distances: Vec<f32>,
}
50
/// Abstraction over a vector index implementation.
///
/// Implementors must be `Send + Sync`. Only the signatures are defined here; the notes on
/// individual methods that go beyond the signature are assumptions to confirm against the
/// concrete engines.
pub trait VectorIndexEngine: Send + Sync {
    /// Adds `vector` to the index under `key`.
    fn add(&mut self, key: u64, vector: &[f32]) -> Result<(), BoxedError>;

    /// Searches the index with `query` and `k`, returning the matched keys and distances.
    // NOTE(review): presumably returns at most `k` nearest matches — confirm.
    fn search(&self, query: &[f32], k: usize) -> Result<VectorSearchMatches, BoxedError>;

    /// Returns the length of the serialized index.
    // NOTE(review): presumably in bytes, and the buffer size `save_to_buffer` expects — confirm.
    fn serialized_length(&self) -> usize;

    /// Serializes the index into `buffer`.
    fn save_to_buffer(&self, buffer: &mut [u8]) -> Result<(), BoxedError>;

    /// Reserves room in the index according to `capacity`.
    // NOTE(review): unclear from here whether `capacity` is a total or an additional count.
    fn reserve(&mut self, capacity: usize) -> Result<(), BoxedError>;

    /// Returns the size of the index.
    // NOTE(review): presumably the number of vectors currently stored — confirm.
    fn size(&self) -> usize;

    /// Returns the capacity of the index.
    fn capacity(&self) -> usize;

    /// Returns the memory usage of the index.
    // NOTE(review): presumably in bytes — confirm.
    fn memory_usage(&self) -> usize;
}
80
/// Row selector for time series; used by [`ScanRequest`] in its `series_row_selector` field.
///
/// `Display` is derived (via `strum`), and the `ScanRequest` formatter prints this value
/// with `{}`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display)]
pub enum TimeSeriesRowSelector {
    // NOTE(review): presumably "keep only the last row of each series" — confirm.
    /// Selects the last row.
    LastRow,
}
87
/// Distribution hint for time series data; used by [`ScanRequest`] in its `distribution` field.
///
/// `Display` is derived (via `strum`), and the `ScanRequest` formatter prints this value
/// with `{}`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display)]
pub enum TimeSeriesDistribution {
    // NOTE(review): presumably "data is organised by time window" — confirm with the scanner.
    /// Time-windowed distribution.
    TimeWindowed,
    // NOTE(review): presumably "data is organised series by series" — confirm with the scanner.
    /// Per-series distribution.
    PerSeries,
}
98
/// Request describing a scan.
///
/// The derived `Default` leaves every `Option` as `None`, the collections empty and
/// `snapshot_on_scan` as `false`. The `Display` implementation prints only the fields that
/// differ from that default state.
#[derive(Default, Clone, Debug, PartialEq)]
pub struct ScanRequest {
    /// Projection of the scan; `projection_indices` exposes its `projection` as a slice.
    // NOTE(review): `None` presumably means "no projection / all columns" — confirm.
    pub projection_input: Option<ProjectionInput>,
    /// Filter expressions attached to the scan.
    pub filters: Vec<Expr>,
    /// Ordering options for the output, if any.
    pub output_ordering: Option<Vec<OrderOption>>,
    /// Limit of the scan, if any.
    // NOTE(review): presumably the maximum number of rows to return — confirm.
    pub limit: Option<usize>,
    /// Optional row selector for time series.
    pub series_row_selector: Option<TimeSeriesRowSelector>,
    /// Optional maximum sequence for memtables; printed as `sequence` by `Display`.
    // NOTE(review): inclusive/exclusive bound semantics are not visible here — confirm.
    pub memtable_max_sequence: Option<SequenceNumber>,
    /// Optional minimum sequence for memtables.
    // NOTE(review): inclusive/exclusive bound semantics are not visible here — confirm.
    pub memtable_min_sequence: Option<SequenceNumber>,
    /// Optional minimum sequence for SSTs.
    // NOTE(review): inclusive/exclusive bound semantics are not visible here — confirm.
    pub sst_min_sequence: Option<SequenceNumber>,
    /// Snapshot-on-scan flag; `false` by default.
    // NOTE(review): the effect of this flag is defined by the scan implementation — confirm.
    pub snapshot_on_scan: bool,
    /// Optional time series distribution hint.
    pub distribution: Option<TimeSeriesDistribution>,
    /// Optional vector search parameters.
    pub vector_search: Option<VectorSearchRequest>,
    /// JSON native type hints keyed by a string that `Display` labels as the column.
    pub json_type_hint: HashMap<String, JsonNativeType>,
}
137
138impl ScanRequest {
139 pub fn projection_indices(&self) -> Option<&[usize]> {
141 self.projection_input
142 .as_ref()
143 .map(|projection_input| projection_input.projection.as_slice())
144 }
145}
146
147impl Display for ScanRequest {
148 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
149 enum Delimiter {
150 None,
151 Init,
152 }
153
154 impl Delimiter {
155 fn as_str(&mut self) -> &str {
156 match self {
157 Delimiter::None => {
158 *self = Delimiter::Init;
159 ""
160 }
161 Delimiter::Init => ", ",
162 }
163 }
164 }
165
166 let mut delimiter = Delimiter::None;
167
168 write!(f, "ScanRequest {{ ")?;
169 if let Some(projection) = &self.projection_input {
170 write!(f, "{}projection: {:?}", delimiter.as_str(), projection)?;
171 }
172 if !self.filters.is_empty() {
173 write!(
174 f,
175 "{}filters: [{}]",
176 delimiter.as_str(),
177 self.filters
178 .iter()
179 .map(|f| f.to_string())
180 .collect::<Vec<_>>()
181 .join(", ")
182 )?;
183 }
184 if let Some(output_ordering) = &self.output_ordering {
185 write!(
186 f,
187 "{}output_ordering: {:?}",
188 delimiter.as_str(),
189 output_ordering
190 )?;
191 }
192 if let Some(limit) = &self.limit {
193 write!(f, "{}limit: {}", delimiter.as_str(), limit)?;
194 }
195 if let Some(series_row_selector) = &self.series_row_selector {
196 write!(
197 f,
198 "{}series_row_selector: {}",
199 delimiter.as_str(),
200 series_row_selector
201 )?;
202 }
203 if let Some(sequence) = &self.memtable_max_sequence {
204 write!(f, "{}sequence: {}", delimiter.as_str(), sequence)?;
205 }
206 if let Some(sst_min_sequence) = &self.sst_min_sequence {
207 write!(
208 f,
209 "{}sst_min_sequence: {}",
210 delimiter.as_str(),
211 sst_min_sequence
212 )?;
213 }
214 if self.snapshot_on_scan {
215 write!(
216 f,
217 "{}snapshot_on_scan: {}",
218 delimiter.as_str(),
219 self.snapshot_on_scan
220 )?;
221 }
222 if let Some(distribution) = &self.distribution {
223 write!(f, "{}distribution: {}", delimiter.as_str(), distribution)?;
224 }
225 if let Some(vector_search) = &self.vector_search {
226 write!(
227 f,
228 "{}vector_search: column_id={}, k={}, metric={}",
229 delimiter.as_str(),
230 vector_search.column_id,
231 vector_search.k,
232 vector_search.metric
233 )?;
234 }
235 if !self.json_type_hint.is_empty() {
236 write!(
237 f,
238 "{}json_type_hint: {}",
239 delimiter.as_str(),
240 self.json_type_hint
241 .iter()
242 .map(|(column, json_type)| format!("({column}: {json_type})"))
243 .join(", ")
244 )?;
245 }
246 write!(f, " }}")
247 }
248}
249
#[cfg(test)]
mod tests {
    use datafusion_expr::{Operator, binary_expr, col, lit};

    use super::*;

    /// Checks the `Display` output of `ScanRequest` for several field combinations.
    #[test]
    fn test_display_scan_request() {
        // Nothing is set: the formatter writes "ScanRequest { " followed by " }", so the
        // braces are separated by two spaces.
        let request = ScanRequest {
            ..Default::default()
        };
        assert_eq!(request.to_string(), "ScanRequest {  }");

        // Projection, filters and limit together.
        let projection_input = Some(vec![1, 2].into());
        let request = ScanRequest {
            projection_input,
            filters: vec![
                binary_expr(col("i"), Operator::Gt, lit(1)),
                binary_expr(col("s"), Operator::Eq, lit("x")),
            ],
            limit: Some(10),
            ..Default::default()
        };
        assert_eq!(
            request.to_string(),
            r#"ScanRequest { projection: ProjectionInput { projection: [1, 2], nested_paths: [] }, filters: [i > Int32(1), s = Utf8("x")], limit: 10 }"#
        );

        // No projection: the first printed field must not be preceded by a delimiter.
        let request = ScanRequest {
            filters: vec![
                binary_expr(col("i"), Operator::Gt, lit(1)),
                binary_expr(col("s"), Operator::Eq, lit("x")),
            ],
            limit: Some(10),
            ..Default::default()
        };
        assert_eq!(
            request.to_string(),
            r#"ScanRequest { filters: [i > Int32(1), s = Utf8("x")], limit: 10 }"#
        );

        // No filters: the empty filter list is skipped entirely.
        let projection_input = Some(vec![1, 2].into());
        let request = ScanRequest {
            projection_input,
            limit: Some(10),
            ..Default::default()
        };
        assert_eq!(
            request.to_string(),
            "ScanRequest { projection: ProjectionInput { projection: [1, 2], nested_paths: [] }, limit: 10 }"
        );

        // Projection with nested paths.
        let projection_input = Some(ProjectionInput::new(vec![1, 2]).with_nested_paths(vec![
            vec!["j".to_string(), "a".to_string(), "b".to_string()],
            vec!["s".to_string(), "x".to_string()],
        ]));
        let request = ScanRequest {
            projection_input,
            limit: Some(10),
            ..Default::default()
        };
        assert_eq!(
            request.to_string(),
            r#"ScanRequest { projection: ProjectionInput { projection: [1, 2], nested_paths: [["j", "a", "b"], ["s", "x"]] }, limit: 10 }"#
        );

        // `snapshot_on_scan` is printed only when it is `true`.
        let request = ScanRequest {
            snapshot_on_scan: true,
            ..Default::default()
        };
        assert_eq!(
            request.to_string(),
            "ScanRequest { snapshot_on_scan: true }"
        );
    }
}