From 9c79baca4bd90b6f1c849a9652ccde0cef271bec Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 29 Oct 2024 15:57:17 +0800 Subject: [PATCH] feat(index): support building inverted index for the field column on Mito (#4887) feat(index): support building inverted index for the field column Signed-off-by: Zhenchi --- .../search/index_apply/predicates_apply.rs | 8 +- src/mito2/src/read/scan_region.rs | 25 ++- src/mito2/src/sst/index.rs | 18 +- .../index/inverted_index/applier/builder.rs | 21 +- .../inverted_index/applier/builder/between.rs | 31 ++- .../applier/builder/comparison.rs | 26 ++- .../inverted_index/applier/builder/eq_list.rs | 28 ++- .../inverted_index/applier/builder/in_list.rs | 22 +- .../applier/builder/regex_match.rs | 19 +- .../src/sst/index/inverted_index/creator.rs | 200 ++++++++++++------ 10 files changed, 271 insertions(+), 127 deletions(-) diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index face61bced..bead0761d7 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -114,17 +114,17 @@ impl PredicatesIndexApplier { .partition_in_place(|(_, ps)| ps.iter().any(|p| matches!(p, Predicate::InList(_)))); let mut iter = predicates.into_iter(); for _ in 0..in_list_index { - let (tag_name, predicates) = iter.next().unwrap(); + let (column_name, predicates) = iter.next().unwrap(); let fst_applier = Box::new(KeysFstApplier::try_from(predicates)?) as _; - fst_appliers.push((tag_name, fst_applier)); + fst_appliers.push((column_name, fst_applier)); } - for (tag_name, predicates) in iter { + for (column_name, predicates) in iter { if predicates.is_empty() { continue; } let fst_applier = Box::new(IntersectionFstApplier::try_from(predicates)?) as _; - fst_appliers.push((tag_name, fst_applier)); + fst_appliers.push((column_name, fst_applier)); } Ok(PredicatesIndexApplier { fst_appliers }) diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 2c61124180..6e2a034433 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -393,20 +393,29 @@ impl ScanRegion { .and_then(|c| c.index_cache()) .cloned(); + // TODO(zhongzc): currently we only index tag columns, need to support field columns. + let ignore_column_ids = &self + .version + .options + .index_options + .inverted_index + .ignore_column_ids; + let indexed_column_ids = self + .version + .metadata + .primary_key + .iter() + .filter(|id| !ignore_column_ids.contains(id)) + .copied() + .collect::>(); + InvertedIndexApplierBuilder::new( self.access_layer.region_dir().to_string(), self.access_layer.object_store().clone(), file_cache, index_cache, self.version.metadata.as_ref(), - self.version - .options - .index_options - .inverted_index - .ignore_column_ids - .iter() - .copied() - .collect(), + indexed_column_ids, self.access_layer.puffin_manager_factory().clone(), ) .build(&self.request.filters) diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs index 2fcfd8ee8c..f0ee66ab01 100644 --- a/src/mito2/src/sst/index.rs +++ b/src/mito2/src/sst/index.rs @@ -20,6 +20,7 @@ pub(crate) mod puffin_manager; mod statistics; mod store; +use std::collections::HashSet; use std::num::NonZeroUsize; use common_telemetry::{debug, warn}; @@ -212,13 +213,28 @@ impl<'a> IndexerBuilder<'a> { segment_row_count = row_group_size; } + // TODO(zhongzc): currently we only index tag columns, need to support field columns. + let indexed_column_ids = self + .metadata + .primary_key + .iter() + .filter(|id| { + !self + .index_options + .inverted_index + .ignore_column_ids + .contains(id) + }) + .copied() + .collect::>(); + let indexer = InvertedIndexer::new( self.file_id, self.metadata, self.intermediate_manager.clone(), self.inverted_index_config.mem_threshold_on_create(), segment_row_count, - &self.index_options.inverted_index.ignore_column_ids, + indexed_column_ids, ); Some(indexer) diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder.rs b/src/mito2/src/sst/index/inverted_index/applier/builder.rs index 6d37ffc023..603cf5aa23 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder.rs @@ -20,7 +20,6 @@ mod regex_match; use std::collections::{HashMap, HashSet}; -use api::v1::SemanticType; use common_telemetry::warn; use datafusion_common::ScalarValue; use datafusion_expr::{BinaryExpr, Expr, Operator}; @@ -55,8 +54,8 @@ pub(crate) struct InvertedIndexApplierBuilder<'a> { /// Metadata of the region, used to get metadata like column type. metadata: &'a RegionMetadata, - /// Column ids to ignore. - ignore_column_ids: HashSet, + /// Column ids of the columns that are indexed. + indexed_column_ids: HashSet, /// Stores predicates during traversal on the Expr tree. output: HashMap>, @@ -76,7 +75,7 @@ impl<'a> InvertedIndexApplierBuilder<'a> { file_cache: Option, index_cache: Option, metadata: &'a RegionMetadata, - ignore_column_ids: HashSet, + indexed_column_ids: HashSet, puffin_manager_factory: PuffinManagerFactory, ) -> Self { Self { @@ -84,7 +83,7 @@ impl<'a> InvertedIndexApplierBuilder<'a> { object_store, file_cache, metadata, - ignore_column_ids, + indexed_column_ids, output: HashMap::default(), index_cache, puffin_manager_factory, @@ -156,9 +155,9 @@ impl<'a> InvertedIndexApplierBuilder<'a> { self.output.entry(column_id).or_default().push(predicate); } - /// Helper function to get the column id and the column type of a tag column. + /// Helper function to get the column id and the column type of a column. /// Returns `None` if the column is not a tag column or if the column is ignored. - fn tag_column_id_and_type( + fn column_id_and_type( &self, column_name: &str, ) -> Result> { @@ -169,11 +168,7 @@ impl<'a> InvertedIndexApplierBuilder<'a> { column: column_name, })?; - if self.ignore_column_ids.contains(&column.column_id) { - return Ok(None); - } - - if column.semantic_type != SemanticType::Tag { + if !self.indexed_column_ids.contains(&column.column_id) { return Ok(None); } @@ -330,7 +325,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs index ae4de2170a..0a196e6f1a 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs @@ -28,7 +28,7 @@ impl InvertedIndexApplierBuilder<'_> { let Some(column_name) = Self::column_name(&between.expr) else { return Ok(()); }; - let Some((column_id, data_type)) = self.tag_column_id_and_type(column_name)? else { + let Some((column_id, data_type)) = self.column_id_and_type(column_name)? else { return Ok(()); }; let Some(low) = Self::nonnull_lit(&between.low) else { @@ -78,7 +78,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -121,7 +121,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -147,7 +147,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -159,7 +159,24 @@ mod tests { }; builder.collect_between(&between).unwrap(); - assert!(builder.output.is_empty()); + + let predicates = builder.output.get(&3).unwrap(); + assert_eq!(predicates.len(), 1); + assert_eq!( + predicates[0], + Predicate::Range(RangePredicate { + range: Range { + lower: Some(Bound { + inclusive: true, + value: encoded_string("abc"), + }), + upper: Some(Bound { + inclusive: true, + value: encoded_string("def"), + }), + } + }) + ); } #[test] @@ -173,7 +190,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -200,7 +217,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs index 2c9fa861ea..cdaec9f94e 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs @@ -114,7 +114,7 @@ impl InvertedIndexApplierBuilder<'_> { let Some(lit) = Self::nonnull_lit(literal) else { return Ok(()); }; - let Some((column_id, data_type)) = self.tag_column_id_and_type(column_name)? else { + let Some((column_id, data_type)) = self.column_id_and_type(column_name)? else { return Ok(()); }; @@ -234,7 +234,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -263,7 +263,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -283,14 +283,28 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); builder .collect_comparison_expr(&field_column(), &Operator::Lt, &string_lit("abc")) .unwrap(); - assert!(builder.output.is_empty()); + + let predicates = builder.output.get(&3).unwrap(); + assert_eq!(predicates.len(), 1); + assert_eq!( + predicates[0], + Predicate::Range(RangePredicate { + range: Range { + lower: None, + upper: Some(Bound { + inclusive: false, + value: encoded_string("abc"), + }), + } + }) + ); } #[test] @@ -304,7 +318,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs index 6d142d6402..1d07cca487 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs @@ -31,7 +31,7 @@ impl InvertedIndexApplierBuilder<'_> { let Some(lit) = Self::nonnull_lit(right).or_else(|| Self::nonnull_lit(left)) else { return Ok(()); }; - let Some((column_id, data_type)) = self.tag_column_id_and_type(column_name)? else { + let Some((column_id, data_type)) = self.column_id_and_type(column_name)? else { return Ok(()); }; @@ -59,7 +59,7 @@ impl InvertedIndexApplierBuilder<'_> { let Some(lit) = Self::nonnull_lit(right).or_else(|| Self::nonnull_lit(left)) else { return Ok(()); }; - let Some((column_id, data_type)) = self.tag_column_id_and_type(column_name)? else { + let Some((column_id, data_type)) = self.column_id_and_type(column_name)? else { return Ok(()); }; @@ -140,7 +140,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -178,14 +178,22 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); builder .collect_eq(&field_column(), &string_lit("abc")) .unwrap(); - assert!(builder.output.is_empty()); + + let predicates = builder.output.get(&3).unwrap(); + assert_eq!(predicates.len(), 1); + assert_eq!( + predicates[0], + Predicate::InList(InListPredicate { + list: HashSet::from_iter([encoded_string("abc")]) + }) + ); } #[test] @@ -199,7 +207,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -219,7 +227,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -239,7 +247,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -298,7 +306,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -336,7 +344,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs index c8cf9c4d16..6a520ba401 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs @@ -29,7 +29,7 @@ impl InvertedIndexApplierBuilder<'_> { let Some(column_name) = Self::column_name(&inlist.expr) else { return Ok(()); }; - let Some((column_id, data_type)) = self.tag_column_id_and_type(column_name)? else { + let Some((column_id, data_type)) = self.column_id_and_type(column_name)? else { return Ok(()); }; @@ -71,7 +71,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -104,7 +104,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -129,7 +129,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -140,7 +140,15 @@ mod tests { }; builder.collect_inlist(&in_list).unwrap(); - assert!(builder.output.is_empty()); + + let predicates = builder.output.get(&3).unwrap(); + assert_eq!(predicates.len(), 1); + assert_eq!( + predicates[0], + Predicate::InList(InListPredicate { + list: HashSet::from_iter([encoded_string("foo"), encoded_string("bar")]) + }) + ); } #[test] @@ -154,7 +162,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -181,7 +189,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs index a60d9d9c0f..7fdf7f3de5 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs @@ -25,7 +25,7 @@ impl InvertedIndexApplierBuilder<'_> { let Some(column_name) = Self::column_name(column) else { return Ok(()); }; - let Some((column_id, data_type)) = self.tag_column_id_and_type(column_name)? else { + let Some((column_id, data_type)) = self.column_id_and_type(column_name)? else { return Ok(()); }; if !data_type.is_string() { @@ -65,7 +65,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -94,7 +94,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -102,7 +102,14 @@ mod tests { .collect_regex_match(&field_column(), &string_lit("abc")) .unwrap(); - assert!(builder.output.is_empty()); + let predicates = builder.output.get(&3).unwrap(); + assert_eq!(predicates.len(), 1); + assert_eq!( + predicates[0], + Predicate::RegexMatch(RegexMatchPredicate { + pattern: "abc".to_string() + }) + ); } #[test] @@ -116,7 +123,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); @@ -138,7 +145,7 @@ mod tests { None, None, &metadata, - HashSet::default(), + HashSet::from_iter([1, 2, 3]), facotry, ); diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index b2b1104819..6db1ef6e0b 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -36,6 +36,7 @@ use crate::error::{ PushIndexValueSnafu, Result, }; use crate::read::Batch; +use crate::row_converter::SortField; use crate::sst::file::FileId; use crate::sst::index::intermediate::{IntermediateLocation, IntermediateManager}; use crate::sst::index::inverted_index::codec::{IndexValueCodec, IndexValuesCodec}; @@ -72,7 +73,7 @@ pub struct InvertedIndexer { memory_usage: Arc, /// Ids of indexed columns. - column_ids: HashSet, + indexed_column_ids: HashSet, } impl InvertedIndexer { @@ -84,7 +85,7 @@ impl InvertedIndexer { intermediate_manager: IntermediateManager, memory_usage_threshold: Option, segment_row_count: NonZeroUsize, - ignore_column_ids: &[ColumnId], + indexed_column_ids: HashSet, ) -> Self { let temp_file_provider = Arc::new(TempFileProvider::new( IntermediateLocation::new(&metadata.region_id, &sst_file_id), @@ -102,14 +103,6 @@ impl InvertedIndexer { let index_creator = Box::new(SortIndexCreator::new(sorter, segment_row_count)); let codec = IndexValuesCodec::from_tag_columns(metadata.primary_key_columns()); - let mut column_ids = metadata - .primary_key_columns() - .map(|c| c.column_id) - .collect::>(); - for id in ignore_column_ids { - column_ids.remove(id); - } - Self { codec, index_creator, @@ -118,7 +111,7 @@ impl InvertedIndexer { stats: Statistics::new(TYPE_INVERTED_INDEX), aborted: false, memory_usage, - column_ids, + indexed_column_ids, } } @@ -189,7 +182,7 @@ impl InvertedIndexer { guard.inc_row_count(n); for ((col_id, col_id_str), field, value) in self.codec.decode(batch.primary_key())? { - if !self.column_ids.contains(col_id) { + if !self.indexed_column_ids.contains(col_id) { continue; } @@ -210,6 +203,32 @@ impl InvertedIndexer { .context(PushIndexValueSnafu)?; } + for field in batch.fields() { + if !self.indexed_column_ids.contains(&field.column_id) { + continue; + } + + let sort_field = SortField::new(field.data.data_type()); + let col_id_str = field.column_id.to_string(); + for i in 0..n { + self.value_buf.clear(); + let value = field.data.get_ref(i); + + if value.is_null() { + self.index_creator + .push_with_name(&col_id_str, None) + .await + .context(PushIndexValueSnafu)?; + } else { + IndexValueCodec::encode_nonnull_value(value, &sort_field, &mut self.value_buf)?; + self.index_creator + .push_with_name(&col_id_str, Some(&self.value_buf)) + .await + .context(PushIndexValueSnafu)?; + } + } + } + Ok(()) } @@ -269,7 +288,7 @@ impl InvertedIndexer { } pub fn column_ids(&self) -> impl Iterator + '_ { - self.column_ids.iter().copied() + self.indexed_column_ids.iter().copied() } pub fn memory_usage(&self) -> usize { @@ -297,6 +316,7 @@ mod tests { use super::*; use crate::cache::index::InvertedIndexCache; + use crate::read::BatchColumn; use crate::row_converter::{McmpRowCodec, RowCodec, SortField}; use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder; use crate::sst::index::puffin_manager::PuffinManagerFactory; @@ -340,12 +360,25 @@ mod tests { semantic_type: SemanticType::Timestamp, column_id: 3, }) + .push_column_metadata(ColumnMetadata { + column_schema: ColumnSchema::new( + "field_u64", + ConcreteDataType::uint64_datatype(), + false, + ), + semantic_type: SemanticType::Field, + column_id: 4, + }) .primary_key(vec![1, 2]); Arc::new(builder.build().unwrap()) } - fn new_batch(num_rows: usize, str_tag: impl AsRef, i32_tag: impl Into) -> Batch { + fn new_batch( + str_tag: impl AsRef, + i32_tag: impl Into, + u64_field: impl IntoIterator, + ) -> Batch { let fields = vec![ SortField::new(ConcreteDataType::string_datatype()), SortField::new(ConcreteDataType::int32_datatype()), @@ -354,6 +387,12 @@ mod tests { let row: [ValueRef; 2] = [str_tag.as_ref().into(), i32_tag.into().into()]; let primary_key = codec.encode(row.into_iter()).unwrap(); + let u64_field = BatchColumn { + column_id: 4, + data: Arc::new(UInt64Vector::from_iter_values(u64_field)), + }; + let num_rows = u64_field.data.len(); + Batch::new( primary_key, Arc::new(UInt64Vector::from_iter_values( @@ -365,14 +404,14 @@ mod tests { Arc::new(UInt8Vector::from_iter_values( iter::repeat(1).take(num_rows), )), - vec![], + vec![u64_field], ) .unwrap() } async fn build_applier_factory( prefix: &str, - tags: BTreeSet<(&'static str, i32)>, + rows: BTreeSet<(&'static str, i32, [u64; 2])>, ) -> impl Fn(DfExpr) -> BoxFuture<'static, Vec> { let (d, factory) = PuffinManagerFactory::new_for_test_async(prefix).await; let region_dir = "region0".to_string(); @@ -383,6 +422,7 @@ mod tests { let intm_mgr = new_intm_mgr(d.path().to_string_lossy()).await; let memory_threshold = None; let segment_row_count = 2; + let indexed_column_ids = HashSet::from_iter([1, 2, 4]); let mut creator = InvertedIndexer::new( sst_file_id, @@ -390,18 +430,18 @@ mod tests { intm_mgr, memory_threshold, NonZeroUsize::new(segment_row_count).unwrap(), - &[], + indexed_column_ids.clone(), ); - for (str_tag, i32_tag) in &tags { - let batch = new_batch(segment_row_count, str_tag, *i32_tag); + for (str_tag, i32_tag, u64_field) in &rows { + let batch = new_batch(str_tag, *i32_tag, u64_field.iter().copied()); creator.update(&batch).await.unwrap(); } let puffin_manager = factory.build(object_store.clone()); let mut writer = puffin_manager.writer(&file_path).await.unwrap(); let (row_count, _) = creator.finish(&mut writer).await.unwrap(); - assert_eq!(row_count, tags.len() * segment_row_count); + assert_eq!(row_count, rows.len() * segment_row_count); writer.finish().await.unwrap(); move |expr| { @@ -413,7 +453,7 @@ mod tests { None, Some(cache), ®ion_metadata, - Default::default(), + indexed_column_ids.clone(), factory.clone(), ) .build(&[expr]) @@ -433,19 +473,19 @@ mod tests { #[tokio::test] async fn test_create_and_query_get_key() { - let tags = BTreeSet::from_iter([ - ("aaa", 1), - ("aaa", 2), - ("aaa", 3), - ("aab", 1), - ("aab", 2), - ("aab", 3), - ("abc", 1), - ("abc", 2), - ("abc", 3), + let rows = BTreeSet::from_iter([ + ("aaa", 1, [1, 2]), + ("aaa", 2, [2, 3]), + ("aaa", 3, [3, 4]), + ("aab", 1, [4, 5]), + ("aab", 2, [5, 6]), + ("aab", 3, [6, 7]), + ("abc", 1, [7, 8]), + ("abc", 2, [8, 9]), + ("abc", 3, [9, 10]), ]); - let applier_factory = build_applier_factory("test_create_and_query_get_key_", tags).await; + let applier_factory = build_applier_factory("test_create_and_query_get_key_", rows).await; let expr = col("tag_str").eq(lit("aaa")); let res = applier_factory(expr).await; @@ -468,23 +508,27 @@ mod tests { let expr = col("tag_str").in_list(vec![lit("aaa"), lit("abc")], false); let res = applier_factory(expr).await; assert_eq!(res, vec![0, 1, 2, 6, 7, 8]); + + let expr = col("field_u64").eq(lit(2u64)); + let res = applier_factory(expr).await; + assert_eq!(res, vec![0, 1]); } #[tokio::test] async fn test_create_and_query_range() { - let tags = BTreeSet::from_iter([ - ("aaa", 1), - ("aaa", 2), - ("aaa", 3), - ("aab", 1), - ("aab", 2), - ("aab", 3), - ("abc", 1), - ("abc", 2), - ("abc", 3), + let rows = BTreeSet::from_iter([ + ("aaa", 1, [1, 2]), + ("aaa", 2, [2, 3]), + ("aaa", 3, [3, 4]), + ("aab", 1, [4, 5]), + ("aab", 2, [5, 6]), + ("aab", 3, [6, 7]), + ("abc", 1, [7, 8]), + ("abc", 2, [8, 9]), + ("abc", 3, [9, 10]), ]); - let applier_factory = build_applier_factory("test_create_and_query_range_", tags).await; + let applier_factory = build_applier_factory("test_create_and_query_range_", rows).await; let expr = col("tag_str").between(lit("aaa"), lit("aab")); let res = applier_factory(expr).await; @@ -501,24 +545,28 @@ mod tests { let expr = col("tag_i32").between(lit(2), lit(2)); let res = applier_factory(expr).await; assert_eq!(res, vec![1, 4, 7]); + + let expr = col("field_u64").between(lit(2u64), lit(5u64)); + let res = applier_factory(expr).await; + assert_eq!(res, vec![0, 1, 2, 3, 4]); } #[tokio::test] async fn test_create_and_query_comparison() { - let tags = BTreeSet::from_iter([ - ("aaa", 1), - ("aaa", 2), - ("aaa", 3), - ("aab", 1), - ("aab", 2), - ("aab", 3), - ("abc", 1), - ("abc", 2), - ("abc", 3), + let rows = BTreeSet::from_iter([ + ("aaa", 1, [1, 2]), + ("aaa", 2, [2, 3]), + ("aaa", 3, [3, 4]), + ("aab", 1, [4, 5]), + ("aab", 2, [5, 6]), + ("aab", 3, [6, 7]), + ("abc", 1, [7, 8]), + ("abc", 2, [8, 9]), + ("abc", 3, [9, 10]), ]); let applier_factory = - build_applier_factory("test_create_and_query_comparison_", tags).await; + build_applier_factory("test_create_and_query_comparison_", rows).await; let expr = col("tag_str").lt(lit("aab")); let res = applier_factory(expr).await; @@ -528,6 +576,10 @@ mod tests { let res = applier_factory(expr).await; assert_eq!(res, vec![0, 3, 6]); + let expr = col("field_u64").lt(lit(2u64)); + let res = applier_factory(expr).await; + assert_eq!(res, vec![0]); + let expr = col("tag_str").gt(lit("aab")); let res = applier_factory(expr).await; assert_eq!(res, vec![6, 7, 8]); @@ -536,6 +588,10 @@ mod tests { let res = applier_factory(expr).await; assert_eq!(res, vec![2, 5, 8]); + let expr = col("field_u64").gt(lit(8u64)); + let res = applier_factory(expr).await; + assert_eq!(res, vec![7, 8]); + let expr = col("tag_str").lt_eq(lit("aab")); let res = applier_factory(expr).await; assert_eq!(res, vec![0, 1, 2, 3, 4, 5]); @@ -544,6 +600,10 @@ mod tests { let res = applier_factory(expr).await; assert_eq!(res, vec![0, 1, 3, 4, 6, 7]); + let expr = col("field_u64").lt_eq(lit(2u64)); + let res = applier_factory(expr).await; + assert_eq!(res, vec![0, 1]); + let expr = col("tag_str").gt_eq(lit("aab")); let res = applier_factory(expr).await; assert_eq!(res, vec![3, 4, 5, 6, 7, 8]); @@ -552,6 +612,10 @@ mod tests { let res = applier_factory(expr).await; assert_eq!(res, vec![1, 2, 4, 5, 7, 8]); + let expr = col("field_u64").gt_eq(lit(8u64)); + let res = applier_factory(expr).await; + assert_eq!(res, vec![6, 7, 8]); + let expr = col("tag_str") .gt(lit("aaa")) .and(col("tag_str").lt(lit("abc"))); @@ -561,23 +625,29 @@ mod tests { let expr = col("tag_i32").gt(lit(1)).and(col("tag_i32").lt(lit(3))); let res = applier_factory(expr).await; assert_eq!(res, vec![1, 4, 7]); + + let expr = col("field_u64") + .gt(lit(2u64)) + .and(col("field_u64").lt(lit(9u64))); + let res = applier_factory(expr).await; + assert_eq!(res, vec![1, 2, 3, 4, 5, 6, 7]); } #[tokio::test] async fn test_create_and_query_regex() { - let tags = BTreeSet::from_iter([ - ("aaa", 1), - ("aaa", 2), - ("aaa", 3), - ("aab", 1), - ("aab", 2), - ("aab", 3), - ("abc", 1), - ("abc", 2), - ("abc", 3), + let rows = BTreeSet::from_iter([ + ("aaa", 1, [1, 2]), + ("aaa", 2, [2, 3]), + ("aaa", 3, [3, 4]), + ("aab", 1, [4, 5]), + ("aab", 2, [5, 6]), + ("aab", 3, [6, 7]), + ("abc", 1, [7, 8]), + ("abc", 2, [8, 9]), + ("abc", 3, [9, 10]), ]); - let applier_factory = build_applier_factory("test_create_and_query_regex_", tags).await; + let applier_factory = build_applier_factory("test_create_and_query_regex_", rows).await; let expr = binary_expr(col("tag_str"), Operator::RegexMatch, lit(".*")); let res = applier_factory(expr).await;