diff --git a/Cargo.lock b/Cargo.lock index 8676c62f33..81a76954e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2797,6 +2797,12 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "digest" version = "0.10.7" @@ -2895,6 +2901,12 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "downcast" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" + [[package]] name = "dyn-clone" version = "1.0.16" @@ -3182,6 +3194,15 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + [[package]] name = "fnv" version = "1.0.7" @@ -3206,6 +3227,12 @@ dependencies = [ "regex", ] +[[package]] +name = "fragile" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" + [[package]] name = "frontend" version = "0.4.4" @@ -3913,12 +3940,14 @@ name = "index" version = "0.4.4" dependencies = [ "async-trait", + "bytemuck", "common-base", "common-error", "common-macro", "fst", "futures", "greptime-proto", + "mockall", "prost 0.12.2", "regex", "regex-automata 0.1.10", @@ -4873,6 +4902,33 @@ dependencies = [ "uuid", ] +[[package]] +name = "mockall" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c84490118f2ee2d74570d114f3d0493cbf02790df303d2707606c3e14e07c96" +dependencies = [ + "cfg-if 1.0.0", + "downcast", + "fragile", + "lazy_static", + "mockall_derive", + "predicates", + "predicates-tree", +] + +[[package]] +name = "mockall_derive" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ce75669015c4f47b289fd4d4f56e894e4c96003ffdf3ac51313126f94c6cbb" +dependencies = [ + "cfg-if 1.0.0", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "moka" version = "0.12.1" @@ -5101,6 +5157,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -6225,6 +6287,36 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "predicates" +version = "2.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" +dependencies = [ + "difflib", + "float-cmp", + "itertools 0.10.5", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" + +[[package]] +name = "predicates-tree" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "prettydiff" version = "0.6.4" @@ -9195,6 +9287,12 @@ dependencies = [ "libc", ] +[[package]] +name = "termtree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" + [[package]] name = "tests-integration" version = "0.4.4" diff --git a/Cargo.toml b/Cargo.toml index a25d4f20ce..04774aef41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,6 +74,7 @@ async-trait = "0.1" base64 = "0.21" bigdecimal = "0.4.2" bitflags = "2.4.1" +bytemuck = "1.12" chrono = { version = "0.4", features = ["serde"] } datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } @@ -92,6 +93,7 @@ humantime-serde = "1.1" itertools = "0.10" lazy_static = "1.4" meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" } +mockall = "0.11.4" moka = "0.12" once_cell = "1.18" opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [ diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index 54b8c75f7e..800420164a 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -6,12 +6,14 @@ license.workspace = true [dependencies] async-trait.workspace = true +bytemuck.workspace = true common-base.workspace = true common-error.workspace = true common-macro.workspace = true fst.workspace = true futures.workspace = true greptime-proto.workspace = true +mockall.workspace = true prost.workspace = true regex-automata.workspace = true regex.workspace = true diff --git a/src/index/src/inverted_index/format/reader.rs b/src/index/src/inverted_index/format/reader.rs index c18c6bd83e..e8e9c72c95 100644 --- a/src/index/src/inverted_index/format/reader.rs +++ b/src/index/src/inverted_index/format/reader.rs @@ -23,6 +23,7 @@ use crate::inverted_index::error::Result; use crate::inverted_index::FstMap; /// InvertedIndexReader defines an asynchronous reader of inverted index data +#[mockall::automock] #[async_trait] pub trait InvertedIndexReader { /// Retrieve metadata of all inverted indices stored within the blob. diff --git a/src/index/src/inverted_index/search.rs b/src/index/src/inverted_index/search.rs index d4f4b71f05..e4ab3d5c3b 100644 --- a/src/index/src/inverted_index/search.rs +++ b/src/index/src/inverted_index/search.rs @@ -13,4 +13,5 @@ // limitations under the License. pub mod fst_apply; +pub mod fst_values_mapper; pub mod predicate; diff --git a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs index 23503a97b1..a5ed84dce7 100644 --- a/src/index/src/inverted_index/search/fst_apply/keys_apply.rs +++ b/src/index/src/inverted_index/search/fst_apply/keys_apply.rs @@ -283,7 +283,10 @@ mod tests { }, })]; let result = KeysFstApplier::try_from(predicates); - assert!(result.is_err()); + assert!(matches!( + result, + Err(Error::KeysApplierWithoutInList { .. }) + )); } #[test] diff --git a/src/index/src/inverted_index/search/fst_values_mapper.rs b/src/index/src/inverted_index/search/fst_values_mapper.rs new file mode 100644 index 0000000000..d4675e652d --- /dev/null +++ b/src/index/src/inverted_index/search/fst_values_mapper.rs @@ -0,0 +1,112 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_base::BitVec; +use greptime_proto::v1::index::InvertedIndexMeta; + +use crate::inverted_index::error::Result; +use crate::inverted_index::format::reader::InvertedIndexReader; + +/// `FstValuesMapper` maps FST-encoded u64 values to their corresponding bitmaps +/// within an inverted index. The higher 32 bits of each u64 value represent the +/// bitmap offset and the lower 32 bits represent its size. This mapper uses these +/// combined offset-size pairs to fetch and union multiple bitmaps into a single `BitVec`. +pub struct FstValuesMapper<'a> { + /// `reader` retrieves bitmap data using offsets and sizes from FST values. + reader: &'a mut dyn InvertedIndexReader, + + /// `metadata` provides context for interpreting the index structures. + metadata: &'a InvertedIndexMeta, +} + +impl<'a> FstValuesMapper<'a> { + pub fn new( + reader: &'a mut dyn InvertedIndexReader, + metadata: &'a InvertedIndexMeta, + ) -> FstValuesMapper<'a> { + FstValuesMapper { reader, metadata } + } + + /// Maps an array of FST values to a `BitVec` by retrieving and combining bitmaps. + pub async fn map_values(&mut self, values: &[u64]) -> Result { + let mut bitmap = BitVec::new(); + + for value in values { + // relative_offset (higher 32 bits), size (lower 32 bits) + let [relative_offset, size] = bytemuck::cast::(*value); + + let bm = self + .reader + .bitmap(self.metadata, relative_offset, size) + .await?; + + // Ensure the longest BitVec is the left operand to prevent truncation during OR. + if bm.len() > bitmap.len() { + bitmap = bm | bitmap + } else { + bitmap |= bm + } + } + + Ok(bitmap) + } +} + +#[cfg(test)] +mod tests { + use common_base::bit_vec::prelude::*; + + use super::*; + use crate::inverted_index::format::reader::MockInvertedIndexReader; + + fn value(offset: u32, size: u32) -> u64 { + bytemuck::cast::<[u32; 2], u64>([offset, size]) + } + + #[tokio::test] + async fn test_map_values() { + let mut mock_reader = MockInvertedIndexReader::new(); + mock_reader + .expect_bitmap() + .returning(|_, offset, size| match (offset, size) { + (1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1]), + (2, 1) => Ok(bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1]), + _ => unreachable!(), + }); + + let meta = InvertedIndexMeta::default(); + let mut values_mapper = FstValuesMapper::new(&mut mock_reader, &meta); + + let result = values_mapper.map_values(&[]).await.unwrap(); + assert_eq!(result.count_ones(), 0); + + let result = values_mapper.map_values(&[value(1, 1)]).await.unwrap(); + assert_eq!(result, bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1]); + + let result = values_mapper.map_values(&[value(2, 1)]).await.unwrap(); + assert_eq!(result, bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1]); + + let result = values_mapper + .map_values(&[value(1, 1), value(2, 1)]) + .await + .unwrap(); + assert_eq!(result, bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1]); + + let result = values_mapper + .map_values(&[value(2, 1), value(1, 1)]) + .await + .unwrap(); + assert_eq!(result, bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1]); + } +} diff --git a/src/promql/Cargo.toml b/src/promql/Cargo.toml index fb4984b6e0..a10973d4eb 100644 --- a/src/promql/Cargo.toml +++ b/src/promql/Cargo.toml @@ -7,7 +7,7 @@ license.workspace = true [dependencies] async-recursion = "1.0" async-trait.workspace = true -bytemuck = "1.12" +bytemuck.workspace = true catalog.workspace = true common-catalog.workspace = true common-error.workspace = true