feat(inverted_index): Add applier builder to convert Expr to Predicates (Part 1) (#3034)

* feat(inverted_index.integration): Add applier builder to convert Expr to Predicates (Part 1)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add docs

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: typos

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/sst/index/applier/builder.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: remove unwrap

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: error source

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
This commit is contained in:
Zhenchi
2023-12-30 15:32:32 +08:00
committed by GitHub
parent 1c94d4c506
commit 69a53130c2
14 changed files with 668 additions and 50 deletions

13
Cargo.lock generated
View File

@@ -4029,7 +4029,7 @@ dependencies = [
"prost 0.12.3",
"rand",
"regex",
"regex-automata 0.1.10",
"regex-automata 0.2.0",
"snafu",
"tokio",
"tokio-util",
@@ -4977,6 +4977,7 @@ dependencies = [
"datatypes",
"futures",
"humantime-serde",
"index",
"lazy_static",
"log-store",
"memcomparable",
@@ -7134,8 +7135,18 @@ name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
name = "regex-automata"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782"
dependencies = [
"fst",
"memchr",
"regex-syntax 0.6.29",
]

View File

@@ -111,7 +111,7 @@ prost = "0.12"
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
rand = "0.8"
regex = "1.8"
regex-automata = { version = "0.1", features = ["transducer"] }
regex-automata = { version = "0.2", features = ["transducer"] }
reqwest = { version = "0.11", default-features = false, features = [
"json",
"rustls-tls-native-roots",
@@ -169,6 +169,7 @@ datanode = { path = "src/datanode" }
datatypes = { path = "src/datatypes" }
file-engine = { path = "src/file-engine" }
frontend = { path = "src/frontend" }
index = { path = "src/index" }
log-store = { path = "src/log-store" }
meta-client = { path = "src/meta-client" }
meta-srv = { path = "src/meta-srv" }

View File

@@ -42,7 +42,7 @@ pub struct KafkaConfig {
#[serde(skip)]
#[serde(default)]
pub compression: RsKafkaCompression,
/// The maximum log size a kakfa batch producer could buffer.
/// The maximum log size a kafka batch producer could buffer.
pub max_batch_size: ReadableSize,
/// The linger duration of a kafka batch producer.
#[serde(with = "humantime_serde")]

View File

@@ -113,7 +113,7 @@ pub enum Error {
#[snafu(display("Failed to parse regex DFA"))]
ParseDFA {
#[snafu(source)]
error: regex_automata::Error,
error: Box<regex_automata::dfa::Error>,
location: Location,
},

View File

@@ -14,7 +14,7 @@
use fst::map::OpBuilder;
use fst::{IntoStreamer, Streamer};
use regex_automata::DenseDFA;
use regex_automata::dfa::dense::DFA;
use snafu::{ensure, ResultExt};
use crate::inverted_index::error::{
@@ -24,15 +24,13 @@ use crate::inverted_index::search::fst_apply::FstApplier;
use crate::inverted_index::search::predicate::{Predicate, Range};
use crate::inverted_index::FstMap;
type Dfa = DenseDFA<Vec<usize>, usize>;
/// `IntersectionFstApplier` applies intersection operations on an FstMap using specified ranges and regex patterns.
pub struct IntersectionFstApplier {
/// A list of `Range` which define inclusive or exclusive ranges for keys to be queried in the FstMap.
ranges: Vec<Range>,
/// A list of `Dfa` compiled from regular expression patterns.
dfas: Vec<Dfa>,
dfas: Vec<DFA<Vec<u32>>>,
}
impl FstApplier for IntersectionFstApplier {
@@ -88,8 +86,8 @@ impl IntersectionFstApplier {
match predicate {
Predicate::Range(range) => ranges.push(range.range),
Predicate::RegexMatch(regex) => {
let dfa = DenseDFA::new(&regex.pattern);
let dfa = dfa.context(ParseDFASnafu)?;
let dfa = DFA::new(&regex.pattern);
let dfa = dfa.map_err(Box::new).context(ParseDFASnafu)?;
dfas.push(dfa);
}
// Rejection of `InList` predicates is enforced here.
@@ -210,47 +208,67 @@ mod tests {
#[test]
fn test_intersection_fst_applier_with_valid_pattern() {
let test_fst = FstMap::from_iter([("aa", 1), ("bb", 2), ("cc", 3)]).unwrap();
let test_fst = FstMap::from_iter([("123", 1), ("abc", 2)]).unwrap();
let applier = create_applier_from_pattern("a.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let cases = vec![
("1", vec![1]),
("2", vec![1]),
("3", vec![1]),
("^1", vec![1]),
("^2", vec![]),
("^3", vec![]),
("^1.*", vec![1]),
("^.*2", vec![1]),
("^.*3", vec![1]),
("1$", vec![]),
("2$", vec![]),
("3$", vec![1]),
("1.*$", vec![1]),
("2.*$", vec![1]),
("3.*$", vec![1]),
("^1..$", vec![1]),
("^.2.$", vec![1]),
("^..3$", vec![1]),
("^[0-9]", vec![1]),
("^[0-9]+$", vec![1]),
("^[0-9][0-9]$", vec![]),
("^[0-9][0-9][0-9]$", vec![1]),
("^123$", vec![1]),
("a", vec![2]),
("b", vec![2]),
("c", vec![2]),
("^a", vec![2]),
("^b", vec![]),
("^c", vec![]),
("^a.*", vec![2]),
("^.*b", vec![2]),
("^.*c", vec![2]),
("a$", vec![]),
("b$", vec![]),
("c$", vec![2]),
("a.*$", vec![2]),
("b.*$", vec![2]),
("c.*$", vec![2]),
("^.[a-z]", vec![2]),
("^abc$", vec![2]),
("^ab$", vec![]),
("abc$", vec![2]),
("^a.c$", vec![2]),
("^..c$", vec![2]),
("ab", vec![2]),
(".*", vec![1, 2]),
("", vec![1, 2]),
("^$", vec![]),
("1|a", vec![1, 2]),
("^123$|^abc$", vec![1, 2]),
("^123$|d", vec![1]),
];
let applier = create_applier_from_pattern("b.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![2]);
let applier = create_applier_from_pattern("c.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![3]);
let applier = create_applier_from_pattern("a.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let applier = create_applier_from_pattern("b.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![2]);
let applier = create_applier_from_pattern("c.*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![3]);
let applier = create_applier_from_pattern("d.?").unwrap();
let results = applier.apply(&test_fst);
assert!(results.is_empty());
let applier = create_applier_from_pattern("a.?|b.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1, 2]);
let applier = create_applier_from_pattern("d.?|a.?").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1]);
let applier = create_applier_from_pattern(".*").unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, vec![1, 2, 3]);
for (pattern, expected) in cases {
let applier = create_applier_from_pattern(pattern).unwrap();
let results = applier.apply(&test_fst);
assert_eq!(results, expected);
}
}
#[test]

View File

@@ -39,6 +39,7 @@ datafusion.workspace = true
datatypes.workspace = true
futures.workspace = true
humantime-serde.workspace = true
index.workspace = true
lazy_static = "1.4"
log-store = { workspace = true, optional = true }
memcomparable = "0.2"

View File

@@ -423,6 +423,23 @@ pub enum Error {
#[snafu(source)]
error: parquet::errors::ParquetError,
},
#[snafu(display("Column not found, column: {column}"))]
ColumnNotFound { column: String, location: Location },
#[snafu(display("Failed to build index applier"))]
BuildIndexApplier {
#[snafu(source)]
source: index::inverted_index::error::Error,
location: Location,
},
#[snafu(display("Failed to convert value"))]
ConvertValue {
#[snafu(source)]
source: datatypes::error::Error,
location: Location,
},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -468,6 +485,7 @@ impl ErrorExt for Error {
| InvalidRequest { .. }
| FillDefault { .. }
| ConvertColumnDataType { .. }
| ColumnNotFound { .. }
| InvalidMetadata { .. } => StatusCode::InvalidArguments,
RegionMetadataNotFound { .. }
| Join { .. }
@@ -504,6 +522,8 @@ impl ErrorExt for Error {
JsonOptions { .. } => StatusCode::InvalidArguments,
EmptyRegionDir { .. } | EmptyManifestDir { .. } => StatusCode::RegionNotFound,
ArrowReader { .. } => StatusCode::StorageUnavailable,
BuildIndexApplier { source, .. } => source.status_code(),
ConvertValue { source, .. } => source.status_code(),
}
}

View File

@@ -84,7 +84,11 @@ impl SortField {
}
impl SortField {
fn serialize(&self, serializer: &mut Serializer<&mut Vec<u8>>, value: &ValueRef) -> Result<()> {
pub(crate) fn serialize(
&self,
serializer: &mut Serializer<&mut Vec<u8>>,
value: &ValueRef,
) -> Result<()> {
macro_rules! cast_value_and_serialize {
(
$self: ident;

View File

@@ -16,5 +16,6 @@
pub mod file;
pub mod file_purger;
mod index;
pub mod parquet;
pub(crate) mod version;

View File

@@ -0,0 +1,18 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// NOTE(review): dead-code warnings are silenced for this whole module, presumably
// because the index applier is being introduced in parts and nothing calls it yet.
// Drop this attribute once the module is wired in — TODO confirm.
#![allow(dead_code)]
// Public within `sst`: builds and holds the applier for SST index predicates.
pub mod applier;
// Private: value encoding used when building index predicates.
mod codec;

View File

@@ -0,0 +1,47 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod builder;
use index::inverted_index::search::index_apply::IndexApplier;
use object_store::ObjectStore;
/// The [`SstIndexApplier`] is responsible for applying predicates to the provided SST files
/// and returning the relevant row group ids for further scan.
///
/// NOTE(review): in the code visible here the struct only stores its three parts; no
/// method reads them yet, so the apply step is presumably added later — TODO confirm.
pub struct SstIndexApplier {
/// The root directory of the region.
region_dir: String,
/// Object store responsible for accessing SST files.
object_store: ObjectStore,
/// Predefined index applier used to apply predicates to index files
/// and return the relevant row group ids for further scan.
/// Boxed as a trait object, so any [`IndexApplier`] implementation can be plugged in.
index_applier: Box<dyn IndexApplier>,
}
impl SstIndexApplier {
/// Creates a new [`SstIndexApplier`].
///
/// # Arguments
/// * `region_dir` - The root directory of the region.
/// * `object_store` - Object store used for accessing SST files.
/// * `index_applier` - The index applier that holds the predicates to apply.
///
/// The arguments are stored as-is; nothing is read or validated here.
pub fn new(
region_dir: String,
object_store: ObjectStore,
index_applier: Box<dyn IndexApplier>,
) -> Self {
Self {
region_dir,
object_store,
index_applier,
}
}
}

View File

@@ -0,0 +1,261 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod between;
// TODO(zhongzc): This PR is too large. The following modules are coming soon.
// mod comparison;
// mod eq_list;
// mod in_list;
// mod regex_match;
use std::collections::HashMap;
use api::v1::SemanticType;
use common_query::logical_plan::Expr;
use common_telemetry::warn;
use datafusion_common::ScalarValue;
use datafusion_expr::Expr as DfExpr;
use datatypes::data_type::ConcreteDataType;
use datatypes::value::Value;
use index::inverted_index::search::index_apply::PredicatesIndexApplier;
use index::inverted_index::search::predicate::Predicate;
use object_store::ObjectStore;
use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadata;
use crate::error::{BuildIndexApplierSnafu, ColumnNotFoundSnafu, ConvertValueSnafu, Result};
use crate::row_converter::SortField;
use crate::sst::index::applier::SstIndexApplier;
use crate::sst::index::codec::IndexValueCodec;
/// Name of a column, used as the key under which collected predicates are grouped.
type ColumnName = String;
/// Constructs an [`SstIndexApplier`] which applies predicates to SST files during scan.
///
/// Predicates are gathered per column while walking the filter expressions, then
/// converted into an index applier by `build`.
pub struct SstIndexApplierBuilder<'a> {
/// Directory of the region, required argument for constructing [`SstIndexApplier`].
region_dir: String,
/// Object store, required argument for constructing [`SstIndexApplier`].
object_store: ObjectStore,
/// Metadata of the region, used to get metadata like column type.
/// Borrowed, so the builder cannot outlive the metadata it was created with.
metadata: &'a RegionMetadata,
/// Stores predicates during traversal on the Expr tree.
/// Keyed by column name; each column may accumulate several predicates.
output: HashMap<ColumnName, Vec<Predicate>>,
}
impl<'a> SstIndexApplierBuilder<'a> {
/// Builds an empty [`SstIndexApplierBuilder`] for the given region.
///
/// `region_dir` and `object_store` are only stored so they can be handed to the
/// [`SstIndexApplier`] later; `metadata` is borrowed for the builder's lifetime.
/// The builder starts without any collected predicate.
pub fn new(
    region_dir: String,
    object_store: ObjectStore,
    metadata: &'a RegionMetadata,
) -> Self {
    let output = HashMap::new();
    Self {
        region_dir,
        object_store,
        metadata,
        output,
    }
}
/// Consumes the builder to construct an [`SstIndexApplier`], optionally returned based on
/// the expressions provided. If no predicates match, returns `None`.
pub fn build(mut self, exprs: &[Expr]) -> Result<Option<SstIndexApplier>> {
for expr in exprs {
self.traverse_and_collect(expr.df_expr());
}
if self.output.is_empty() {
return Ok(None);
}
let predicates = self.output.into_iter().collect();
let applier = PredicatesIndexApplier::try_from(predicates);
Ok(Some(SstIndexApplier::new(
self.region_dir,
self.object_store,
Box::new(applier.context(BuildIndexApplierSnafu)?),
)))
}
/// Recursively traverses expressions to collect predicates.
/// Results are stored in `self.output`.
///
/// Only `BETWEEN` expressions are handled at the moment; every other expression is
/// ignored. The arms kept as comments below are placeholders for upcoming work, and the
/// recursion mentioned above only happens in those (currently disabled) arms.
///
/// Collection is best-effort: an error from a collector is logged as a warning and
/// dropped, so it never aborts the traversal.
fn traverse_and_collect(&mut self, expr: &DfExpr) {
let res = match expr {
DfExpr::Between(between) => self.collect_between(between),
// TODO(zhongzc): This PR is too large. The following arms are coming soon.
// DfExpr::InList(in_list) => self.collect_inlist(in_list),
// DfExpr::BinaryExpr(BinaryExpr { left, op, right }) => match op {
// Operator::And => {
// self.traverse_and_collect(left);
// self.traverse_and_collect(right);
// Ok(())
// }
// Operator::Or => self.collect_or_eq_list(left, right),
// Operator::Eq => self.collect_eq(left, right),
// Operator::Lt | Operator::LtEq | Operator::Gt | Operator::GtEq => {
// self.collect_comparison_expr(left, op, right)
// }
// Operator::RegexMatch => self.collect_regex_match(left, right),
// _ => Ok(()),
// },
// TODO(zhongzc): support more expressions, e.g. IsNull, IsNotNull, ...
// Unsupported expressions contribute no predicate and are not an error.
_ => Ok(()),
};
// Swallow the error on purpose: a predicate that cannot be collected is just skipped.
if let Err(err) = res {
warn!(err; "Failed to collect predicates, ignore it. expr: {expr}");
}
}
/// Appends `predicate` to the predicates kept for `column_name` in `self.output`,
/// creating the entry when the column has none yet.
fn add_predicate(&mut self, column_name: &str, predicate: Predicate) {
    if let Some(predicates) = self.output.get_mut(column_name) {
        predicates.push(predicate);
    } else {
        // The owned key is only allocated for a column seen for the first time.
        self.output.insert(column_name.to_string(), vec![predicate]);
    }
}
/// Looks up the data type of `column_name` if it is a tag column.
///
/// Returns `Ok(None)` when the column exists but is not a tag column.
///
/// # Errors
/// Returns a `ColumnNotFound` error when the region metadata has no such column.
fn tag_column_type(&self, column_name: &str) -> Result<Option<ConcreteDataType>> {
    let lookup = self.metadata.column_by_name(column_name);
    let column = lookup.context(ColumnNotFoundSnafu {
        column: column_name,
    })?;

    let data_type = if column.semantic_type == SemanticType::Tag {
        Some(column.column_schema.data_type.clone())
    } else {
        None
    };
    Ok(data_type)
}
/// Returns the scalar inside `expr` when it is a literal that is not null,
/// and `None` for null literals and every other kind of expression.
fn nonnull_lit(expr: &DfExpr) -> Option<&ScalarValue> {
    let DfExpr::Literal(lit) = expr else {
        return None;
    };
    (!lit.is_null()).then_some(lit)
}
/// Returns the name of the column when `expr` is a plain column reference,
/// and `None` for any other expression.
fn column_name(expr: &DfExpr) -> Option<&str> {
    if let DfExpr::Column(column) = expr {
        Some(column.name.as_str())
    } else {
        None
    }
}
/// Encodes the literal `lit` into bytes, treating it as a value of `data_type`.
///
/// # Errors
/// Fails when the literal cannot be converted into a [`Value`], or when
/// [`IndexValueCodec::encode_value`] rejects the value for the given data type.
fn encode_lit(lit: &ScalarValue, data_type: ConcreteDataType) -> Result<Vec<u8>> {
    // The conversion takes the scalar by value, hence the clone.
    let value = Value::try_from(lit.clone()).context(ConvertValueSnafu)?;
    let field = SortField::new(data_type);

    let mut encoded = Vec::new();
    IndexValueCodec::encode_value(value.as_value_ref(), &field, &mut encoded)?;
    Ok(encoded)
}
}
#[cfg(test)]
mod tests {
use api::v1::SemanticType;
use datafusion_common::Column;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use object_store::services::Memory;
use object_store::ObjectStore;
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
use store_api::storage::RegionId;
use super::*;
/// Builds the region metadata shared by the builder tests: a string tag column `a`
/// (id 1, the only primary key column), a string field column `b` (id 2) and a
/// millisecond timestamp column `c` (id 3).
pub(crate) fn test_region_metadata() -> RegionMetadata {
    let tag = ColumnMetadata {
        column_schema: ColumnSchema::new("a", ConcreteDataType::string_datatype(), false),
        semantic_type: SemanticType::Tag,
        column_id: 1,
    };
    let field = ColumnMetadata {
        column_schema: ColumnSchema::new("b", ConcreteDataType::string_datatype(), false),
        semantic_type: SemanticType::Field,
        column_id: 2,
    };
    let timestamp = ColumnMetadata {
        column_schema: ColumnSchema::new(
            "c",
            ConcreteDataType::timestamp_millisecond_datatype(),
            false,
        ),
        semantic_type: SemanticType::Timestamp,
        column_id: 3,
    };

    let mut builder = RegionMetadataBuilder::new(RegionId::new(1234, 5678));
    builder
        .push_column_metadata(tag)
        .push_column_metadata(field)
        .push_column_metadata(timestamp)
        .primary_key(vec![1]);
    builder.build().unwrap()
}
/// Creates an [`ObjectStore`] on top of the `Memory` service with default settings,
/// for use in tests. Panics if the store cannot be constructed.
pub(crate) fn test_object_store() -> ObjectStore {
ObjectStore::new(Memory::default()).unwrap().finish()
}
/// Builds an unqualified column reference expression named `name`.
fn column_expr(name: &str) -> DfExpr {
    DfExpr::Column(Column {
        relation: None,
        name: name.to_string(),
    })
}

/// Column expression for `a`, the tag column of [`test_region_metadata`].
pub(crate) fn tag_column() -> DfExpr {
    column_expr("a")
}

/// Column expression for `b`, the field column of [`test_region_metadata`].
pub(crate) fn field_column() -> DfExpr {
    column_expr("b")
}

/// Column expression for a column that [`test_region_metadata`] does not define.
pub(crate) fn nonexistent_column() -> DfExpr {
    column_expr("nonexistent")
}
/// Wraps `s` into a non-null UTF-8 string literal expression.
pub(crate) fn string_lit(s: impl Into<String>) -> DfExpr {
    let text: String = s.into();
    DfExpr::Literal(ScalarValue::Utf8(Some(text)))
}
/// Wraps `i` into a non-null 64-bit integer literal expression.
pub(crate) fn int64_lit(i: impl Into<i64>) -> DfExpr {
    let number: i64 = i.into();
    DfExpr::Literal(ScalarValue::Int64(Some(number)))
}
/// Encodes `s` as a string-typed index value and returns the resulting bytes.
/// Panics if the encoding fails.
pub(crate) fn encoded_string(s: impl Into<String>) -> Vec<u8> {
    let text: String = s.into();
    let value = Value::from(text);
    let field = SortField::new(ConcreteDataType::string_datatype());

    let mut bytes = Vec::new();
    IndexValueCodec::encode_value(value.as_value_ref(), &field, &mut bytes).unwrap();
    bytes
}
}

View File

@@ -0,0 +1,171 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use datafusion_expr::Between;
use index::inverted_index::search::predicate::{Bound, Predicate, Range, RangePredicate};
use crate::error::Result;
use crate::sst::index::applier::builder::SstIndexApplierBuilder;
impl<'a> SstIndexApplierBuilder<'a> {
    /// Collects a `BETWEEN` expression in the form of `column BETWEEN lit AND lit`
    /// as a range predicate on that column, with both bounds inclusive.
    ///
    /// The expression is skipped (returns `Ok(())` without collecting anything) when it
    /// is negated, when its subject is not a plain column, when the column is not a tag
    /// column, or when either bound is not a non-null literal.
    ///
    /// # Errors
    /// Propagates the error from looking up the column type and from encoding a bound.
    pub(crate) fn collect_between(&mut self, between: &Between) -> Result<()> {
        if between.negated {
            return Ok(());
        }
        let Some(column_name) = Self::column_name(&between.expr) else {
            return Ok(());
        };
        let Some(data_type) = self.tag_column_type(column_name)? else {
            return Ok(());
        };
        let (Some(low), Some(high)) = (
            Self::nonnull_lit(&between.low),
            Self::nonnull_lit(&between.high),
        ) else {
            return Ok(());
        };

        // Encode the lower bound first, then the upper one; both ends are inclusive.
        let lower = Bound {
            inclusive: true,
            value: Self::encode_lit(low, data_type.clone())?,
        };
        let upper = Bound {
            inclusive: true,
            value: Self::encode_lit(high, data_type)?,
        };
        let range = Range {
            lower: Some(lower),
            upper: Some(upper),
        };

        self.add_predicate(column_name, Predicate::Range(RangePredicate { range }));
        Ok(())
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::error::Error;
use crate::sst::index::applier::builder::tests::{
encoded_string, field_column, int64_lit, nonexistent_column, string_lit, tag_column,
test_object_store, test_region_metadata,
};
/// `a BETWEEN 'abc' AND 'def'` on the tag column yields exactly one inclusive range
/// predicate with the encoded bounds.
#[test]
fn test_collect_between_basic() {
    let metadata = test_region_metadata();
    let mut builder =
        SstIndexApplierBuilder::new("test".to_string(), test_object_store(), &metadata);

    let between = Between {
        negated: false,
        expr: Box::new(tag_column()),
        low: Box::new(string_lit("abc")),
        high: Box::new(string_lit("def")),
    };
    builder.collect_between(&between).unwrap();

    let expected = Predicate::Range(RangePredicate {
        range: Range {
            lower: Some(Bound {
                inclusive: true,
                value: encoded_string("abc"),
            }),
            upper: Some(Bound {
                inclusive: true,
                value: encoded_string("def"),
            }),
        },
    });
    let predicates = builder.output.get("a").unwrap();
    assert_eq!(*predicates, vec![expected]);
}
/// A negated `BETWEEN` is accepted but contributes no predicate.
#[test]
fn test_collect_between_negated() {
    let metadata = test_region_metadata();
    let mut builder =
        SstIndexApplierBuilder::new("test".to_string(), test_object_store(), &metadata);

    let between = Between {
        negated: true,
        expr: Box::new(tag_column()),
        low: Box::new(string_lit("abc")),
        high: Box::new(string_lit("def")),
    };

    let res = builder.collect_between(&between);
    assert!(res.is_ok());
    assert!(builder.output.is_empty());
}
/// A `BETWEEN` on a field (non-tag) column is accepted but contributes no predicate.
#[test]
fn test_collect_between_field_column() {
    let metadata = test_region_metadata();
    let mut builder =
        SstIndexApplierBuilder::new("test".to_string(), test_object_store(), &metadata);

    let between = Between {
        negated: false,
        expr: Box::new(field_column()),
        low: Box::new(string_lit("abc")),
        high: Box::new(string_lit("def")),
    };

    let res = builder.collect_between(&between);
    assert!(res.is_ok());
    assert!(builder.output.is_empty());
}
/// Integer bounds on the string tag column fail with `FieldTypeMismatch` and leave
/// the builder without any predicate.
#[test]
fn test_collect_between_type_mismatch() {
    let metadata = test_region_metadata();
    let mut builder =
        SstIndexApplierBuilder::new("test".to_string(), test_object_store(), &metadata);

    let between = Between {
        negated: false,
        expr: Box::new(tag_column()),
        low: Box::new(int64_lit(123)),
        high: Box::new(int64_lit(456)),
    };

    let err = builder.collect_between(&between).unwrap_err();
    assert!(matches!(err, Error::FieldTypeMismatch { .. }));
    assert!(builder.output.is_empty());
}
/// A `BETWEEN` on a column missing from the metadata fails with `ColumnNotFound` and
/// leaves the builder without any predicate.
#[test]
fn test_collect_between_nonexistent_column() {
    let metadata = test_region_metadata();
    let mut builder =
        SstIndexApplierBuilder::new("test".to_string(), test_object_store(), &metadata);

    let between = Between {
        negated: false,
        expr: Box::new(nonexistent_column()),
        low: Box::new(string_lit("abc")),
        high: Box::new(string_lit("def")),
    };

    let err = builder.collect_between(&between).unwrap_err();
    assert!(matches!(err, Error::ColumnNotFound { .. }));
    assert!(builder.output.is_empty());
}
}

View File

@@ -0,0 +1,65 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use datatypes::value::ValueRef;
use memcomparable::Serializer;
use crate::error::Result;
use crate::row_converter::SortField;
/// Encodes index values according to their data types for sorting and storage use.
pub struct IndexValueCodec;

impl IndexValueCodec {
    /// Serializes `value` as the data type described by `field` and writes the bytes
    /// into `buffer`.
    ///
    /// Space for the field's estimated size is reserved in `buffer` before writing.
    ///
    /// # Arguments
    /// * `value` - The value to be encoded.
    /// * `field` - Contains data type to guide serialization.
    /// * `buffer` - Destination buffer for the serialized value.
    ///
    /// # Errors
    /// Returns whatever error the field's serialization reports, e.g. when the value
    /// does not match the field's data type.
    pub fn encode_value(value: ValueRef, field: &SortField, buffer: &mut Vec<u8>) -> Result<()> {
        let additional = field.estimated_size();
        buffer.reserve(additional);
        field.serialize(&mut Serializer::new(buffer), &value)
    }
}
#[cfg(test)]
mod tests {
use datatypes::data_type::ConcreteDataType;
use super::*;
use crate::error::Error;
/// Encoding a string value with a string field succeeds and produces some bytes.
#[test]
fn test_encode_value_basic() {
    let field = SortField::new(ConcreteDataType::string_datatype());
    let mut buffer = vec![];

    let res = IndexValueCodec::encode_value(ValueRef::from("hello"), &field, &mut buffer);

    assert!(res.is_ok());
    assert!(!buffer.is_empty());
}
/// Encoding a string value with an int64 field fails with `FieldTypeMismatch`.
#[test]
fn test_encode_value_type_mismatch() {
    let field = SortField::new(ConcreteDataType::int64_datatype());
    let mut buffer = vec![];

    let err =
        IndexValueCodec::encode_value(ValueRef::from("hello"), &field, &mut buffer).unwrap_err();

    assert!(matches!(err, Error::FieldTypeMismatch { .. }));
}
}