Merge branch 'main' into fix-proto-clear

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2024-03-14 00:36:22 +08:00
99 changed files with 5038 additions and 921 deletions

10
.editorconfig Normal file
View File

@@ -0,0 +1,10 @@
root = true
[*]
end_of_line = lf
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true
[{Makefile,**.mk}]
indent_style = tab

View File

@@ -123,7 +123,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
target: [ "fuzz_create_table" ]
target: [ "fuzz_create_table", "fuzz_alter_table" ]
steps:
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3

8
Cargo.lock generated
View File

@@ -3474,10 +3474,12 @@ dependencies = [
"common-telemetry",
"common-time",
"datatypes",
"enum_dispatch",
"hydroflow",
"itertools 0.10.5",
"num-traits",
"serde",
"serde_json",
"servers",
"session",
"snafu",
@@ -4815,9 +4817,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.151"
version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "libfuzzer-sys"
@@ -9191,6 +9193,7 @@ dependencies = [
"derive_builder 0.12.0",
"digest",
"futures",
"hashbrown 0.14.3",
"headers",
"hex",
"hostname",
@@ -10216,6 +10219,7 @@ dependencies = [
"common-query",
"common-runtime",
"common-telemetry",
"common-time",
"datatypes",
"derive_builder 0.12.0",
"dotenv",

View File

@@ -134,7 +134,7 @@ reqwest = { version = "0.11", default-features = false, features = [
rskafka = "0.5"
rust_decimal = "1.33"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_json = { version = "1.0", features = ["float_roundtrip"] }
serde_with = "3"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.7"

View File

@@ -28,12 +28,15 @@ const REGION: &str = "region";
const ENABLE_VIRTUAL_HOST_STYLE: &str = "enable_virtual_host_style";
pub fn is_supported_in_s3(key: &str) -> bool {
key == ENDPOINT
|| key == ACCESS_KEY_ID
|| key == SECRET_ACCESS_KEY
|| key == SESSION_TOKEN
|| key == REGION
|| key == ENABLE_VIRTUAL_HOST_STYLE
[
ENDPOINT,
ACCESS_KEY_ID,
SECRET_ACCESS_KEY,
SESSION_TOKEN,
REGION,
ENABLE_VIRTUAL_HOST_STYLE,
]
.contains(&key)
}
pub fn build_s3_backend(

View File

@@ -14,9 +14,11 @@
use std::sync::Arc;
mod greatest;
mod to_timezone;
mod to_unixtime;
use greatest::GreatestFunction;
use to_timezone::ToTimezoneFunction;
use to_unixtime::ToUnixtimeFunction;
use crate::function_registry::FunctionRegistry;
@@ -25,6 +27,7 @@ pub(crate) struct TimestampFunction;
impl TimestampFunction {
/// Registers the timestamp-related scalar functions with `registry`:
/// `ToTimezoneFunction`, `ToUnixtimeFunction` and `GreatestFunction`, in that order.
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(ToTimezoneFunction));
registry.register(Arc::new(ToUnixtimeFunction));
registry.register(Arc::new(GreatestFunction));
}

View File

@@ -0,0 +1,260 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use std::sync::Arc;
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use common_time::{Timestamp, Timezone};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use datatypes::vectors::{
StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
TimestampNanosecondVector, TimestampSecondVector, Vector,
};
use snafu::{ensure, OptionExt};
use crate::function::{Function, FunctionContext};
use crate::helper;
/// Scalar function `to_timezone(timestamp, timezone)`.
///
/// Its `eval` renders each timestamp as a string via
/// `Timestamp::to_timezone_aware_string`, using the timezone parsed from the second argument.
#[derive(Clone, Debug, Default)]
pub struct ToTimezoneFunction;
const NAME: &str = "to_timezone";
/// Parses a timezone specification, yielding `None` when `Timezone::from_tz_string`
/// rejects the input.
fn convert_to_timezone(arg: &str) -> Option<Timezone> {
    match Timezone::from_tz_string(arg) {
        Ok(timezone) => Some(timezone),
        Err(_) => None,
    }
}
/// Extracts the timestamp carried by `arg`; every other kind of value maps to `None`.
fn convert_to_timestamp(arg: &Value) -> Option<Timestamp> {
    if let Value::Timestamp(ts) = arg {
        Some(*ts)
    } else {
        None
    }
}
/// Displays the function under its upper-case SQL spelling.
impl fmt::Display for ToTimezoneFunction {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("TO_TIMEZONE")
    }
}
impl Function for ToTimezoneFunction {
    /// Name under which the function is exposed.
    fn name(&self) -> &str {
        NAME
    }

    /// Result type of the function.
    ///
    /// `eval` always builds a `StringVector`, so the declared type is the string type no
    /// matter which timestamp precision the first argument has. (The inputs themselves are
    /// validated through `signature`, so they are not inspected here.)
    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::string_datatype())
    }

    /// First argument: a timestamp of second, millisecond, microsecond or nanosecond
    /// precision. Second argument: a string. The two lists are combined by
    /// `helper::one_of_sigs2`.
    fn signature(&self) -> Signature {
        helper::one_of_sigs2(
            vec![
                ConcreteDataType::timestamp_second_datatype(),
                ConcreteDataType::timestamp_millisecond_datatype(),
                ConcreteDataType::timestamp_microsecond_datatype(),
                ConcreteDataType::timestamp_nanosecond_datatype(),
            ],
            vec![ConcreteDataType::string_datatype()],
        )
    }

    /// Formats every timestamp of `columns[0]` using the timezone named in `columns[1]`.
    ///
    /// Rows whose timestamp is not a `Value::Timestamp` (e.g. null) produce a null string.
    /// A timezone that cannot be parsed is passed on as `None` to
    /// `Timestamp::to_timezone_aware_string`, which decides the fallback.
    ///
    /// # Errors
    ///
    /// * `InvalidFuncArgs` when the number of columns is not exactly 2.
    /// * `UnsupportedInputDataType` when the first column is not a timestamp column, or when
    ///   either column cannot be viewed as the expected vector type.
    fn eval(&self, _ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure!(
            columns.len() == 2,
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of the args is not correct, expect exactly 2, have: {}",
                    columns.len()
                ),
            }
        );

        // Shared error context for every "wrong input type" failure below. The closure only
        // captures a shared reference, so it can be handed to `with_context` repeatedly.
        let unsupported = || UnsupportedInputDataTypeSnafu {
            function: NAME,
            datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
        };

        // TODO: maybe support epoch timestamp? https://github.com/GreptimeTeam/greptimedb/issues/3477
        let ts = columns[0]
            .data_type()
            .as_timestamp()
            .with_context(unsupported)?;

        let array = columns[0].to_arrow_array();
        // The concrete vector type depends on the timestamp precision; a failed conversion is
        // reported as an error instead of panicking, matching the string column below.
        let times = match ts {
            TimestampType::Second(_) => {
                let vector = TimestampSecondVector::try_from_arrow_array(array)
                    .ok()
                    .with_context(unsupported)?;
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
            TimestampType::Millisecond(_) => {
                let vector = TimestampMillisecondVector::try_from_arrow_array(array)
                    .ok()
                    .with_context(unsupported)?;
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
            TimestampType::Microsecond(_) => {
                let vector = TimestampMicrosecondVector::try_from_arrow_array(array)
                    .ok()
                    .with_context(unsupported)?;
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
            TimestampType::Nanosecond(_) => {
                let vector = TimestampNanosecondVector::try_from_arrow_array(array)
                    .ok()
                    .with_context(unsupported)?;
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
        };

        let tzs = {
            let array = columns[1].to_arrow_array();
            let vector = StringVector::try_from_arrow_array(&array)
                .ok()
                .with_context(unsupported)?;
            (0..vector.len())
                .map(|i| convert_to_timezone(&vector.get(i).to_string()))
                .collect::<Vec<_>>()
        };

        // Pair each timestamp with its timezone; a missing timestamp yields a null string.
        let result = times
            .iter()
            .zip(tzs.iter())
            .map(|(time, tz)| {
                time.as_ref()
                    .map(|time| time.to_timezone_aware_string(tz.as_ref()))
            })
            .collect::<Vec<Option<String>>>();
        Ok(Arc::new(StringVector::from(result)))
    }
}
#[cfg(test)]
mod tests {
    use datatypes::scalars::ScalarVector;
    use datatypes::timestamp::{
        TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
    };
    use datatypes::vectors::StringVector;

    use super::*;

    /// Evaluates `to_timezone` on `timestamps` paired with the fixed timezone column
    /// `[New York, null, Moscow, null]` and compares the outcome with `expected`.
    fn assert_formatted(timestamps: VectorRef, expected: Vec<Option<&str>>) {
        let zones = vec![Some("America/New_York"), None, Some("Europe/Moscow"), None];
        let args: Vec<VectorRef> = vec![timestamps, Arc::new(StringVector::from(zones))];

        let actual = ToTimezoneFunction
            .eval(FunctionContext::default(), &args)
            .unwrap();
        assert_eq!(4, actual.len());

        let wanted: VectorRef = Arc::new(StringVector::from(expected));
        assert_eq!(wanted, actual);
    }

    #[test]
    fn test_timestamp_to_timezone() {
        assert_eq!("to_timezone", ToTimezoneFunction.name());

        // Second precision.
        let seconds = TimestampSecondVector::from_owned_iterator(
            vec![
                Some(TimestampSecond::new(1)),
                None,
                Some(TimestampSecond::new(1)),
                None,
            ]
            .into_iter(),
        );
        assert_formatted(
            Arc::new(seconds),
            vec![
                Some("1969-12-31 19:00:01"),
                None,
                Some("1970-01-01 03:00:01"),
                None,
            ],
        );

        // Millisecond precision.
        let millis = TimestampMillisecondVector::from_owned_iterator(
            vec![
                Some(TimestampMillisecond::new(1)),
                None,
                Some(TimestampMillisecond::new(1)),
                None,
            ]
            .into_iter(),
        );
        assert_formatted(
            Arc::new(millis),
            vec![
                Some("1969-12-31 19:00:00.001"),
                None,
                Some("1970-01-01 03:00:00.001"),
                None,
            ],
        );

        // Microsecond precision.
        let micros = TimestampMicrosecondVector::from_owned_iterator(
            vec![
                Some(TimestampMicrosecond::new(1)),
                None,
                Some(TimestampMicrosecond::new(1)),
                None,
            ]
            .into_iter(),
        );
        assert_formatted(
            Arc::new(micros),
            vec![
                Some("1969-12-31 19:00:00.000001"),
                None,
                Some("1970-01-01 03:00:00.000001"),
                None,
            ],
        );

        // Nanosecond precision.
        let nanos = TimestampNanosecondVector::from_owned_iterator(
            vec![
                Some(TimestampNanosecond::new(1)),
                None,
                Some(TimestampNanosecond::new(1)),
                None,
            ]
            .into_iter(),
        );
        assert_formatted(
            Arc::new(nanos),
            vec![
                Some("1969-12-31 19:00:00.000000001"),
                None,
                Some("1970-01-01 03:00:00.000000001"),
                None,
            ],
        );
    }
}

View File

@@ -45,6 +45,10 @@ impl TxnService for ChrootKvBackend {
let txn_res = self.inner.txn(txn).await?;
Ok(self.chroot_txn_response(txn_res))
}
/// Forwards to the wrapped backend's `max_txn_ops`, so the limit is whatever `self.inner` reports.
fn max_txn_ops(&self) -> usize {
self.inner.max_txn_ops()
}
}
#[async_trait::async_trait]

View File

@@ -323,6 +323,10 @@ impl<T: ErrorExt + Send + Sync> TxnService for MemoryKvBackend<T> {
responses,
})
}
/// Returns `usize::MAX`, i.e. this backend places no practical cap on operations per transaction.
fn max_txn_ops(&self) -> usize {
usize::MAX
}
}
impl<T: ErrorExt + Send + Sync + 'static> ResettableKvBackend for MemoryKvBackend<T> {

View File

@@ -30,7 +30,7 @@ pub trait TxnService: Sync + Send {
/// Maximum number of operations permitted in a transaction.
fn max_txn_ops(&self) -> usize {
usize::MAX
unimplemented!("txn is not implemented")
}
}

View File

@@ -32,7 +32,7 @@ use snafu::ResultExt;
use crate::error::{self, Result};
use crate::{
DfRecordBatch, DfSendableRecordBatchStream, RecordBatch, RecordBatchStream,
DfRecordBatch, DfSendableRecordBatchStream, OrderOption, RecordBatch, RecordBatchStream,
SendableRecordBatchStream, Stream,
};
@@ -228,6 +228,10 @@ impl RecordBatchStream for RecordBatchStreamAdapter {
Metrics::Unavailable | Metrics::Unresolved(_) => None,
}
}
/// Always `None`: this stream reports no output ordering.
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
}
impl Stream for RecordBatchStreamAdapter {
@@ -316,6 +320,14 @@ impl RecordBatchStream for AsyncRecordBatchStreamAdapter {
/// Returns a clone of the schema handle stored in `self.schema`.
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
/// Always `None`: this stream reports no output ordering.
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
/// Always `None`: this stream exposes no metrics.
fn metrics(&self) -> Option<RecordBatchMetrics> {
None
}
}
impl Stream for AsyncRecordBatchStreamAdapter {
@@ -375,6 +387,14 @@ mod test {
/// Left unimplemented for this test stream; calling it panics via `unimplemented!()`.
fn schema(&self) -> SchemaRef {
unimplemented!()
}
/// Always `None`: this stream reports no output ordering.
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
/// Always `None`: this stream exposes no metrics.
fn metrics(&self) -> Option<RecordBatchMetrics> {
None
}
}
impl Stream for MaybeErrorRecordBatchStream {

View File

@@ -39,13 +39,9 @@ use snafu::{ensure, ResultExt};
pub trait RecordBatchStream: Stream<Item = Result<RecordBatch>> {
fn schema(&self) -> SchemaRef;
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
fn output_ordering(&self) -> Option<&[OrderOption]>;
fn metrics(&self) -> Option<RecordBatchMetrics> {
None
}
fn metrics(&self) -> Option<RecordBatchMetrics>;
}
pub type SendableRecordBatchStream = Pin<Box<dyn RecordBatchStream + Send>>;
@@ -74,6 +70,14 @@ impl RecordBatchStream for EmptyRecordBatchStream {
/// Returns a clone of the schema handle stored in `self.schema`.
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
/// Always `None`: this stream reports no output ordering.
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
/// Always `None`: this stream exposes no metrics.
fn metrics(&self) -> Option<RecordBatchMetrics> {
None
}
}
impl Stream for EmptyRecordBatchStream {
@@ -192,6 +196,14 @@ impl RecordBatchStream for SimpleRecordBatchStream {
/// Delegates to `self.inner.schema()`.
fn schema(&self) -> SchemaRef {
self.inner.schema()
}
/// Always `None`: this stream reports no output ordering.
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
/// Always `None`: this stream exposes no metrics.
fn metrics(&self) -> Option<RecordBatchMetrics> {
None
}
}
impl Stream for SimpleRecordBatchStream {

View File

@@ -41,7 +41,8 @@ mod tests {
use futures::Stream;
use super::*;
use crate::RecordBatchStream;
use crate::adapter::RecordBatchMetrics;
use crate::{OrderOption, RecordBatchStream};
struct MockRecordBatchStream {
batch: Option<RecordBatch>,
@@ -52,6 +53,14 @@ mod tests {
/// Returns a clone of the schema handle stored in `self.schema`.
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
/// Always `None`: this mock stream reports no output ordering.
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
/// Always `None`: this mock stream exposes no metrics.
fn metrics(&self) -> Option<RecordBatchMetrics> {
None
}
}
impl Stream for MockRecordBatchStream {

View File

@@ -36,7 +36,7 @@ use crate::{error, Interval};
/// - for [TimeUnit::Second]: [-262144-01-01 00:00:00, +262143-12-31 23:59:59]
/// - for [TimeUnit::Millisecond]: [-262144-01-01 00:00:00.000, +262143-12-31 23:59:59.999]
/// - for [TimeUnit::Microsecond]: [-262144-01-01 00:00:00.000000, +262143-12-31 23:59:59.999999]
/// - for [TimeUnit::Nanosecond]: [1677-09-21 00:12:43.145225, 2262-04-11 23:47:16.854775807]
/// - for [TimeUnit::Nanosecond]: [1677-09-21 00:12:43.145224192, 2262-04-11 23:47:16.854775807]
///
/// # Note:
/// For values out of range, you can still store these timestamps, but while performing arithmetic
@@ -187,28 +187,28 @@ impl Timestamp {
Self { unit, value }
}
pub fn new_second(value: i64) -> Self {
pub const fn new_second(value: i64) -> Self {
Self {
value,
unit: TimeUnit::Second,
}
}
pub fn new_millisecond(value: i64) -> Self {
pub const fn new_millisecond(value: i64) -> Self {
Self {
value,
unit: TimeUnit::Millisecond,
}
}
pub fn new_microsecond(value: i64) -> Self {
pub const fn new_microsecond(value: i64) -> Self {
Self {
value,
unit: TimeUnit::Microsecond,
}
}
pub fn new_nanosecond(value: i64) -> Self {
pub const fn new_nanosecond(value: i64) -> Self {
Self {
value,
unit: TimeUnit::Nanosecond,
@@ -281,8 +281,26 @@ impl Timestamp {
.and_then(|v| v.checked_add(micros as i64))
.map(Timestamp::new_microsecond)
} else {
// Refer to <https://github.com/chronotope/chrono/issues/1289>
//
// subsec nanos are always non-negative, however the timestamp itself (both in seconds and in nanos) can be
// negative. Now i64::MIN is NOT divisible by 1_000_000_000, so
//
// (sec * 1_000_000_000) + nsec
//
// may underflow (even when in theory we COULD represent the datetime as i64) because we add the non-negative
// nanos AFTER the multiplication. This is fixed by converting the negative case to
//
// ((sec + 1) * 1_000_000_000) + (nsec - 1_000_000_000)
let mut sec = sec;
let mut nsec = nsec as i64;
if sec < 0 && nsec > 0 {
nsec -= 1_000_000_000;
sec += 1;
}
sec.checked_mul(1_000_000_000)
.and_then(|v| v.checked_add(nsec as i64))
.and_then(|v| v.checked_add(nsec))
.map(Timestamp::new_nanosecond)
}
}
@@ -425,6 +443,20 @@ impl Timestamp {
}
}
/// Smallest and largest timestamps per time unit, exposed as constants.
///
/// The textual forms quoted below are the UTC renderings asserted by
/// `test_timestamp_bound_format` in this file.
impl Timestamp {
/// Lower bound for second precision (`-262143-01-01 00:00:00` UTC).
pub const MIN_SECOND: Self = Self::new_second(-8_334_601_228_800);
/// Upper bound for second precision (`+262142-12-31 23:59:59` UTC).
pub const MAX_SECOND: Self = Self::new_second(8_210_266_876_799);
/// Lower bound for millisecond precision (`-262143-01-01 00:00:00` UTC).
pub const MIN_MILLISECOND: Self = Self::new_millisecond(-8_334_601_228_800_000);
/// Upper bound for millisecond precision (`+262142-12-31 23:59:59.999` UTC).
pub const MAX_MILLISECOND: Self = Self::new_millisecond(8_210_266_876_799_999);
/// Lower bound for microsecond precision (`-262143-01-01 00:00:00` UTC).
pub const MIN_MICROSECOND: Self = Self::new_microsecond(-8_334_601_228_800_000_000);
/// Upper bound for microsecond precision (`+262142-12-31 23:59:59.999999` UTC).
pub const MAX_MICROSECOND: Self = Self::new_microsecond(8_210_266_876_799_999_999);
/// Lower bound for nanosecond precision: `i64::MIN` nanoseconds
/// (`1677-09-21 00:12:43.145224192` UTC).
pub const MIN_NANOSECOND: Self = Self::new_nanosecond(i64::MIN);
/// Upper bound for nanosecond precision: `i64::MAX` nanoseconds
/// (`2262-04-11 23:47:16.854775807` UTC).
pub const MAX_NANOSECOND: Self = Self::new_nanosecond(i64::MAX);
}
/// Converts the naive datetime (which has no specific timezone) to a
/// nanosecond epoch timestamp in UTC.
fn naive_datetime_to_timestamp(
@@ -586,6 +618,7 @@ impl Hash for Timestamp {
mod tests {
use std::collections::hash_map::DefaultHasher;
use chrono_tz::Tz;
use rand::Rng;
use serde_json::Value;
@@ -1297,7 +1330,7 @@ mod tests {
"+262142-12-31 23:59:59Z",
"+262142-12-31 23:59:59.999Z",
"+262142-12-31 23:59:59.999999Z",
"1677-09-21 00:12:43.145225Z",
"1677-09-21 00:12:43.145224192Z",
"2262-04-11 23:47:16.854775807Z",
"+100000-01-01 00:00:01.5Z",
];
@@ -1306,4 +1339,47 @@ mod tests {
Timestamp::from_str_utc(s).unwrap();
}
}
#[test]
fn test_min_nanos_roundtrip() {
    // Splitting the smallest nanosecond timestamp and re-assembling it must give back the
    // very same value.
    let lowest = Timestamp::MIN_NANOSECOND;
    let (seconds, subsec_nanos) = lowest.split();
    let rebuilt = Timestamp::from_splits(seconds, subsec_nanos).unwrap();
    assert_eq!(Timestamp::MIN_NANOSECOND, rebuilt);
}
#[test]
fn test_timestamp_bound_format() {
    // Each bound constant paired with its expected rendering in UTC, checked in the same
    // order as before: nanoseconds first, seconds last.
    let cases = [
        ("1677-09-21 00:12:43.145224192", Timestamp::MIN_NANOSECOND),
        ("2262-04-11 23:47:16.854775807", Timestamp::MAX_NANOSECOND),
        ("-262143-01-01 00:00:00", Timestamp::MIN_MICROSECOND),
        ("+262142-12-31 23:59:59.999999", Timestamp::MAX_MICROSECOND),
        ("-262143-01-01 00:00:00", Timestamp::MIN_MILLISECOND),
        ("+262142-12-31 23:59:59.999", Timestamp::MAX_MILLISECOND),
        ("-262143-01-01 00:00:00", Timestamp::MIN_SECOND),
        ("+262142-12-31 23:59:59", Timestamp::MAX_SECOND),
    ];

    for (expected, bound) in cases {
        assert_eq!(
            expected,
            bound.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
        );
    }
}
}

View File

@@ -2417,4 +2417,12 @@ mod tests {
);
check_value_ref_size_eq(&ValueRef::Decimal128(Decimal128::new(1234, 3, 1)), 32)
}
#[test]
fn test_incorrect_default_value_issue_3479() {
    // Regression check: a float must come back bit-for-bit equal after a JSON round trip.
    let original = OrderedF64::from(0.047318541668048164);
    let json = serde_json::to_string(&original).unwrap();
    let restored: OrderedF64 = serde_json::from_str(&json).unwrap();
    assert_eq!(original, restored);
}
}

View File

@@ -22,8 +22,9 @@ use std::task::{Context, Poll};
use common_datasource::object_store::build_backend;
use common_error::ext::BoxedError;
use common_query::prelude::Expr;
use common_recordbatch::adapter::RecordBatchMetrics;
use common_recordbatch::error::{CastVectorSnafu, ExternalSnafu, Result as RecordBatchResult};
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use datafusion::logical_expr::utils as df_logical_expr_utils;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
@@ -151,6 +152,14 @@ impl RecordBatchStream for FileToScanRegionStream {
/// Returns a clone of the schema handle stored in `self.scan_schema`.
fn schema(&self) -> SchemaRef {
self.scan_schema.clone()
}
/// Always `None`: this stream reports no output ordering.
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
/// Always `None`: this stream exposes no metrics.
fn metrics(&self) -> Option<RecordBatchMetrics> {
None
}
}
impl Stream for FileToScanRegionStream {

View File

@@ -18,6 +18,7 @@ common-query.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
datatypes.workspace = true
enum_dispatch = "0.3"
hydroflow = "0.5.0"
itertools.workspace = true
num-traits = "0.2"
@@ -27,3 +28,6 @@ session.workspace = true
snafu.workspace = true
tokio.workspace = true
tonic.workspace = true
[dev-dependencies]
serde_json = "1.0"

View File

@@ -24,5 +24,6 @@ mod scalar;
pub(crate) use error::{EvalError, InvalidArgumentSnafu, OptimizeSnafu};
pub(crate) use func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
pub(crate) use id::{GlobalId, Id, LocalId};
pub(crate) use linear::{MapFilterProject, MfpPlan, SafeMfpPlan};
pub(crate) use relation::{AggregateExpr, AggregateFunc};
pub(crate) use scalar::ScalarExpr;

View File

@@ -61,4 +61,7 @@ pub enum EvalError {
#[snafu(display("Unsupported temporal filter: {reason}"))]
UnsupportedTemporalFilter { reason: String, location: Location },
#[snafu(display("Overflowed during evaluation"))]
Overflow { location: Location },
}

View File

@@ -45,7 +45,7 @@ use crate::repr::{self, value_to_internal_ts, Diff, Row};
/// expressions in `self.expressions`, even though this is not something
/// we can directly evaluate. The plan creation methods will defensively
/// ensure that the right thing happens.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct MapFilterProject {
/// A sequence of expressions that should be appended to the row.
///
@@ -415,7 +415,7 @@ impl MapFilterProject {
}
/// A wrapper type which indicates it is safe to simply evaluate all expressions.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct SafeMfpPlan {
pub(crate) mfp: MapFilterProject,
}

View File

@@ -21,7 +21,7 @@ mod accum;
mod func;
/// Describes an aggregation expression.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct AggregateExpr {
/// Names the aggregation function.
pub func: AggregateFunc,

View File

@@ -14,7 +14,10 @@
//! Accumulators for aggregate functions that are accumulable, e.g. sum/count
//!
//! Currently support sum, count, any, all
//! An accumulator is only restored from a row and updated when the dataflow needs to process a new batch of rows,
//! so the overhead is acceptable.
//!
//! Currently support sum, count, any, all and min/max(with one caveat that min/max can't support delete with aggregate).
use std::fmt::Display;
@@ -22,13 +25,506 @@ use common_decimal::Decimal128;
use common_time::{Date, DateTime};
use datatypes::data_type::ConcreteDataType;
use datatypes::value::{OrderedF32, OrderedF64, OrderedFloat, Value};
use enum_dispatch::enum_dispatch;
use hydroflow::futures::stream::Concat;
use serde::{Deserialize, Serialize};
use snafu::ensure;
use crate::expr::error::{InternalSnafu, TryFromValueSnafu, TypeMismatchSnafu};
use crate::expr::error::{InternalSnafu, OverflowSnafu, TryFromValueSnafu, TypeMismatchSnafu};
use crate::expr::relation::func::GenericFn;
use crate::expr::{AggregateFunc, EvalError};
use crate::repr::Diff;
/// Shared interface of the accumulators that back accumulable aggregate functions.
#[enum_dispatch]
pub trait Accumulator: Sized {
    /// Consumes the accumulator and returns its state as a flat list of values.
    fn into_state(self) -> Vec<Value>;

    /// Folds one `value`, weighted by `diff`, into the accumulator on behalf of `aggr_fn`.
    fn update(
        &mut self,
        aggr_fn: &AggregateFunc,
        value: Value,
        diff: Diff,
    ) -> Result<(), EvalError>;

    /// Folds every `(value, diff)` pair in iteration order, stopping at the first error.
    fn update_batch<I>(&mut self, aggr_fn: &AggregateFunc, value_diffs: I) -> Result<(), EvalError>
    where
        I: IntoIterator<Item = (Value, Diff)>,
    {
        value_diffs
            .into_iter()
            .try_for_each(|(value, diff)| self.update(aggr_fn, value, diff))
    }

    /// Computes the current result of `aggr_fn` without modifying the accumulator.
    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError>;
}
/// Bool accumulator, used for `Any` `All` `Max/MinBool`
///
/// Both counters are adjusted by the incoming `diff` on every update, so they track the
/// net number of observed values rather than a plain insertion count.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Bool {
/// The number of `true` values observed.
trues: Diff,
/// The number of `false` values observed.
falses: Diff,
}
/// Rebuilds a [`Bool`] accumulator from its two-element state `[trues, falses]`.
impl TryFrom<Vec<Value>> for Bool {
    type Error = EvalError;

    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
        ensure!(
            state.len() == 2,
            InternalSnafu {
                reason: "Bool Accumulator state should have 2 values",
            }
        );

        // The length check above guarantees that both `next()` calls yield a value.
        let mut fields = state.into_iter();
        let trues = Diff::try_from(fields.next().unwrap()).map_err(err_try_from_val)?;
        let falses = Diff::try_from(fields.next().unwrap()).map_err(err_try_from_val)?;
        Ok(Self { trues, falses })
    }
}
impl Accumulator for Bool {
    /// State layout: `[trues, falses]`.
    fn into_state(self) -> Vec<Value> {
        let Self { trues, falses } = self;
        vec![trues.into(), falses.into()]
    }

    /// Counts a boolean observation weighted by `diff`. Null values are ignored; any other
    /// non-boolean value is a type mismatch.
    fn update(
        &mut self,
        aggr_fn: &AggregateFunc,
        value: Value,
        diff: Diff,
    ) -> Result<(), EvalError> {
        let supported = matches!(
            aggr_fn,
            AggregateFunc::Any
                | AggregateFunc::All
                | AggregateFunc::MaxBool
                | AggregateFunc::MinBool
        );
        ensure!(
            supported,
            InternalSnafu {
                reason: format!(
                    "Bool Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
        );

        match value {
            // ignore nulls
            Value::Null => Ok(()),
            Value::Boolean(flag) => {
                if flag {
                    self.trues += diff;
                } else {
                    self.falses += diff;
                }
                Ok(())
            }
            other => Err(TypeMismatchSnafu {
                expected: ConcreteDataType::boolean_datatype(),
                actual: other.data_type(),
            }
            .build()),
        }
    }

    /// `Any`/`MaxBool` hold when at least one `true` was seen; `All`/`MinBool` hold when no
    /// `false` was seen.
    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
        match aggr_fn {
            AggregateFunc::Any | AggregateFunc::MaxBool => Ok(Value::from(self.trues > 0)),
            AggregateFunc::All | AggregateFunc::MinBool => Ok(Value::from(self.falses == 0)),
            _ => Err(InternalSnafu {
                reason: format!(
                    "Bool Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
            .build()),
        }
    }
}
/// Accumulates simple numeric values for sum over integer.
///
/// The running sum is kept as an `i128`, wider than any of the supported 16/32/64-bit
/// inputs; it is narrowed back to `i64`/`u64` only when the result is evaluated.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct SimpleNumber {
/// The accumulation of all non-NULL values observed.
accum: i128,
/// The number of non-NULL values observed.
non_nulls: Diff,
}
/// Rebuilds a [`SimpleNumber`] accumulator from its state `[accum as decimal, non_nulls]`.
impl TryFrom<Vec<Value>> for SimpleNumber {
    type Error = EvalError;

    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
        ensure!(
            state.len() == 2,
            InternalSnafu {
                reason: "Number Accumulator state should have 2 values",
            }
        );

        // The length check above guarantees that both `next()` calls yield a value.
        let mut fields = state.into_iter();
        let sum = Decimal128::try_from(fields.next().unwrap()).map_err(err_try_from_val)?;
        let non_nulls = Diff::try_from(fields.next().unwrap()).map_err(err_try_from_val)?;
        Ok(Self {
            accum: sum.val(),
            non_nulls,
        })
    }
}
impl Accumulator for SimpleNumber {
    /// State layout: `[accum as Decimal128(precision 38, scale 0), non_nulls]`.
    fn into_state(self) -> Vec<Value> {
        vec![
            Value::Decimal128(Decimal128::new(self.accum, 38, 0)),
            self.non_nulls.into(),
        ]
    }

    /// Adds `value * diff` to the running sum. Null values are ignored.
    ///
    /// # Errors
    ///
    /// * `Internal` if `aggr_fn` is not one of the integer sum functions.
    /// * `TypeMismatch` if `value` does not have the integer type `aggr_fn` sums over.
    /// * `Overflow` if the running sum no longer fits in an `i128`.
    fn update(
        &mut self,
        aggr_fn: &AggregateFunc,
        value: Value,
        diff: Diff,
    ) -> Result<(), EvalError> {
        ensure!(
            matches!(
                aggr_fn,
                AggregateFunc::SumInt16
                    | AggregateFunc::SumInt32
                    | AggregateFunc::SumInt64
                    | AggregateFunc::SumUInt16
                    | AggregateFunc::SumUInt32
                    | AggregateFunc::SumUInt64
            ),
            InternalSnafu {
                reason: format!(
                    "SimpleNumber Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
        );

        let v = match (aggr_fn, value) {
            (AggregateFunc::SumInt16, Value::Int16(x)) => i128::from(x),
            (AggregateFunc::SumInt32, Value::Int32(x)) => i128::from(x),
            (AggregateFunc::SumInt64, Value::Int64(x)) => i128::from(x),
            (AggregateFunc::SumUInt16, Value::UInt16(x)) => i128::from(x),
            (AggregateFunc::SumUInt32, Value::UInt32(x)) => i128::from(x),
            (AggregateFunc::SumUInt64, Value::UInt64(x)) => i128::from(x),
            (_f, Value::Null) => return Ok(()), // ignore null
            (f, v) => {
                return Err(TypeMismatchSnafu {
                    expected: f.signature().input,
                    actual: v.data_type(),
                }
                .build());
            }
        };

        // Checked arithmetic: report an overflow instead of panicking (debug builds) or
        // silently wrapping (release builds).
        self.accum = v
            .checked_mul(i128::from(diff))
            .and_then(|delta| self.accum.checked_add(delta))
            .ok_or_else(|| OverflowSnafu {}.build())?;
        self.non_nulls += diff;
        Ok(())
    }

    /// Narrows the running sum to `i64` (signed sums) or `u64` (unsigned sums).
    ///
    /// # Errors
    ///
    /// * `Overflow` if the sum does not fit the result type.
    /// * `Internal` if `aggr_fn` is not one of the integer sum functions.
    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
        match aggr_fn {
            AggregateFunc::SumInt16 | AggregateFunc::SumInt32 | AggregateFunc::SumInt64 => {
                i64::try_from(self.accum)
                    .map_err(|_e| OverflowSnafu {}.build())
                    .map(Value::from)
            }
            AggregateFunc::SumUInt16 | AggregateFunc::SumUInt32 | AggregateFunc::SumUInt64 => {
                u64::try_from(self.accum)
                    .map_err(|_e| OverflowSnafu {}.build())
                    .map(Value::from)
            }
            _ => Err(InternalSnafu {
                reason: format!(
                    "SimpleNumber Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
            .build()),
        }
    }
}
/// Accumulates float values for sum over floating numbers.
///
/// NaN and infinite inputs are never added to `accum`; they are only counted, each in its
/// own counter, weighted by the incoming `diff`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Float {
/// Accumulates non-special float values, i.e. not NaN, +inf, -inf.
/// accum will be set to zero if `non_nulls` is zero.
accum: OrderedF64,
/// Counts +inf
pos_infs: Diff,
/// Counts -inf
neg_infs: Diff,
/// Counts NaNs
nans: Diff,
/// Counts non-NULL values
non_nulls: Diff,
}
/// Rebuilds a [`Float`] accumulator from its five-element state
/// `[accum, pos_infs, neg_infs, nans, non_nulls]`.
impl TryFrom<Vec<Value>> for Float {
    type Error = EvalError;

    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
        ensure!(
            state.len() == 5,
            InternalSnafu {
                reason: "Float Accumulator state should have 5 values",
            }
        );

        // The length check above guarantees that all five `next()` calls yield a value.
        let mut fields = state.into_iter();
        let accum = OrderedF64::try_from(fields.next().unwrap()).map_err(err_try_from_val)?;
        let pos_infs = Diff::try_from(fields.next().unwrap()).map_err(err_try_from_val)?;
        let neg_infs = Diff::try_from(fields.next().unwrap()).map_err(err_try_from_val)?;
        let nans = Diff::try_from(fields.next().unwrap()).map_err(err_try_from_val)?;
        let non_nulls = Diff::try_from(fields.next().unwrap()).map_err(err_try_from_val)?;

        // This prevent counter-intuitive behavior of summing over no values
        let accum = if non_nulls == 0 {
            OrderedFloat::from(0.0)
        } else {
            accum
        };

        Ok(Self {
            accum,
            pos_infs,
            neg_infs,
            nans,
            non_nulls,
        })
    }
}
impl Accumulator for Float {
    /// State layout: `[accum, pos_infs, neg_infs, nans, non_nulls]`.
    fn into_state(self) -> Vec<Value> {
        vec![
            self.accum.into(),
            self.pos_infs.into(),
            self.neg_infs.into(),
            self.nans.into(),
            self.non_nulls.into(),
        ]
    }

    /// Folds `value * diff` into the sum. Null values are ignored.
    ///
    /// NaN and infinite inputs are not added to `accum`; they only adjust their dedicated
    /// counter by `diff`.
    ///
    /// # Errors
    ///
    /// * `Internal` if `aggr_fn` is neither `SumFloat32` nor `SumFloat64`.
    /// * `TypeMismatch` if `value` is not of the float type `aggr_fn` sums over.
    fn update(
        &mut self,
        aggr_fn: &AggregateFunc,
        value: Value,
        diff: Diff,
    ) -> Result<(), EvalError> {
        ensure!(
            matches!(
                aggr_fn,
                AggregateFunc::SumFloat32 | AggregateFunc::SumFloat64
            ),
            InternalSnafu {
                reason: format!(
                    "Float Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
        );

        let x = match (aggr_fn, value) {
            (AggregateFunc::SumFloat32, Value::Float32(x)) => OrderedF64::from(*x as f64),
            (AggregateFunc::SumFloat64, Value::Float64(x)) => OrderedF64::from(x),
            (_f, Value::Null) => return Ok(()), // ignore null
            (f, v) => {
                return Err(TypeMismatchSnafu {
                    expected: f.signature().input,
                    actual: v.data_type(),
                }
                .build());
            }
        };

        if x.is_nan() {
            self.nans += diff;
        } else if x.is_infinite() {
            if x.is_sign_positive() {
                self.pos_infs += diff;
            } else {
                self.neg_infs += diff;
            }
        } else {
            self.accum += *(x * OrderedF64::from(diff as f64));
        }

        self.non_nulls += diff;
        Ok(())
    }

    /// Returns the sum, taking the special-value counters into account:
    ///
    /// * any NaN, or both `+inf` and `-inf` present, gives NaN;
    /// * only `+inf` present gives `+inf`, only `-inf` present gives `-inf`;
    /// * otherwise the finite running sum.
    ///
    /// # Errors
    ///
    /// * `Internal` if `aggr_fn` is neither `SumFloat32` nor `SumFloat64`.
    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
        // Special values are counted rather than summed, so they have to be folded back in
        // here; otherwise a sum containing NaN or an infinity would report only the finite part.
        let sum: f64 = if self.nans > 0 || (self.pos_infs > 0 && self.neg_infs > 0) {
            f64::NAN
        } else if self.pos_infs > 0 {
            f64::INFINITY
        } else if self.neg_infs > 0 {
            f64::NEG_INFINITY
        } else {
            self.accum.0
        };

        match aggr_fn {
            AggregateFunc::SumFloat32 => Ok(Value::Float32(OrderedF32::from(sum as f32))),
            AggregateFunc::SumFloat64 => Ok(Value::Float64(OrderedF64::from(sum))),
            _ => Err(InternalSnafu {
                reason: format!(
                    "Float Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
            .build()),
        }
    }
}
/// Accumulates a single `Ord`ed `Value`, useful for min/max aggregations.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct OrdValue {
// Current extreme value; `None` until a non-null value has been observed.
val: Option<Value>,
// Number of non-null values observed, adjusted by `diff` on every update.
non_nulls: Diff,
}
/// Rebuilds an [`OrdValue`] accumulator from its state `[val or Null, non_nulls]`.
impl TryFrom<Vec<Value>> for OrdValue {
    type Error = EvalError;

    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
        ensure!(
            state.len() == 2,
            InternalSnafu {
                reason: "OrdValue Accumulator state should have 2 values",
            }
        );

        // The length check above guarantees that both `next()` calls yield a value.
        let mut fields = state.into_iter();

        // A stored `Null` stands for "no value observed yet".
        let stored = fields.next().unwrap();
        let val = if stored == Value::Null {
            None
        } else {
            Some(stored)
        };
        let non_nulls = Diff::try_from(fields.next().unwrap()).map_err(err_try_from_val)?;

        Ok(Self { val, non_nulls })
    }
}
impl Accumulator for OrdValue {
    /// State layout: `[val (or Null when unset), non_nulls]`.
    fn into_state(self) -> Vec<Value> {
        vec![self.val.unwrap_or(Value::Null), self.non_nulls.into()]
    }

    /// min/max try to find results in all non-null values, if all values are null, the result is null.
    /// count(col_name) gives the number of non-null values, count(*) gives the number of rows including nulls.
    /// TODO(discord9): add count(*) as a aggr function
    ///
    /// # Errors
    ///
    /// * `Internal` if `aggr_fn` is not a min, max or count function, or if a min/max
    ///   function receives a non-positive `diff` (retractions are not supported).
    /// * `TypeMismatch` if the incoming value, or the value already held, does not match the
    ///   input type of `aggr_fn` (not checked for count; null input is always accepted).
    fn update(
        &mut self,
        aggr_fn: &AggregateFunc,
        value: Value,
        diff: Diff,
    ) -> Result<(), EvalError> {
        let is_count = matches!(aggr_fn, AggregateFunc::Count);
        let is_min_max = aggr_fn.is_max() || aggr_fn.is_min();

        ensure!(
            is_min_max || is_count,
            InternalSnafu {
                reason: format!(
                    "OrdValue Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
        );
        if diff <= 0 && is_min_max {
            return Err(InternalSnafu {
                reason: "OrdValue Accumulator does not support non-monotonic input for min/max aggregation".to_string(),
            }.build());
        }

        // if aggr_fn is count, the incoming value type doesn't matter in type checking
        // otherwise, type need to be the same or value can be null
        let arg_type_ok = ty_eq_without_precision(value.data_type(), aggr_fn.signature().input)
            || is_count
            || value.is_null();
        if !arg_type_ok {
            return Err(TypeMismatchSnafu {
                expected: aggr_fn.signature().input,
                actual: value.data_type(),
            }
            .build());
        }

        let held_type_ok = self
            .val
            .as_ref()
            .map(|held| ty_eq_without_precision(held.data_type(), aggr_fn.signature().input))
            .unwrap_or(true)
            || is_count;
        if !held_type_ok {
            return Err(TypeMismatchSnafu {
                expected: aggr_fn.signature().input,
                actual: self
                    .val
                    .as_ref()
                    .map(|v| v.data_type())
                    .unwrap_or_else(ConcreteDataType::null_datatype),
            }
            .build());
        }

        // min/max/count(col) all ignore nulls
        if value.is_null() {
            return Ok(());
        }

        // Per the original note, count(*) is expected to be compiled to count(true) so that
        // null and non-null rows are both included; the count of non-null values is updated here.
        self.non_nulls += diff;
        match aggr_fn.signature().generic_fn {
            GenericFn::Max => {
                // Move the held value out instead of cloning it; the first observed value
                // becomes the maximum as-is.
                self.val = Some(match self.val.take() {
                    Some(held) => held.max(value),
                    None => value,
                });
            }
            GenericFn::Min => {
                self.val = Some(match self.val.take() {
                    Some(held) => held.min(value),
                    None => value,
                });
            }
            GenericFn::Count => (),
            _ => unreachable!("already checked by ensure!"),
        }
        Ok(())
    }

    /// Returns the held extreme value (or `Null` when none was observed) for min/max, and the
    /// number of non-null values for count.
    ///
    /// # Errors
    ///
    /// * `Internal` if `aggr_fn` is not a min, max or count function.
    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
        if aggr_fn.is_max() || aggr_fn.is_min() {
            Ok(self.val.clone().unwrap_or(Value::Null))
        } else if matches!(aggr_fn, AggregateFunc::Count) {
            Ok(self.non_nulls.into())
        } else {
            Err(InternalSnafu {
                reason: format!(
                    "OrdValue Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
            .build())
        }
    }
}
/// Accumulates values for the various types of accumulable aggregations.
///
/// We assume that there are not more than 2^32 elements for the aggregation.
@@ -38,34 +534,407 @@ use crate::repr::Diff;
/// The float accumulator performs accumulation with tolerance for floating point error.
///
/// TODO(discord9): check for overflowing
#[enum_dispatch(Accumulator)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Accum {
/// Accumulates boolean values.
Bool {
/// The number of `true` values observed.
trues: Diff,
/// The number of `false` values observed.
falses: Diff,
},
Bool(Bool),
/// Accumulates simple numeric values.
SimpleNumber {
/// The accumulation of all non-NULL values observed.
accum: i128,
/// The number of non-NULL values observed.
non_nulls: Diff,
},
SimpleNumber(SimpleNumber),
/// Accumulates float values.
Float {
/// Accumulates non-special float values, i.e. not NaN, +inf, -inf.
/// accum will be set to zero if `non_nulls` is zero.
accum: OrderedF64,
/// Counts +inf
pos_infs: Diff,
/// Counts -inf
neg_infs: Diff,
/// Counts NaNs
nans: Diff,
/// Counts non-NULL values
non_nulls: Diff,
},
Float(Float),
/// Accumulate Values that impl `Ord`
OrdValue(OrdValue),
}
impl Accum {
    /// Creates an empty accumulator of the kind that `aggr_fn` needs.
    ///
    /// Boolean aggregates get a `Bool`, integer sums a `SimpleNumber`, float sums a `Float`,
    /// and min/max/count an `OrdValue`. Anything else is reported as an internal error.
    pub fn new_accum(aggr_fn: &AggregateFunc) -> Result<Self, EvalError> {
        let fresh = match aggr_fn {
            AggregateFunc::Any
            | AggregateFunc::All
            | AggregateFunc::MaxBool
            | AggregateFunc::MinBool => Self::from(Bool {
                trues: 0,
                falses: 0,
            }),
            AggregateFunc::SumInt16
            | AggregateFunc::SumInt32
            | AggregateFunc::SumInt64
            | AggregateFunc::SumUInt16
            | AggregateFunc::SumUInt32
            | AggregateFunc::SumUInt64 => Self::from(SimpleNumber {
                accum: 0,
                non_nulls: 0,
            }),
            AggregateFunc::SumFloat32 | AggregateFunc::SumFloat64 => Self::from(Float {
                accum: OrderedF64::from(0.0),
                pos_infs: 0,
                neg_infs: 0,
                nans: 0,
                non_nulls: 0,
            }),
            other
                if other.is_max() || other.is_min() || matches!(other, AggregateFunc::Count) =>
            {
                Self::from(OrdValue {
                    val: None,
                    non_nulls: 0,
                })
            }
            other => {
                return Err(InternalSnafu {
                    reason: format!(
                        "Accumulator does not support this aggregation function: {:?}",
                        other
                    ),
                }
                .build());
            }
        };
        Ok(fresh)
    }

    /// Restores an accumulator for `aggr_fn` from a previously saved `state`.
    ///
    /// The dispatch mirrors [`Accum::new_accum`]; each concrete accumulator validates the
    /// state through its own `TryFrom<Vec<Value>>` implementation.
    pub fn try_into_accum(aggr_fn: &AggregateFunc, state: Vec<Value>) -> Result<Self, EvalError> {
        match aggr_fn {
            AggregateFunc::Any
            | AggregateFunc::All
            | AggregateFunc::MaxBool
            | AggregateFunc::MinBool => Bool::try_from(state).map(Self::from),
            AggregateFunc::SumInt16
            | AggregateFunc::SumInt32
            | AggregateFunc::SumInt64
            | AggregateFunc::SumUInt16
            | AggregateFunc::SumUInt32
            | AggregateFunc::SumUInt64 => SimpleNumber::try_from(state).map(Self::from),
            AggregateFunc::SumFloat32 | AggregateFunc::SumFloat64 => {
                Float::try_from(state).map(Self::from)
            }
            other
                if other.is_max() || other.is_min() || matches!(other, AggregateFunc::Count) =>
            {
                OrdValue::try_from(state).map(Self::from)
            }
            other => Err(InternalSnafu {
                reason: format!(
                    "Accumulator does not support this aggregation function: {:?}",
                    other
                ),
            }
            .build()),
        }
    }
}
/// Wraps any displayable failure reason into the "try from value" flavour of [`EvalError`].
fn err_try_from_val<T: Display>(reason: T) -> EvalError {
    let msg = reason.to_string();
    TryFromValueSnafu { msg }.build()
}
/// Compares two data types while ignoring their precision.
///
/// Types that are equal compare equal; in addition any two `Timestamp`s, any two `Time`s,
/// any two `Duration`s and any two `Interval`s are treated as the same type.
fn ty_eq_without_precision(left: ConcreteDataType, right: ConcreteDataType) -> bool {
    if left == right {
        return true;
    }
    match (&left, &right) {
        (ConcreteDataType::Timestamp(..), ConcreteDataType::Timestamp(..))
        | (ConcreteDataType::Time(..), ConcreteDataType::Time(..))
        | (ConcreteDataType::Duration(..), ConcreteDataType::Duration(..))
        | (ConcreteDataType::Interval(..), ConcreteDataType::Interval(..)) => true,
        _ => false,
    }
}
#[cfg(test)]
mod test {
use super::*;
// Table-driven happy-path test: every case is
// `(aggregate function, input (value, diff) pairs, (expected eval result, expected state))`.
// Each accumulator is built, fed the input, round-tripped through its state vector and
// then checked for both its evaluated result and its serialized state.
#[test]
fn test_accum() {
let testcases = vec![
// integer sum: the null is skipped; the result is reported as Int64 and the state
// keeps the sum as a Decimal128 followed by the non-null count
(
AggregateFunc::SumInt32,
vec![(Value::Int32(1), 1), (Value::Null, 1)],
(
Value::Int64(1),
vec![Value::Decimal128(Decimal128::new(1, 38, 0)), 1i64.into()],
),
),
// float sum: result narrowed back to Float32, state keeps the f64 sum and counters
(
AggregateFunc::SumFloat32,
vec![(Value::Float32(OrderedF32::from(1.0)), 1), (Value::Null, 1)],
(
Value::Float32(OrderedF32::from(1.0)),
vec![
Value::Float64(OrderedF64::from(1.0)),
0i64.into(),
0i64.into(),
0i64.into(),
1i64.into(),
],
),
),
// min/max: nulls are ignored, state is [extreme, non-null count]
(
AggregateFunc::MaxInt32,
vec![(Value::Int32(1), 1), (Value::Int32(2), 1), (Value::Null, 1)],
(Value::Int32(2), vec![Value::Int32(2), 2i64.into()]),
),
(
AggregateFunc::MinInt32,
vec![(Value::Int32(2), 1), (Value::Int32(1), 1), (Value::Null, 1)],
(Value::Int32(1), vec![Value::Int32(1), 2i64.into()]),
),
(
AggregateFunc::MaxFloat32,
vec![
(Value::Float32(OrderedF32::from(1.0)), 1),
(Value::Float32(OrderedF32::from(2.0)), 1),
(Value::Null, 1),
],
(
Value::Float32(OrderedF32::from(2.0)),
vec![Value::Float32(OrderedF32::from(2.0)), 2i64.into()],
),
),
(
AggregateFunc::MaxDateTime,
vec![
(Value::DateTime(DateTime::from(0)), 1),
(Value::DateTime(DateTime::from(1)), 1),
(Value::Null, 1),
],
(
Value::DateTime(DateTime::from(1)),
vec![Value::DateTime(DateTime::from(1)), 2i64.into()],
),
),
// count: only non-null inputs are counted and no value is retained in the state
(
AggregateFunc::Count,
vec![
(Value::Int32(1), 1),
(Value::Int32(2), 1),
(Value::Null, 1),
(Value::Null, 1),
],
(2i64.into(), vec![Value::Null, 2i64.into()]),
),
// boolean aggregates: one `true` and two `false` inputs, the null is skipped
// (the state is presumably [trues, falses] — confirm against `Bool::into_state`)
(
AggregateFunc::Any,
vec![
(Value::Boolean(false), 1),
(Value::Boolean(false), 1),
(Value::Boolean(true), 1),
(Value::Null, 1),
],
(
Value::Boolean(true),
vec![Value::from(1i64), Value::from(2i64)],
),
),
(
AggregateFunc::All,
vec![
(Value::Boolean(false), 1),
(Value::Boolean(false), 1),
(Value::Boolean(true), 1),
(Value::Null, 1),
],
(
Value::Boolean(false),
vec![Value::from(1i64), Value::from(2i64)],
),
),
(
AggregateFunc::MaxBool,
vec![
(Value::Boolean(false), 1),
(Value::Boolean(false), 1),
(Value::Boolean(true), 1),
(Value::Null, 1),
],
(
Value::Boolean(true),
vec![Value::from(1i64), Value::from(2i64)],
),
),
(
AggregateFunc::MinBool,
vec![
(Value::Boolean(false), 1),
(Value::Boolean(false), 1),
(Value::Boolean(true), 1),
(Value::Null, 1),
],
(
Value::Boolean(false),
vec![Value::from(1i64), Value::from(2i64)],
),
),
];
for (aggr_fn, input, (eval_res, state)) in testcases {
// build, update, then round-trip through the serialized state to exercise
// both `into_state` and `try_into_accum`
let create_and_insert = || -> Result<Accum, EvalError> {
let mut acc = Accum::new_accum(&aggr_fn)?;
acc.update_batch(&aggr_fn, input.clone())?;
let row = acc.into_state();
let acc = Accum::try_into_accum(&aggr_fn, row)?;
Ok(acc)
};
let acc = match create_and_insert() {
Ok(acc) => acc,
Err(err) => panic!(
"Failed to create accum for {:?} with input {:?} with error: {:?}",
aggr_fn, input, err
),
};
if acc.eval(&aggr_fn).unwrap() != eval_res {
panic!(
"Failed to eval accum for {:?} with input {:?}, expect {:?}, got {:?}",
aggr_fn,
input,
eval_res,
acc.eval(&aggr_fn).unwrap()
);
}
let actual_state = acc.into_state();
if actual_state != state {
panic!(
"Failed to cast into state from accum for {:?} with input {:?}, expect state {:?}, got state {:?}",
aggr_fn,
input,
state,
actual_state
);
}
}
}
// Exercises the error paths of each concrete accumulator: malformed state vectors,
// unsupported aggregate functions, mismatched input types and overflow.
#[test]
fn test_fail_path_accum() {
// Bool: a state of the wrong length is rejected
{
let bool_accum = Bool::try_from(vec![Value::Null]);
assert!(matches!(bool_accum, Err(EvalError::Internal { .. })));
}
// Bool: serde round trip, unsupported function and wrong input type
{
let mut bool_accum = Bool::try_from(vec![1i64.into(), 1i64.into()]).unwrap();
// serde
let bool_accum_serde = serde_json::to_string(&bool_accum).unwrap();
let bool_accum_de = serde_json::from_str::<Bool>(&bool_accum_serde).unwrap();
assert_eq!(bool_accum, bool_accum_de);
assert!(matches!(
bool_accum.update(&AggregateFunc::MaxDate, 1.into(), 1),
Err(EvalError::Internal { .. })
));
assert!(matches!(
bool_accum.update(&AggregateFunc::Any, 1.into(), 1),
Err(EvalError::TypeMismatch { .. })
));
assert!(matches!(
bool_accum.eval(&AggregateFunc::MaxDate),
Err(EvalError::Internal { .. })
));
}
// SimpleNumber: bad state, unsupported function, wrong input type and overflow on eval
{
let ret = SimpleNumber::try_from(vec![Value::Null]);
assert!(matches!(ret, Err(EvalError::Internal { .. })));
let mut accum =
SimpleNumber::try_from(vec![Decimal128::new(0, 38, 0).into(), 0i64.into()])
.unwrap();
assert!(matches!(
accum.update(&AggregateFunc::All, 0.into(), 1),
Err(EvalError::Internal { .. })
));
assert!(matches!(
accum.update(&AggregateFunc::SumInt64, 0i32.into(), 1),
Err(EvalError::TypeMismatch { .. })
));
assert!(matches!(
accum.eval(&AggregateFunc::All),
Err(EvalError::Internal { .. })
));
// 1 + i64::MAX no longer fits the Int64 result
accum
.update(&AggregateFunc::SumInt64, 1i64.into(), 1)
.unwrap();
accum
.update(&AggregateFunc::SumInt64, i64::MAX.into(), 1)
.unwrap();
assert!(matches!(
accum.eval(&AggregateFunc::SumInt64),
Err(EvalError::Overflow { .. })
));
}
// Float: bad state length, retraction, unsupported function and wrong input type
{
let ret = Float::try_from(vec![2f64.into(), 0i64.into(), 0i64.into(), 0i64.into()]);
assert!(matches!(ret, Err(EvalError::Internal { .. })));
let mut accum = Float::try_from(vec![
2f64.into(),
0i64.into(),
0i64.into(),
0i64.into(),
1i64.into(),
])
.unwrap();
// retract the only record (value 2.0 with diff -1)
accum
.update(&AggregateFunc::SumFloat64, 2f64.into(), -1)
.unwrap();
assert!(matches!(
accum.update(&AggregateFunc::All, 0.into(), 1),
Err(EvalError::Internal { .. })
));
assert!(matches!(
accum.update(&AggregateFunc::SumFloat64, 0.0f32.into(), 1),
Err(EvalError::TypeMismatch { .. })
));
// no record, no accum
assert_eq!(
accum.eval(&AggregateFunc::SumFloat64).unwrap(),
0.0f64.into()
);
assert!(matches!(
accum.eval(&AggregateFunc::All),
Err(EvalError::Internal { .. })
));
// special float values are accepted by `update`; nothing is asserted about the
// evaluated result afterwards
accum
.update(&AggregateFunc::SumFloat64, f64::INFINITY.into(), 1)
.unwrap();
accum
.update(&AggregateFunc::SumFloat64, (-f64::INFINITY).into(), 1)
.unwrap();
accum
.update(&AggregateFunc::SumFloat64, f64::NAN.into(), 1)
.unwrap();
}
// OrdValue: bad state, unsupported function, wrong input type, retraction, null input
{
let ret = OrdValue::try_from(vec![Value::Null]);
assert!(matches!(ret, Err(EvalError::Internal { .. })));
let mut accum = OrdValue::try_from(vec![Value::Null, 0i64.into()]).unwrap();
assert!(matches!(
accum.update(&AggregateFunc::All, 0.into(), 1),
Err(EvalError::Internal { .. })
));
accum
.update(&AggregateFunc::MaxInt16, 1i16.into(), 1)
.unwrap();
assert!(matches!(
accum.update(&AggregateFunc::MaxInt16, 0i32.into(), 1),
Err(EvalError::TypeMismatch { .. })
));
// a negative diff is rejected for min/max
assert!(matches!(
accum.update(&AggregateFunc::MaxInt16, 0i16.into(), -1),
Err(EvalError::Internal { .. })
));
// nulls are accepted and ignored
accum
.update(&AggregateFunc::MaxInt16, Value::Null, 1)
.unwrap();
}
// insert uint64 into max_int64 should fail
{
let mut accum = OrdValue::try_from(vec![Value::Null, 0i64.into()]).unwrap();
assert!(matches!(
accum.update(&AggregateFunc::MaxInt64, 0u64.into(), 1),
Err(EvalError::TypeMismatch { .. })
));
}
}
}

View File

@@ -12,15 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::type_name;
use common_time::{Date, DateTime};
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{OrderedF32, OrderedF64, Value};
use serde::{Deserialize, Serialize};
use crate::expr::error::{EvalError, TryFromValueSnafu, TypeMismatchSnafu};
use crate::expr::relation::accum::Accum;
use crate::expr::relation::accum::{Accum, Accumulator};
use crate::repr::Diff;
/// Aggregate functions that can be applied to a group of rows.
@@ -83,3 +81,280 @@ pub enum AggregateFunc {
Any,
All,
}
impl AggregateFunc {
    /// Whether this function belongs to the `max` family.
    pub fn is_max(&self) -> bool {
        matches!(self.signature().generic_fn, GenericFn::Max)
    }

    /// Whether this function belongs to the `min` family.
    pub fn is_min(&self) -> bool {
        matches!(self.signature().generic_fn, GenericFn::Min)
    }

    /// Whether this function belongs to the `sum` family.
    pub fn is_sum(&self) -> bool {
        matches!(self.signature().generic_fn, GenericFn::Sum)
    }

    /// Applies a batch of `(value, diff)` updates on top of a saved accumulator state.
    ///
    /// An empty `accum` starts from a fresh accumulator; otherwise the state is restored
    /// first. Returns the evaluated result together with the new state to persist.
    ///
    /// Expects `self` to be an accumulable aggregate function, i.e. sum/count.
    ///
    /// TODO(discord9): deal with overflow&better accumulator
    pub fn eval_diff_accumulable<I>(
        &self,
        accum: Vec<Value>,
        value_diffs: I,
    ) -> Result<(Value, Vec<Value>), EvalError>
    where
        I: IntoIterator<Item = (Value, Diff)>,
    {
        let restored = match accum.is_empty() {
            true => Accum::new_accum(self),
            false => Accum::try_into_accum(self, accum),
        };
        let mut accumulator = restored?;
        accumulator.update_batch(self, value_diffs)?;
        let output = accumulator.eval(self)?;
        Ok((output, accumulator.into_state()))
    }
}
/// Type signature of an aggregate function, as returned by `AggregateFunc::signature`.
pub struct Signature {
/// Data type the function accepts as input.
pub input: ConcreteDataType,
/// Data type the function is declared to produce.
pub output: ConcreteDataType,
/// The type-independent family the function belongs to.
pub generic_fn: GenericFn,
}
/// Type-independent family of an aggregate function, e.g. every `MaxXxx`
/// variant of `AggregateFunc` reports `Max` in its signature.
#[derive(Debug, PartialEq, Eq)]
pub enum GenericFn {
Max,
Min,
Sum,
Count,
Any,
All,
}
impl AggregateFunc {
/// all concrete datatypes with precision types will be returned with largest possible variant
/// as a exception, count have a signature of `null -> i64`, but it's actually `anytype -> i64`
pub fn signature(&self) -> Signature {
match self {
AggregateFunc::MaxInt16 => Signature {
input: ConcreteDataType::int16_datatype(),
output: ConcreteDataType::int16_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxInt32 => Signature {
input: ConcreteDataType::int32_datatype(),
output: ConcreteDataType::int32_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxInt64 => Signature {
input: ConcreteDataType::int64_datatype(),
output: ConcreteDataType::int64_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxUInt16 => Signature {
input: ConcreteDataType::uint16_datatype(),
output: ConcreteDataType::uint16_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxUInt32 => Signature {
input: ConcreteDataType::uint32_datatype(),
output: ConcreteDataType::uint32_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxUInt64 => Signature {
input: ConcreteDataType::uint64_datatype(),
output: ConcreteDataType::uint64_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxFloat32 => Signature {
input: ConcreteDataType::float32_datatype(),
output: ConcreteDataType::float32_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxFloat64 => Signature {
input: ConcreteDataType::float64_datatype(),
output: ConcreteDataType::float64_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxBool => Signature {
input: ConcreteDataType::boolean_datatype(),
output: ConcreteDataType::boolean_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxString => Signature {
input: ConcreteDataType::string_datatype(),
output: ConcreteDataType::string_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxDate => Signature {
input: ConcreteDataType::date_datatype(),
output: ConcreteDataType::date_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxDateTime => Signature {
input: ConcreteDataType::datetime_datatype(),
output: ConcreteDataType::datetime_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxTimestamp => Signature {
input: ConcreteDataType::timestamp_second_datatype(),
output: ConcreteDataType::timestamp_second_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxTime => Signature {
input: ConcreteDataType::time_second_datatype(),
output: ConcreteDataType::time_second_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxDuration => Signature {
input: ConcreteDataType::duration_second_datatype(),
output: ConcreteDataType::duration_second_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MaxInterval => Signature {
input: ConcreteDataType::interval_year_month_datatype(),
output: ConcreteDataType::interval_year_month_datatype(),
generic_fn: GenericFn::Max,
},
AggregateFunc::MinInt16 => Signature {
input: ConcreteDataType::int16_datatype(),
output: ConcreteDataType::int16_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinInt32 => Signature {
input: ConcreteDataType::int32_datatype(),
output: ConcreteDataType::int32_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinInt64 => Signature {
input: ConcreteDataType::int64_datatype(),
output: ConcreteDataType::int64_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinUInt16 => Signature {
input: ConcreteDataType::uint16_datatype(),
output: ConcreteDataType::uint16_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinUInt32 => Signature {
input: ConcreteDataType::uint32_datatype(),
output: ConcreteDataType::uint32_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinUInt64 => Signature {
input: ConcreteDataType::uint64_datatype(),
output: ConcreteDataType::uint64_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinFloat32 => Signature {
input: ConcreteDataType::float32_datatype(),
output: ConcreteDataType::float32_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinFloat64 => Signature {
input: ConcreteDataType::float64_datatype(),
output: ConcreteDataType::float64_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinBool => Signature {
input: ConcreteDataType::boolean_datatype(),
output: ConcreteDataType::boolean_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinString => Signature {
input: ConcreteDataType::string_datatype(),
output: ConcreteDataType::string_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinDate => Signature {
input: ConcreteDataType::date_datatype(),
output: ConcreteDataType::date_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinDateTime => Signature {
input: ConcreteDataType::datetime_datatype(),
output: ConcreteDataType::datetime_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinTimestamp => Signature {
input: ConcreteDataType::timestamp_second_datatype(),
output: ConcreteDataType::timestamp_second_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinTime => Signature {
input: ConcreteDataType::time_second_datatype(),
output: ConcreteDataType::time_second_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinDuration => Signature {
input: ConcreteDataType::duration_second_datatype(),
output: ConcreteDataType::duration_second_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::MinInterval => Signature {
input: ConcreteDataType::interval_year_month_datatype(),
output: ConcreteDataType::interval_year_month_datatype(),
generic_fn: GenericFn::Min,
},
AggregateFunc::SumInt16 => Signature {
input: ConcreteDataType::int16_datatype(),
output: ConcreteDataType::int16_datatype(),
generic_fn: GenericFn::Sum,
},
AggregateFunc::SumInt32 => Signature {
input: ConcreteDataType::int32_datatype(),
output: ConcreteDataType::int32_datatype(),
generic_fn: GenericFn::Sum,
},
AggregateFunc::SumInt64 => Signature {
input: ConcreteDataType::int64_datatype(),
output: ConcreteDataType::int64_datatype(),
generic_fn: GenericFn::Sum,
},
AggregateFunc::SumUInt16 => Signature {
input: ConcreteDataType::uint16_datatype(),
output: ConcreteDataType::uint16_datatype(),
generic_fn: GenericFn::Sum,
},
AggregateFunc::SumUInt32 => Signature {
input: ConcreteDataType::uint32_datatype(),
output: ConcreteDataType::uint32_datatype(),
generic_fn: GenericFn::Sum,
},
AggregateFunc::SumUInt64 => Signature {
input: ConcreteDataType::uint64_datatype(),
output: ConcreteDataType::uint64_datatype(),
generic_fn: GenericFn::Sum,
},
AggregateFunc::SumFloat32 => Signature {
input: ConcreteDataType::float32_datatype(),
output: ConcreteDataType::float32_datatype(),
generic_fn: GenericFn::Sum,
},
AggregateFunc::SumFloat64 => Signature {
input: ConcreteDataType::float64_datatype(),
output: ConcreteDataType::float64_datatype(),
generic_fn: GenericFn::Sum,
},
AggregateFunc::Count => Signature {
input: ConcreteDataType::null_datatype(),
output: ConcreteDataType::int64_datatype(),
generic_fn: GenericFn::Count,
},
AggregateFunc::Any => Signature {
input: ConcreteDataType::boolean_datatype(),
output: ConcreteDataType::boolean_datatype(),
generic_fn: GenericFn::Any,
},
AggregateFunc::All => Signature {
input: ConcreteDataType::boolean_datatype(),
output: ConcreteDataType::boolean_datatype(),
generic_fn: GenericFn::All,
},
}
}
}

View File

@@ -17,4 +17,5 @@
// allow unused for now because it should be use later
mod adapter;
mod expr;
mod plan;
mod repr;

98
src/flow/src/plan.rs Normal file
View File

@@ -0,0 +1,98 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! This module contains the basic definitions of a dataflow plan
//! that can be translated to a hydro dataflow
mod join;
mod reduce;
use serde::{Deserialize, Serialize};
pub(crate) use self::reduce::{AccumulablePlan, KeyValPlan, ReducePlan};
use crate::expr::{
AggregateExpr, EvalError, Id, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr,
};
use crate::plan::join::JoinPlan;
use crate::repr::{DiffRow, RelationType};
/// A [`Plan`] bundled with the type of the relation it outputs.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct TypedPlan {
/// output type of the relation
pub typ: RelationType,
/// the untyped plan itself
pub plan: Plan,
}
/// A node of the dataflow plan; nodes reference their inputs as nested `Plan`s.
///
/// TODO(discord9): support `TableFunc` by defining a `FlatMap` that maps 1 to n
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub enum Plan {
/// A constant collection of rows.
Constant { rows: Vec<DiffRow> },
/// Get CDC data from a source, be it an external reference to an existing source or an internal
/// reference to a `Let` identifier
Get { id: Id },
/// Create a temporary collection from the given `value`, and make this binding only available
/// in the scope of `body`
Let {
id: LocalId,
value: Box<Plan>,
body: Box<Plan>,
},
/// Map, Filter, and Project operators.
Mfp {
/// The input collection.
input: Box<Plan>,
/// Linear operator to apply to each record.
mfp: MapFilterProject,
},
/// Reduce operator, aggregation by key assembled from KeyValPlan
Reduce {
/// The input collection.
input: Box<Plan>,
/// A plan for changing input records into key, value pairs.
key_val_plan: KeyValPlan,
/// A plan for performing the reduce.
///
/// The implementation of reduction has several different strategies based
/// on the properties of the reduction, and the input itself.
reduce_plan: ReducePlan,
},
/// A multiway relational equijoin, with fused map, filter, and projection.
///
/// This stage performs a multiway join among `inputs`, using the equality
/// constraints expressed in `plan`. The plan also describes the implementation
/// strategy we will use, and any pushed down per-record work.
Join {
/// An ordered list of inputs that will be joined.
inputs: Vec<Plan>,
/// Detailed information about the implementation of the join.
///
/// This includes information about the implementation strategy, but also
/// any map, filter, project work that we might follow the join with, but
/// potentially pushed down into the implementation of the join.
plan: JoinPlan,
},
/// Adds the contents of the input collections.
///
/// Importantly, this is *multiset* union, so the multiplicities of records will
/// add. This is in contrast to *set* union, where the multiplicities would be
/// capped at one. A set union can be formed with `Union` followed by `Reduce`
/// implementing the "distinct" operator.
Union {
/// The input collections
inputs: Vec<Plan>,
/// Whether to consolidate the output, e.g., cancel negated records.
consolidate_output: bool,
},
}

78
src/flow/src/plan/join.rs Normal file
View File

@@ -0,0 +1,78 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use crate::expr::ScalarExpr;
use crate::plan::SafeMfpPlan;
/// The strategy used to execute a join; only the linear strategy exists so far.
///
/// TODO(discord9): consider implementing more join strategies
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub enum JoinPlan {
/// Join the inputs one after another, see [`LinearJoinPlan`].
Linear(LinearJoinPlan),
}
/// Determines whether a given row should stay in the output, and applies a
/// map-filter-project to the row before it is output.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct JoinFilter {
/// Each element of the outer vector is a group of expressions that must all evaluate to the
/// same value; if they do not, the row is filtered out. Useful for equi-joins (joins based on
/// the equality of some columns).
pub ready_equivalences: Vec<Vec<ScalarExpr>>,
/// Apply a map filter project before output the row
pub before: SafeMfpPlan,
}
/// A plan for the execution of a linear join.
///
/// A linear join is a sequence of stages, each of which introduces
/// a new collection. Each stage is represented by a [`LinearStagePlan`].
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct LinearJoinPlan {
/// The source relation from which we start the join.
/// (presumably an index into the join's `inputs` — confirm against the renderer)
pub source_relation: usize,
/// The arrangement to use for the source relation, if any
pub source_key: Option<Vec<ScalarExpr>>,
/// An initial closure to apply before any stages.
///
/// Values of `None` indicate the identity closure.
pub initial_closure: Option<JoinFilter>,
/// A *sequence* of stages to apply one after the other.
pub stage_plans: Vec<LinearStagePlan>,
/// A concluding filter to apply after the last stage.
///
/// Values of `None` indicate the identity closure.
pub final_closure: Option<JoinFilter>,
}
/// A plan for the execution of one stage of a linear join.
///
/// Each stage is a binary join between the current accumulated
/// join results, and a new collection. The former is referred to
/// as the "stream" and the latter the "lookup".
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct LinearStagePlan {
/// The index of the relation into which we will look up.
pub lookup_relation: usize,
/// The key expressions to use for the stream relation.
pub stream_key: Vec<ScalarExpr>,
/// Columns to retain from the stream relation.
/// These columns are those that are not redundant with `stream_key`,
/// and cannot be read out of the key component of an arrangement.
pub stream_thinning: Vec<usize>,
/// The key expressions to use for the lookup relation.
pub lookup_key: Vec<ScalarExpr>,
/// The closure to apply to the concatenation of the key columns,
/// the stream value columns, and the lookup value columns.
pub closure: JoinFilter,
}

View File

@@ -0,0 +1,50 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use crate::expr::{AggregateExpr, Id, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr};
/// A pair of map-filter-project plans used by a reduce.
///
/// NOTE(review): judging by the field names, `key_plan` extracts the grouping key and
/// `val_plan` the values to aggregate from each input record — confirm against the renderer.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct KeyValPlan {
pub key_plan: SafeMfpPlan,
pub val_plan: SafeMfpPlan,
}
/// The strategy used to perform a reduction.
///
/// TODO(discord9): define and implement hierarchical aggregates (for min/max with support for
/// deletion), basic aggregates (for other aggregate functions) and mixed aggregates
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub enum ReducePlan {
/// Plan for not computing any aggregations, just determining the set of
/// distinct keys.
Distinct,
/// Plan for computing only accumulable aggregations.
/// Including simple functions like `sum`, `count`, `min/max`(without deletion)
Accumulable(AccumulablePlan),
}
/// Accumulable plan for the execution of a reduction.
///
/// The aggregations are listed in full and additionally split into non-distinct and
/// `DISTINCT` ones.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct AccumulablePlan {
/// All of the aggregations we were asked to compute, stored
/// in order.
pub full_aggrs: Vec<AggregateExpr>,
/// All of the non-distinct accumulable aggregates.
/// Each element represents:
/// (index of aggr output, index of value among inputs, aggr expr)
/// These will all be rendered together in one dataflow fragment.
pub simple_aggrs: Vec<(usize, usize, AggregateExpr)>,
/// Same as above but for all of the `DISTINCT` accumulable aggregations.
pub distinct_aggrs: Vec<(usize, usize, AggregateExpr)>,
}

View File

@@ -33,7 +33,10 @@ use snafu::ResultExt;
use crate::expr::error::{CastValueSnafu, EvalError};
/// System-wide record count difference type, used to capture data changes.
///
/// i.e. +1 means insert one record, -1 means remove one record,
/// and +/-n means insert/remove multiple duplicate records.
pub type Diff = i64;
/// System-wide default timestamp type

View File

@@ -152,6 +152,10 @@ impl TxnService for RaftEngineBackend {
responses,
})
}
// Always reports `usize::MAX`.
// NOTE(review): presumably this means the backend puts no cap on the number of
// operations in one transaction — confirm against the `TxnService` trait docs.
fn max_txn_ops(&self) -> usize {
usize::MAX
}
}
#[async_trait::async_trait]

View File

@@ -380,6 +380,10 @@ impl TxnService for LeaderCachedKvBackend {
Ok(res)
}
// Forwards to the wrapped `store`, so this backend reports whatever limit the
// underlying store reports.
fn max_txn_ops(&self) -> usize {
self.store.max_txn_ops()
}
}
impl ResettableKvBackend for LeaderCachedKvBackend {

View File

@@ -79,5 +79,6 @@ rand.workspace = true
toml.workspace = true
[[bench]]
name = "bench_merge_tree"
name = "memtable_bench"
harness = false
required-features = ["test"]

View File

@@ -7,3 +7,9 @@ The Alfa Romeo [MiTo](https://en.wikipedia.org/wiki/Alfa_Romeo_MiTo) is a front-
> "You can't be a true petrolhead until you've owned an Alfa Romeo."
> <div align="right">-- by Jeremy Clarkson</div>
## Benchmarks
Run benchmarks in this crate:
```bash
cargo bench -p mito2 -F test
```

View File

@@ -0,0 +1,352 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use api::v1::value::ValueData;
use api::v1::{Row, Rows, SemanticType};
use criterion::{criterion_group, criterion_main, Criterion};
use datafusion_common::Column;
use datafusion_expr::{lit, Expr};
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
use mito2::memtable::time_series::TimeSeriesMemtable;
use mito2::memtable::{KeyValues, Memtable};
use mito2::test_util::memtable_util::{self, region_metadata_to_row_schema};
use rand::rngs::ThreadRng;
use rand::seq::SliceRandom;
use rand::Rng;
use store_api::metadata::{
ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef,
};
use store_api::storage::RegionId;
use table::predicate::Predicate;
/// Benchmarks writing one fixed batch of rows into each memtable implementation.
fn write_rows(c: &mut Criterion) {
    let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
    let timestamps: Vec<_> = (0..100).collect();
    // Note that every batch uses the same key ("hello", 42), so this benchmark
    // only generates one time series.
    let new_batch =
        || memtable_util::build_key_values(&metadata, String::from("hello"), 42, &timestamps, 1);

    let mut group = c.benchmark_group("write");

    group.bench_function("merge_tree", |b| {
        let config = MergeTreeConfig::default();
        let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
        let kvs = new_batch();
        b.iter(|| memtable.write(&kvs).unwrap());
    });

    group.bench_function("time_series", |b| {
        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
        let kvs = new_batch();
        b.iter(|| memtable.write(&kvs).unwrap());
    });
}
/// Benchmarks an unfiltered scan over every row of a populated memtable.
fn full_scan(c: &mut Criterion) {
    let metadata = Arc::new(cpu_metadata());
    let config = MergeTreeConfig::default();
    // Two hours of data starting at this epoch second, for 4000 hosts.
    let start_sec = 1_710_043_200;
    let end_sec = start_sec + 2 * 3600;
    let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, end_sec);

    let mut group = c.benchmark_group("full_scan");
    group.sample_size(10);

    group.bench_function("merge_tree", |b| {
        let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
        generator
            .iter()
            .for_each(|kvs| memtable.write(&kvs).unwrap());

        b.iter(|| {
            // Only the scan itself is inside the measured closure.
            for batch in memtable.iter(None, None).unwrap() {
                let _batch = batch.unwrap();
            }
        });
    });

    group.bench_function("time_series", |b| {
        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
        generator
            .iter()
            .for_each(|kvs| memtable.write(&kvs).unwrap());

        b.iter(|| {
            for batch in memtable.iter(None, None).unwrap() {
                let _batch = batch.unwrap();
            }
        });
    });
}
/// Benchmarks a scan restricted to one randomly chosen `hostname`.
fn filter_1_host(c: &mut Criterion) {
    let metadata = Arc::new(cpu_metadata());
    let config = MergeTreeConfig::default();
    // Two hours of data starting at this epoch second, for 4000 hosts.
    let start_sec = 1_710_043_200;
    let end_sec = start_sec + 2 * 3600;
    let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, end_sec);

    let mut group = c.benchmark_group("filter_1_host");
    group.sample_size(10);

    group.bench_function("merge_tree", |b| {
        let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
        generator
            .iter()
            .for_each(|kvs| memtable.write(&kvs).unwrap());
        let predicate = generator.random_host_filter();

        b.iter(|| {
            // The predicate is cloned because `iter` takes it by value.
            for batch in memtable.iter(None, Some(predicate.clone())).unwrap() {
                let _batch = batch.unwrap();
            }
        });
    });

    group.bench_function("time_series", |b| {
        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
        generator
            .iter()
            .for_each(|kvs| memtable.write(&kvs).unwrap());
        let predicate = generator.random_host_filter();

        b.iter(|| {
            for batch in memtable.iter(None, Some(predicate.clone())).unwrap() {
                let _batch = batch.unwrap();
            }
        });
    });
}
/// Tag values describing one simulated host in the benchmark data set.
///
/// `Host::fill_values` emits every field, in declaration order, as one string tag value.
struct Host {
/// Host name; `host_{id}` when created by `Host::random_with_id`.
hostname: String,
/// Region name; `ap-southeast-N` with a random `N` in `0..10` when created randomly.
region: String,
/// The region name followed by a random letter from `a` to `e` when created randomly.
datacenter: String,
/// Random number in `0..100`, stored as a string.
rack: String,
/// Operating system tag; a fixed value when created randomly.
os: String,
/// Architecture tag; a fixed value when created randomly.
arch: String,
/// Team tag; a fixed value when created randomly.
team: String,
/// Random number in `0..100`, stored as a string.
service: String,
/// Random number in `0..10`, stored as a string.
service_version: String,
/// Environment tag; a fixed value when created randomly.
service_environment: String,
}
impl Host {
    /// Creates a host named `host_{id}` whose remaining tags are random or fixed values.
    fn random_with_id(id: usize) -> Host {
        let mut rng = rand::thread_rng();

        let region = format!("ap-southeast-{}", rng.gen_range(0..10));
        let zone = ['a', 'b', 'c', 'd', 'e'].choose(&mut rng).unwrap();
        let datacenter = format!("{region}{zone}");
        let rack = rng.gen_range(0..100).to_string();
        let service = rng.gen_range(0..100).to_string();
        let service_version = rng.gen_range(0..10).to_string();

        Host {
            hostname: format!("host_{id}"),
            region,
            datacenter,
            rack,
            os: String::from("Ubuntu16.04LTS"),
            arch: String::from("x86"),
            team: String::from("CHI"),
            service,
            service_version,
            service_environment: String::from("test"),
        }
    }

    /// Appends the ten tag values of this host to `values` as string values.
    ///
    /// The order matches the field declaration order of `Host`.
    fn fill_values(&self, values: &mut Vec<api::v1::Value>) {
        let tags = [
            &self.hostname,
            &self.region,
            &self.datacenter,
            &self.rack,
            &self.os,
            &self.arch,
            &self.team,
            &self.service,
            &self.service_version,
            &self.service_environment,
        ];
        values.extend(tags.into_iter().map(|tag| api::v1::Value {
            value_data: Some(ValueData::StringValue(tag.clone())),
        }));
    }
}
/// Generates cpu-like rows for a fixed set of hosts over a time range.
struct CpuDataGenerator {
/// Region metadata used to build `KeyValues` from each generated mutation.
metadata: RegionMetadataRef,
/// Row schema derived from `metadata` via `region_metadata_to_row_schema`.
column_schemas: Vec<api::v1::ColumnSchema>,
/// Hosts for which one row is generated at every timestamp.
hosts: Vec<Host>,
/// Start of the generated time range in seconds (inclusive).
start_sec: i64,
/// End of the generated time range in seconds (exclusive).
end_sec: i64,
}
impl CpuDataGenerator {
    /// Creates a generator with `num_hosts` random hosts covering `start_sec..end_sec`.
    fn new(metadata: RegionMetadataRef, num_hosts: usize, start_sec: i64, end_sec: i64) -> Self {
        Self {
            // Fields are evaluated in order, so `metadata` is borrowed here before it is moved.
            column_schemas: region_metadata_to_row_schema(&metadata),
            hosts: Self::generate_hosts(num_hosts),
            metadata,
            start_sec,
            end_sec,
        }
    }

    /// Yields one `KeyValues` per 10 second step, each holding one row for every host.
    fn iter(&self) -> impl Iterator<Item = KeyValues> + '_ {
        let timestamps = (self.start_sec..self.end_sec).step_by(10);
        timestamps
            .enumerate()
            .map(|(seq, ts)| self.build_key_values(seq, ts))
    }

    /// Builds a put mutation with sequence `seq` containing one row per host at `current_sec`.
    fn build_key_values(&self, seq: usize, current_sec: i64) -> KeyValues {
        let ts_millis = current_sec * 1000;
        let mut rows = Vec::with_capacity(self.hosts.len());
        for host in &self.hosts {
            let mut rng = rand::thread_rng();
            // 1 timestamp + 10 tags + 10 fields.
            let mut values = Vec::with_capacity(21);
            values.push(api::v1::Value {
                value_data: Some(ValueData::TimestampMillisecondValue(ts_millis)),
            });
            host.fill_values(&mut values);
            values.extend((0..10).map(|_| api::v1::Value {
                value_data: Some(ValueData::F64Value(Self::random_f64(&mut rng))),
            }));
            rows.push(Row { values });
        }

        let rows = Rows {
            schema: self.column_schemas.clone(),
            rows,
        };
        let mutation = api::v1::Mutation {
            op_type: api::v1::OpType::Put as i32,
            sequence: seq as u64,
            rows: Some(rows),
        };
        KeyValues::new(&self.metadata, mutation).unwrap()
    }

    /// Returns a predicate `hostname = <host>` for a randomly picked host.
    fn random_host_filter(&self) -> Predicate {
        let hostname = lit(self.random_hostname());
        let matches_host = Expr::Column(Column::from_name("hostname")).eq(hostname);
        Predicate::new(vec![matches_host.into()])
    }

    /// Returns the name of a randomly picked host; panics if there are no hosts.
    fn random_hostname(&self) -> String {
        let picked = self.hosts.choose(&mut rand::thread_rng()).unwrap();
        picked.hostname.clone()
    }

    /// Returns a random whole number in `30..95` as `f64`.
    fn random_f64(rng: &mut ThreadRng) -> f64 {
        f64::from(rng.gen_range(30_u32..95))
    }

    /// Creates `num_hosts` random hosts with ids `0..num_hosts`.
    fn generate_hosts(num_hosts: usize) -> Vec<Host> {
        let mut hosts = Vec::with_capacity(num_hosts);
        for id in 0..num_hosts {
            hosts.push(Host::random_with_id(id));
        }
        hosts
    }
}
/// Builds the region metadata of a TSBS cpu-like table.
///
/// Column ids: `ts` is 0, the ten string tags are 1..=10 and the ten float64 fields
/// are 11..=20. The primary key consists of all tag columns in declaration order.
fn cpu_metadata() -> RegionMetadata {
    const TAGS: [&str; 10] = [
        "hostname",
        "region",
        "datacenter",
        "rack",
        "os",
        "arch",
        "team",
        "service",
        "service_version",
        "service_environment",
    ];
    const FIELDS: [&str; 10] = [
        "usage_user",
        "usage_system",
        "usage_idle",
        "usage_nice",
        "usage_iowait",
        "usage_irq",
        "usage_softirq",
        "usage_steal",
        "usage_guest",
        "usage_guest_nice",
    ];

    let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
    builder.push_column_metadata(ColumnMetadata {
        column_schema: ColumnSchema::new(
            "ts",
            ConcreteDataType::timestamp_millisecond_datatype(),
            false,
        ),
        semantic_type: SemanticType::Timestamp,
        column_id: 0,
    });

    let tag_columns = TAGS
        .into_iter()
        .map(|name| (name, ConcreteDataType::string_datatype(), SemanticType::Tag));
    let field_columns = FIELDS
        .into_iter()
        .map(|name| (name, ConcreteDataType::float64_datatype(), SemanticType::Field));
    // Tags come first, then fields; ids continue from 1 across both groups.
    for (column_id, (name, data_type, semantic_type)) in
        (1..).zip(tag_columns.chain(field_columns))
    {
        builder.push_column_metadata(ColumnMetadata {
            column_schema: ColumnSchema::new(name, data_type, true),
            semantic_type,
            column_id,
        });
    }

    builder.primary_key((1..=10).collect());
    builder.build().unwrap()
}
// Registers the write, full scan and single-host filter benchmarks in the `benches`
// group and hands that group to criterion's entry-point macro.
criterion_group!(benches, write_rows, full_scan, filter_1_host);
criterion_main!(benches);

View File

@@ -1,36 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use criterion::{criterion_group, criterion_main, Criterion};
use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
use mito2::memtable::Memtable;
use mito2::test_util::memtable_util;
/// Benchmarks repeated writes of one batch of rows into a single `MergeTreeMemtable`.
fn bench_merge_tree_memtable(c: &mut Criterion) {
    let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
    let timestamps: Vec<_> = (0..100).collect();
    let config = MergeTreeConfig::default();
    // The memtable is shared by all samples, so it keeps growing while the benchmark runs.
    let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);

    let _ = c.bench_function("MergeTreeMemtable", |b| {
        let kvs = memtable_util::build_key_values(
            &metadata,
            String::from("hello"),
            42,
            &timestamps,
            1,
        );
        b.iter(|| memtable.write(&kvs).unwrap());
    });
}
// Registers the merge tree write benchmark in the `benches` group and hands that
// group to criterion's entry-point macro.
criterion_group!(benches, bench_merge_tree_memtable);
criterion_main!(benches);

View File

@@ -85,7 +85,7 @@ impl Default for MergeTreeConfig {
Self {
index_max_keys_per_shard: 8192,
data_freeze_threshold: 32768,
data_freeze_threshold: 131072,
dedup: true,
fork_dictionary_bytes,
}

View File

@@ -957,6 +957,18 @@ impl DataParts {
self.active.write_row(pk_index, kv)
}
/// Returns the number of rows in the active buffer.
///
/// Only `self.active` is consulted; rows held by frozen parts are not counted.
pub fn num_active_rows(&self) -> usize {
self.active.num_rows()
}
/// Freezes the active buffer and appends the resulting part to the frozen parts.
///
/// Returns an error if freezing the active buffer fails; in that case nothing is
/// added to the frozen parts.
pub fn freeze(&mut self) -> Result<()> {
    self.frozen.push(self.active.freeze(None, false)?);
    Ok(())
}
/// Reads data from all parts including active and frozen parts.
/// The returned iterator yields a record batch of one primary key at a time.
/// The order of yielding primary keys is determined by provided weights.
@@ -976,6 +988,11 @@ impl DataParts {
pub(crate) fn is_empty(&self) -> bool {
    // A non-empty active buffer settles the answer without looking at frozen parts.
    if !self.active.is_empty() {
        return false;
    }
    // Otherwise the parts are empty only if no frozen part holds data.
    !self.frozen.iter().any(|part| !part.is_empty())
}
/// Returns the number of frozen parts. Only compiled for tests.
#[cfg(test)]
pub(crate) fn frozen_len(&self) -> usize {
self.frozen.len()
}
}
pub struct DataPartsReaderBuilder {

View File

@@ -78,7 +78,7 @@ impl Partition {
// Finds key in shards, now we ensure one key only exists in one shard.
if let Some(pk_id) = inner.find_key_in_shards(primary_key) {
inner.write_to_shard(pk_id, &key_value);
inner.write_to_shard(pk_id, &key_value)?;
inner.num_rows += 1;
return Ok(());
}
@@ -106,7 +106,7 @@ impl Partition {
}
/// Writes to the partition without a primary key.
pub fn write_no_key(&self, key_value: KeyValue) {
pub fn write_no_key(&self, key_value: KeyValue) -> Result<()> {
let mut inner = self.inner.write().unwrap();
// If no primary key, always write to the first shard.
debug_assert!(!inner.shards.is_empty());
@@ -117,12 +117,15 @@ impl Partition {
shard_id: 0,
pk_index: 0,
};
inner.shards[0].write_with_pk_id(pk_id, &key_value);
inner.shards[0].write_with_pk_id(pk_id, &key_value)?;
inner.num_rows += 1;
Ok(())
}
/// Scans data in the partition.
pub fn read(&self, mut context: ReadPartitionContext) -> Result<PartitionReader> {
let start = Instant::now();
let key_filter = if context.need_prune_key {
Some(PrimaryKeyFilter::new(
context.metadata.clone(),
@@ -150,7 +153,7 @@ impl Partition {
(builder_reader, shard_source)
};
context.metrics.num_shards = shard_reader_builders.len();
context.metrics.num_shards += shard_reader_builders.len();
let mut nodes = shard_reader_builders
.into_iter()
.map(|builder| {
@@ -161,7 +164,7 @@ impl Partition {
.collect::<Result<Vec<_>>>()?;
if let Some(builder) = builder_source {
context.metrics.read_builder = true;
context.metrics.num_builder += 1;
// Move the initialization of ShardBuilderReader out of read lock.
let shard_builder_reader =
builder.build(Some(&context.pk_weights), key_filter.clone())?;
@@ -172,8 +175,10 @@ impl Partition {
let merger = ShardMerger::try_new(nodes)?;
if self.dedup {
let source = DedupReader::try_new(merger)?;
context.metrics.build_partition_reader += start.elapsed();
PartitionReader::new(context, Box::new(source))
} else {
context.metrics.build_partition_reader += start.elapsed();
PartitionReader::new(context, Box::new(merger))
}
}
@@ -282,9 +287,10 @@ pub(crate) struct PartitionStats {
#[derive(Default)]
struct PartitionReaderMetrics {
build_partition_reader: Duration,
read_source: Duration,
data_batch_to_batch: Duration,
read_builder: bool,
num_builder: usize,
num_shards: usize,
}
@@ -440,9 +446,15 @@ impl Drop for ReadPartitionContext {
.observe(partition_data_batch_to_batch);
common_telemetry::debug!(
"TreeIter partitions metrics, read_builder: {}, num_shards: {}, partition_read_source: {}s, partition_data_batch_to_batch: {}s",
self.metrics.read_builder,
"TreeIter partitions metrics, \
num_builder: {}, \
num_shards: {}, \
build_partition_reader: {}s, \
partition_read_source: {}s, \
partition_data_batch_to_batch: {}s",
self.metrics.num_builder,
self.metrics.num_shards,
self.metrics.build_partition_reader.as_secs_f64(),
partition_read_source,
partition_data_batch_to_batch,
);
@@ -549,7 +561,16 @@ impl Inner {
fn new(metadata: RegionMetadataRef, config: &MergeTreeConfig) -> Self {
let (shards, current_shard_id) = if metadata.primary_key.is_empty() {
let data_parts = DataParts::new(metadata.clone(), DATA_INIT_CAP, config.dedup);
(vec![Shard::new(0, None, data_parts, config.dedup)], 1)
(
vec![Shard::new(
0,
None,
data_parts,
config.dedup,
config.data_freeze_threshold,
)],
1,
)
} else {
(Vec::new(), 0)
};
@@ -569,18 +590,22 @@ impl Inner {
self.pk_to_pk_id.get(primary_key).copied()
}
fn write_to_shard(&mut self, pk_id: PkId, key_value: &KeyValue) {
fn write_to_shard(&mut self, pk_id: PkId, key_value: &KeyValue) -> Result<()> {
if pk_id.shard_id == self.shard_builder.current_shard_id() {
self.shard_builder.write_with_pk_id(pk_id, key_value);
return;
}
for shard in &mut self.shards {
if shard.shard_id == pk_id.shard_id {
shard.write_with_pk_id(pk_id, key_value);
self.num_rows += 1;
return;
}
return Ok(());
}
// Safety: We find the shard by shard id.
let shard = self
.shards
.iter_mut()
.find(|shard| shard.shard_id == pk_id.shard_id)
.unwrap();
shard.write_with_pk_id(pk_id, key_value)?;
self.num_rows += 1;
Ok(())
}
fn freeze_active_shard(&mut self) -> Result<()> {

View File

@@ -39,6 +39,8 @@ pub struct Shard {
/// Data in the shard.
data_parts: DataParts,
dedup: bool,
/// Number of rows to freeze a data part.
data_freeze_threshold: usize,
}
impl Shard {
@@ -48,20 +50,29 @@ impl Shard {
key_dict: Option<KeyDictRef>,
data_parts: DataParts,
dedup: bool,
data_freeze_threshold: usize,
) -> Shard {
Shard {
shard_id,
key_dict,
data_parts,
dedup,
data_freeze_threshold,
}
}
/// Writes a key value into the shard.
pub fn write_with_pk_id(&mut self, pk_id: PkId, key_value: &KeyValue) {
///
/// It freezes the active buffer first if the buffer is full.
pub fn write_with_pk_id(&mut self, pk_id: PkId, key_value: &KeyValue) -> Result<()> {
debug_assert_eq!(self.shard_id, pk_id.shard_id);
if self.data_parts.num_active_rows() >= self.data_freeze_threshold {
self.data_parts.freeze()?;
}
self.data_parts.write_row(pk_id.pk_index, key_value);
Ok(())
}
/// Scans the shard.
@@ -83,6 +94,7 @@ impl Shard {
key_dict: self.key_dict.clone(),
data_parts: DataParts::new(metadata, DATA_INIT_CAP, self.dedup),
dedup: self.dedup,
data_freeze_threshold: self.data_freeze_threshold,
}
}
@@ -467,6 +479,7 @@ mod tests {
shard_id: ShardId,
metadata: RegionMetadataRef,
input: &[(KeyValues, PkIndex)],
data_freeze_threshold: usize,
) -> Shard {
let mut dict_builder = KeyDictBuilder::new(1024);
let mut metrics = WriteMetrics::default();
@@ -481,26 +494,16 @@ mod tests {
let dict = dict_builder.finish(&mut BTreeMap::new()).unwrap();
let data_parts = DataParts::new(metadata, DATA_INIT_CAP, true);
Shard::new(shard_id, Some(Arc::new(dict)), data_parts, true)
Shard::new(
shard_id,
Some(Arc::new(dict)),
data_parts,
true,
data_freeze_threshold,
)
}
#[test]
fn test_write_read_shard() {
let metadata = metadata_for_test();
let input = input_with_key(&metadata);
let mut shard = new_shard_with_dict(8, metadata, &input);
assert!(shard.is_empty());
for (key_values, pk_index) in &input {
for kv in key_values.iter() {
let pk_id = PkId {
shard_id: shard.shard_id,
pk_index: *pk_index,
};
shard.write_with_pk_id(pk_id, &kv);
}
}
assert!(!shard.is_empty());
fn collect_timestamps(shard: &Shard) -> Vec<i64> {
let mut reader = shard.read().unwrap().build(None).unwrap();
let mut timestamps = Vec::new();
while reader.is_valid() {
@@ -511,6 +514,64 @@ mod tests {
reader.next().unwrap();
}
timestamps
}
/// Writes the keyed test input into a shard and checks the timestamps read back.
#[test]
fn test_write_read_shard() {
    let metadata = metadata_for_test();
    let input = input_with_key(&metadata);
    // The freeze threshold of 100 rows is passed straight through to the shard.
    let mut shard = new_shard_with_dict(8, metadata, &input, 100);
    assert!(shard.is_empty());

    let shard_id = shard.shard_id;
    for (key_values, pk_index) in &input {
        let pk_id = PkId {
            shard_id,
            pk_index: *pk_index,
        };
        for kv in key_values.iter() {
            shard.write_with_pk_id(pk_id, &kv).unwrap();
        }
    }
    assert!(!shard.is_empty());

    let timestamps = collect_timestamps(&shard);
    assert_eq!(vec![0, 1, 10, 11, 20, 21], timestamps);
}
// Checks that a shard freezes its active buffer once the row threshold is reached
// and that all rows remain readable afterwards.
#[test]
fn test_shard_freeze() {
let metadata = metadata_for_test();
// This first batch is only passed to `new_shard_with_dict`; the rows written to
// the shard are built in the loop below.
let kvs = build_key_values_with_ts_seq_values(
&metadata,
"shard".to_string(),
0,
[0].into_iter(),
[Some(0.0)].into_iter(),
0,
);
// Freeze threshold is 50 rows.
let mut shard = new_shard_with_dict(8, metadata.clone(), &[(kvs, 0)], 50);
let expected: Vec<_> = (0..200).collect();
// Write 200 rows, one per iteration, using `i` as both timestamp and sequence.
for i in &expected {
let kvs = build_key_values_with_ts_seq_values(
&metadata,
"shard".to_string(),
0,
[*i].into_iter(),
[Some(0.0)].into_iter(),
*i as u64,
);
let pk_id = PkId {
shard_id: shard.shard_id,
pk_index: *i as PkIndex,
};
for kv in kvs.iter() {
shard.write_with_pk_id(pk_id, &kv).unwrap();
}
}
assert!(!shard.is_empty());
// 200 single-row writes with a threshold of 50 are expected to leave 3 frozen parts
// (assuming each freeze empties the active buffer; the last 50 rows stay active).
assert_eq!(3, shard.data_parts.frozen_len());
let timestamps = collect_timestamps(&shard);
assert_eq!(expected, timestamps);
}
}

View File

@@ -138,7 +138,13 @@ impl ShardBuilder {
let shard_id = self.current_shard_id;
self.current_shard_id += 1;
Ok(Some(Shard::new(shard_id, key_dict, data_parts, self.dedup)))
Ok(Some(Shard::new(
shard_id,
key_dict,
data_parts,
self.dedup,
self.data_freeze_threshold,
)))
}
/// Scans the shard builder.

View File

@@ -124,7 +124,7 @@ impl MergeTree {
if !has_pk {
// No primary key.
self.write_no_key(kv);
self.write_no_key(kv)?;
continue;
}
@@ -299,7 +299,7 @@ impl MergeTree {
)
}
fn write_no_key(&self, key_value: KeyValue) {
fn write_no_key(&self, key_value: KeyValue) -> Result<()> {
let partition_key = Partition::get_partition_key(&key_value, self.is_partitioned);
let partition = self.get_or_create_partition(partition_key);

View File

@@ -171,6 +171,8 @@ impl RegionOpener {
// Initial memtable id is 0.
let mutable = self.memtable_builder.build(0, &metadata);
debug!("Create region {} with options: {:?}", region_id, options);
let version = VersionBuilder::new(metadata, mutable)
.options(options)
.build();
@@ -249,6 +251,9 @@ impl RegionOpener {
let region_id = self.region_id;
let object_store = self.object_store(&region_options.storage)?.clone();
debug!("Open region {} with options: {:?}", region_id, self.options);
let access_layer = Arc::new(AccessLayer::new(
self.region_dir.clone(),
object_store,

View File

@@ -13,6 +13,8 @@
// limitations under the License.
//! Options for a region.
//!
//! If we add options in this mod, we also need to modify [store_api::mito_engine_options].
use std::collections::HashMap;
use std::time::Duration;
@@ -358,6 +360,7 @@ mod tests {
("compaction.type", "twcs"),
("storage", "S3"),
("index.inverted_index.ignore_column_ids", "1,2,3"),
("index.inverted_index.segment_row_count", "512"),
(
WAL_OPTIONS_KEY,
&serde_json::to_string(&wal_options).unwrap(),
@@ -376,7 +379,7 @@ mod tests {
index_options: IndexOptions {
inverted_index: InvertedIndexOptions {
ignore_column_ids: vec![1, 2, 3],
segment_row_count: 1024,
segment_row_count: 512,
},
},
};

View File

@@ -219,25 +219,14 @@ pub(crate) fn extract_data_batch(batch: &DataBatch) -> (u16, Vec<(i64, u64)>) {
/// Builds key values with timestamps (ms) and sequences for test.
pub(crate) fn build_key_values_with_ts_seq_values(
schema: &RegionMetadataRef,
metadata: &RegionMetadataRef,
k0: String,
k1: u32,
timestamps: impl Iterator<Item = i64>,
values: impl Iterator<Item = Option<f64>>,
sequence: SequenceNumber,
) -> KeyValues {
let column_schema = schema
.column_metadatas
.iter()
.map(|c| api::v1::ColumnSchema {
column_name: c.column_schema.name.clone(),
datatype: ColumnDataTypeWrapper::try_from(c.column_schema.data_type.clone())
.unwrap()
.datatype() as i32,
semantic_type: c.semantic_type as i32,
..Default::default()
})
.collect();
let column_schema = region_metadata_to_row_schema(metadata);
let rows = timestamps
.zip(values)
@@ -269,7 +258,23 @@ pub(crate) fn build_key_values_with_ts_seq_values(
rows,
}),
};
KeyValues::new(schema.as_ref(), mutation).unwrap()
KeyValues::new(metadata.as_ref(), mutation).unwrap()
}
/// Builds the row schema (one `api::v1::ColumnSchema` per column) for a region.
///
/// Columns keep the order of `metadata.column_metadatas`. Only the column name,
/// data type and semantic type are set; all other fields use their defaults.
///
/// # Panics
///
/// Panics if a column data type cannot be converted into a `ColumnDataTypeWrapper`.
pub fn region_metadata_to_row_schema(metadata: &RegionMetadataRef) -> Vec<api::v1::ColumnSchema> {
    let mut row_schema = Vec::with_capacity(metadata.column_metadatas.len());
    for column in &metadata.column_metadatas {
        let wrapper =
            ColumnDataTypeWrapper::try_from(column.column_schema.data_type.clone()).unwrap();
        row_schema.push(api::v1::ColumnSchema {
            column_name: column.column_schema.name.clone(),
            datatype: wrapper.datatype() as i32,
            semantic_type: column.semantic_type as i32,
            ..Default::default()
        });
    }
    row_schema
}
/// Encode keys.

View File

@@ -15,7 +15,8 @@
use std::pin::Pin;
use std::task::{Context, Poll};
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_recordbatch::adapter::RecordBatchMetrics;
use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use datatypes::schema::SchemaRef;
use futures::Stream;
use futures_util::ready;
@@ -78,6 +79,14 @@ impl<F: FnOnce() + Unpin> RecordBatchStream for OnDone<F> {
fn schema(&self) -> SchemaRef {
self.stream.schema()
}
/// Forwards the output ordering reported by the wrapped stream.
fn output_ordering(&self) -> Option<&[OrderOption]> {
self.stream.output_ordering()
}
/// Forwards the metrics reported by the wrapped stream.
fn metrics(&self) -> Option<RecordBatchMetrics> {
self.stream.metrics()
}
}
impl<F: FnOnce() + Unpin> Stream for OnDone<F> {

View File

@@ -14,8 +14,8 @@
use std::any::Any;
use std::cmp::Ordering;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashMap};
use std::fmt::Display;
use std::pin::Pin;
use std::sync::Arc;
@@ -218,14 +218,15 @@ impl Display for Fill {
}
impl Fill {
pub fn try_from_str(value: &str, datatype: &DataType) -> DfResult<Self> {
pub fn try_from_str(value: &str, datatype: &DataType) -> DfResult<Option<Self>> {
let s = value.to_uppercase();
match s.as_str() {
"NULL" | "" => Ok(Self::Null),
"PREV" => Ok(Self::Prev),
"" => Ok(None),
"NULL" => Ok(Some(Self::Null)),
"PREV" => Ok(Some(Self::Prev)),
"LINEAR" => {
if datatype.is_numeric() {
Ok(Self::Linear)
Ok(Some(Self::Linear))
} else {
Err(DataFusionError::Plan(format!(
"Use FILL LINEAR on Non-numeric DataType {}",
@@ -240,13 +241,17 @@ impl Fill {
s, err
))
})
.map(Fill::Const),
.map(|x| Some(Fill::Const(x))),
}
}
/// The input `data` contains data on a complete time series.
/// If the filling strategy is `PREV` or `LINEAR`, the caller must ensure that the incoming `ts` and `data` are in ascending time order.
pub fn apply_fill_strategy(&self, ts: &[i64], data: &mut [ScalarValue]) -> DfResult<()> {
// No calculation is needed for `Fill::Null`
if matches!(self, Fill::Null) {
return Ok(());
}
let len = data.len();
if *self == Fill::Linear {
return Self::fill_linear(ts, data);
@@ -254,7 +259,6 @@ impl Fill {
for i in 0..len {
if data[i].is_null() {
match self {
Fill::Null => continue,
Fill::Prev => {
if i != 0 {
data[i] = data[i - 1].clone()
@@ -262,7 +266,8 @@ impl Fill {
}
// The calculation of linear interpolation is relatively complicated.
// `Self::fill_linear` is used to dispose `Fill::Linear`.
Fill::Linear => unreachable!(),
// No calculation is needed for `Fill::Null`
Fill::Linear | Fill::Null => unreachable!(),
Fill::Const(v) => data[i] = v.clone(),
}
}
@@ -359,12 +364,12 @@ fn linear_interpolation(
#[derive(Eq, Clone, Debug)]
pub struct RangeFn {
/// with format like `max(a) RANGE 300s FILL NULL`
/// with format like `max(a) RANGE 300s [FILL NULL]`
pub name: String,
pub data_type: DataType,
pub expr: Expr,
pub range: Duration,
pub fill: Fill,
pub fill: Option<Fill>,
/// If the `Fill` strategy is `Linear` and the output is an integer,
/// it is possible to calculate a floating point number.
/// So for `FILL==LINEAR`, the entire data will be implicitly converted to Float type
@@ -465,7 +470,7 @@ impl RangeSelect {
name,
data_type.clone(),
// Only when data fill with Const option, the data can't be null
!matches!(fill, Fill::Const(..)),
!matches!(fill, Some(Fill::Const(..))),
))
},
)
@@ -810,10 +815,26 @@ struct RangeFnExec {
pub expr: Arc<dyn AggregateExpr>,
pub args: Vec<Arc<dyn PhysicalExpr>>,
pub range: Millisecond,
pub fill: Fill,
pub fill: Option<Fill>,
pub need_cast: Option<DataType>,
}
impl Display for RangeFnExec {
    /// Formats as `<expr> RANGE <secs>s`, followed by ` FILL <fill>` when a fill is set.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = self.expr.name();
        // `range` is kept in milliseconds; the display uses whole seconds.
        let range_secs = self.range / 1000;
        match &self.fill {
            Some(fill) => write!(f, "{} RANGE {}s FILL {}", name, range_secs, fill),
            None => write!(f, "{} RANGE {}s", name, range_secs),
        }
    }
}
#[derive(Debug)]
pub struct RangeSelectExec {
input: Arc<dyn ExecutionPlan>,
@@ -834,18 +855,8 @@ impl DisplayAs for RangeSelectExec {
match t {
DisplayFormatType::Default | DisplayFormatType::Verbose => {
write!(f, "RangeSelectExec: ")?;
let range_expr_strs: Vec<String> = self
.range_exec
.iter()
.map(|e| {
format!(
"{} RANGE {}s FILL {}",
e.expr.name(),
e.range / 1000,
e.fill
)
})
.collect();
let range_expr_strs: Vec<String> =
self.range_exec.iter().map(RangeFnExec::to_string).collect();
let by: Vec<String> = self.by.iter().map(|e| e.to_string()).collect();
write!(
f,
@@ -939,7 +950,7 @@ impl ExecutionPlan for RangeSelectExec {
by: self.by.clone(),
series_map: HashMap::new(),
exec_state: ExecutionState::ReadingInput,
output_num_rows: 0,
num_not_null_rows: 0,
row_converter,
modify_map: HashMap::new(),
metric: baseline_metric,
@@ -979,8 +990,8 @@ struct RangeSelectStream {
/// value: `[row_ids]`
/// It is used to record the data that needs to be aggregated in each time slot during the data update process
modify_map: HashMap<(u64, Millisecond), Vec<u32>>,
/// The number of rows of the final output
output_num_rows: usize,
/// The number of not-null rows in the final output
num_not_null_rows: usize,
metric: BaselineMetrics,
schema_project: Option<Vec<usize>>,
schema_before_project: SchemaRef,
@@ -992,7 +1003,7 @@ struct SeriesState {
row: OwnedRow,
/// key: align_ts
/// value: a vector, each element is a range_fn following the order of `range_exec`
align_ts_accumulator: HashMap<Millisecond, Vec<Box<dyn Accumulator>>>,
align_ts_accumulator: BTreeMap<Millisecond, Vec<Box<dyn Accumulator>>>,
}
/// Use `align_to` as time origin.
@@ -1108,7 +1119,7 @@ impl RangeSelectStream {
let accumulators_map =
self.series_map.entry(*hash).or_insert_with(|| SeriesState {
row: by_rows.row(*row as usize).owned(),
align_ts_accumulator: HashMap::new(),
align_ts_accumulator: BTreeMap::new(),
});
match accumulators_map.align_ts_accumulator.entry(*ts) {
Entry::Occupied(mut e) => {
@@ -1116,7 +1127,7 @@ impl RangeSelectStream {
accumulators[i].update_batch(&sliced_arrays)
}
Entry::Vacant(e) => {
self.output_num_rows += 1;
self.num_not_null_rows += 1;
let mut accumulators = self
.range_exec
.iter()
@@ -1141,29 +1152,47 @@ impl RangeSelectStream {
// 1 for time index column
let mut columns: Vec<Arc<dyn Array>> =
Vec::with_capacity(1 + self.range_exec.len() + self.by.len());
let mut ts_builder = TimestampMillisecondBuilder::with_capacity(self.output_num_rows);
let mut all_scalar = vec![Vec::with_capacity(self.output_num_rows); self.range_exec.len()];
let mut by_rows = Vec::with_capacity(self.output_num_rows);
let mut ts_builder = TimestampMillisecondBuilder::with_capacity(self.num_not_null_rows);
let mut all_scalar =
vec![Vec::with_capacity(self.num_not_null_rows); self.range_exec.len()];
let mut by_rows = Vec::with_capacity(self.num_not_null_rows);
let mut start_index = 0;
// RangePlan is calculated on a row basis. If a column uses the PREV or LINEAR filling strategy,
// we must arrange the data in the entire data row to determine the NULL filling value.
let need_sort_output = self
// If any range expr need fill, we need fill both the missing align_ts and null value.
let need_fill_output = self.range_exec.iter().any(|range| range.fill.is_some());
// The padding value for each accumulator
let padding_values = self
.range_exec
.iter()
.any(|range| range.fill == Fill::Linear || range.fill == Fill::Prev);
.map(|e| e.expr.create_accumulator()?.evaluate())
.collect::<DfResult<Vec<_>>>()?;
for SeriesState {
row,
align_ts_accumulator,
} in self.series_map.values()
{
// collect data on time series
let mut align_ts = align_ts_accumulator.keys().copied().collect::<Vec<_>>();
if need_sort_output {
align_ts.sort();
// skip empty time series
if align_ts_accumulator.is_empty() {
continue;
}
// find the first and last align_ts
let begin_ts = *align_ts_accumulator.first_key_value().unwrap().0;
let end_ts = *align_ts_accumulator.last_key_value().unwrap().0;
let align_ts = if need_fill_output {
// we need to fill the empty align_ts slots that have no data
(begin_ts..=end_ts).step_by(self.align as usize).collect()
} else {
align_ts_accumulator.keys().copied().collect::<Vec<_>>()
};
for ts in &align_ts {
for (i, accumulator) in align_ts_accumulator.get(ts).unwrap().iter().enumerate() {
all_scalar[i].push(accumulator.evaluate()?);
if let Some(slot) = align_ts_accumulator.get(ts) {
for (column, acc) in all_scalar.iter_mut().zip(slot.iter()) {
column.push(acc.evaluate()?);
}
} else {
// fill the padding value into the empty time slot
for (column, padding) in all_scalar.iter_mut().zip(padding_values.iter()) {
column.push(padding.clone())
}
}
}
ts_builder.append_slice(&align_ts);
@@ -1176,14 +1205,16 @@ impl RangeSelectStream {
) in self.range_exec.iter().enumerate()
{
let time_series_data =
&mut all_scalar[i][start_index..start_index + align_ts_accumulator.len()];
&mut all_scalar[i][start_index..start_index + align_ts.len()];
if let Some(data_type) = need_cast {
cast_scalar_values(time_series_data, data_type)?;
}
fill.apply_fill_strategy(&align_ts, time_series_data)?;
if let Some(fill) = fill {
fill.apply_fill_strategy(&align_ts, time_series_data)?;
}
}
by_rows.resize(by_rows.len() + align_ts_accumulator.len(), row.row());
start_index += align_ts_accumulator.len();
by_rows.resize(by_rows.len() + align_ts.len(), row.row());
start_index += align_ts.len();
}
for column_scalar in all_scalar {
columns.push(ScalarValue::iter_to_array(column_scalar)?);
@@ -1304,7 +1335,7 @@ mod test {
const TIME_INDEX_COLUMN: &str = "timestamp";
fn prepare_test_data(is_float: bool) -> MemoryExec {
fn prepare_test_data(is_float: bool, is_gap: bool) -> MemoryExec {
let schema = Arc::new(Schema::new(vec![
Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
Field::new(
@@ -1318,16 +1349,23 @@ mod test {
),
Field::new("host", DataType::Utf8, true),
]));
let timestamp_column: Arc<dyn Array> = Arc::new(TimestampMillisecondArray::from(vec![
0, 5_000, 10_000, 15_000, 20_000, // host 1 every 5s
0, 5_000, 10_000, 15_000, 20_000, // host 2 every 5s
])) as _;
let mut host = vec!["host1"; 5];
host.extend(vec!["host2"; 5]);
let value_column: Arc<dyn Array> = if is_float {
Arc::new(nullable_array!(Float64;
0.0, null, 1.0, null, 2.0, // data for host 1
3.0, null, 4.0, null, 5.0 // data for host 2
let timestamp_column: Arc<dyn Array> = if !is_gap {
Arc::new(TimestampMillisecondArray::from(vec![
0, 5_000, 10_000, 15_000, 20_000, // host 1 every 5s
0, 5_000, 10_000, 15_000, 20_000, // host 2 every 5s
])) as _
} else {
Arc::new(TimestampMillisecondArray::from(vec![
0, 15_000, // host 1 every 5s, missing data on 5_000, 10_000
0, 15_000, // host 2 every 5s, missing data on 5_000, 10_000
])) as _
};
let mut host = vec!["host1"; timestamp_column.len() / 2];
host.extend(vec!["host2"; timestamp_column.len() / 2]);
let mut value_column: Arc<dyn Array> = if is_gap {
Arc::new(nullable_array!(Int64;
0, 6, // data for host 1
6, 12 // data for host 2
)) as _
} else {
Arc::new(nullable_array!(Int64;
@@ -1335,6 +1373,11 @@ mod test {
3, null, 4, null, 5 // data for host 2
)) as _
};
if is_float {
value_column =
cast_with_options(&value_column, &DataType::Float64, &CastOptions::default())
.unwrap();
}
let host_column: Arc<dyn Array> = Arc::new(StringArray::from(host)) as _;
let data = RecordBatch::try_new(
schema.clone(),
@@ -1349,8 +1392,9 @@ mod test {
range1: Millisecond,
range2: Millisecond,
align: Millisecond,
fill: Fill,
fill: Option<Fill>,
is_float: bool,
is_gap: bool,
expected: String,
) {
let data_type = if is_float {
@@ -1358,13 +1402,13 @@ mod test {
} else {
DataType::Int64
};
let (need_cast, schema_data_type) = if !is_float && fill == Fill::Linear {
let (need_cast, schema_data_type) = if !is_float && matches!(fill, Some(Fill::Linear)) {
// data_type = DataType::Float64;
(Some(DataType::Float64), DataType::Float64)
} else {
(None, data_type.clone())
};
let memory_exec = Arc::new(prepare_test_data(is_float));
let memory_exec = Arc::new(prepare_test_data(is_float, is_gap));
let schema = Arc::new(Schema::new(vec![
Field::new("MIN(value)", schema_data_type.clone(), true),
Field::new("MAX(value)", schema_data_type, true),
@@ -1449,7 +1493,16 @@ mod test {
\n| 3.0 | 3.0 | 1970-01-01T00:00:00 | host2 |\
\n+------------+------------+---------------------+-------+",
);
do_range_select_test(10_000, 10_000, 1_000_000, Fill::Null, true, expected).await;
do_range_select_test(
10_000,
10_000,
1_000_000,
Some(Fill::Null),
true,
false,
expected,
)
.await;
}
#[tokio::test]
@@ -1472,7 +1525,16 @@ mod test {
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
\n+------------+------------+---------------------+-------+",
);
do_range_select_test(10_000, 5_000, 5_000, Fill::Null, true, expected).await;
do_range_select_test(
10_000,
5_000,
5_000,
Some(Fill::Null),
true,
false,
expected,
)
.await;
}
#[tokio::test]
@@ -1495,7 +1557,16 @@ mod test {
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
\n+------------+------------+---------------------+-------+",
);
do_range_select_test(10_000, 5_000, 5_000, Fill::Prev, true, expected).await;
do_range_select_test(
10_000,
5_000,
5_000,
Some(Fill::Prev),
true,
false,
expected,
)
.await;
}
#[tokio::test]
@@ -1518,7 +1589,16 @@ mod test {
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
\n+------------+------------+---------------------+-------+",
);
do_range_select_test(10_000, 5_000, 5_000, Fill::Linear, true, expected).await;
do_range_select_test(
10_000,
5_000,
5_000,
Some(Fill::Linear),
true,
false,
expected,
)
.await;
}
#[tokio::test]
@@ -1541,7 +1621,16 @@ mod test {
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
\n+------------+------------+---------------------+-------+",
);
do_range_select_test(10_000, 5_000, 5_000, Fill::Linear, false, expected).await;
do_range_select_test(
10_000,
5_000,
5_000,
Some(Fill::Linear),
false,
false,
expected,
)
.await;
}
#[tokio::test]
@@ -1568,7 +1657,101 @@ mod test {
10_000,
5_000,
5_000,
Fill::Const(ScalarValue::Float64(Some(6.6))),
Some(Fill::Const(ScalarValue::Float64(Some(6.6)))),
true,
false,
expected,
)
.await;
}
#[tokio::test]
async fn range_fill_gap() {
let expected = String::from(
"+------------+------------+---------------------+-------+\
\n| MIN(value) | MAX(value) | timestamp | host |\
\n+------------+------------+---------------------+-------+\
\n| 0.0 | 0.0 | 1970-01-01T00:00:00 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:15 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:00 | host2 |\
\n| 12.0 | 12.0 | 1970-01-01T00:00:15 | host2 |\
\n+------------+------------+---------------------+-------+",
);
do_range_select_test(5_000, 5_000, 5_000, None, true, true, expected).await;
let expected = String::from(
"+------------+------------+---------------------+-------+\
\n| MIN(value) | MAX(value) | timestamp | host |\
\n+------------+------------+---------------------+-------+\
\n| 0.0 | 0.0 | 1970-01-01T00:00:00 | host1 |\
\n| | | 1970-01-01T00:00:05 | host1 |\
\n| | | 1970-01-01T00:00:10 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:15 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:00 | host2 |\
\n| | | 1970-01-01T00:00:05 | host2 |\
\n| | | 1970-01-01T00:00:10 | host2 |\
\n| 12.0 | 12.0 | 1970-01-01T00:00:15 | host2 |\
\n+------------+------------+---------------------+-------+",
);
do_range_select_test(5_000, 5_000, 5_000, Some(Fill::Null), true, true, expected).await;
let expected = String::from(
"+------------+------------+---------------------+-------+\
\n| MIN(value) | MAX(value) | timestamp | host |\
\n+------------+------------+---------------------+-------+\
\n| 0.0 | 0.0 | 1970-01-01T00:00:00 | host1 |\
\n| 0.0 | 0.0 | 1970-01-01T00:00:05 | host1 |\
\n| 0.0 | 0.0 | 1970-01-01T00:00:10 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:15 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:00 | host2 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:05 | host2 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:10 | host2 |\
\n| 12.0 | 12.0 | 1970-01-01T00:00:15 | host2 |\
\n+------------+------------+---------------------+-------+",
);
do_range_select_test(5_000, 5_000, 5_000, Some(Fill::Prev), true, true, expected).await;
let expected = String::from(
"+------------+------------+---------------------+-------+\
\n| MIN(value) | MAX(value) | timestamp | host |\
\n+------------+------------+---------------------+-------+\
\n| 0.0 | 0.0 | 1970-01-01T00:00:00 | host1 |\
\n| 2.0 | 2.0 | 1970-01-01T00:00:05 | host1 |\
\n| 4.0 | 4.0 | 1970-01-01T00:00:10 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:15 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:00 | host2 |\
\n| 8.0 | 8.0 | 1970-01-01T00:00:05 | host2 |\
\n| 10.0 | 10.0 | 1970-01-01T00:00:10 | host2 |\
\n| 12.0 | 12.0 | 1970-01-01T00:00:15 | host2 |\
\n+------------+------------+---------------------+-------+",
);
do_range_select_test(
5_000,
5_000,
5_000,
Some(Fill::Linear),
true,
true,
expected,
)
.await;
let expected = String::from(
"+------------+------------+---------------------+-------+\
\n| MIN(value) | MAX(value) | timestamp | host |\
\n+------------+------------+---------------------+-------+\
\n| 0.0 | 0.0 | 1970-01-01T00:00:00 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:05 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:10 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:15 | host1 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:00 | host2 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:05 | host2 |\
\n| 6.0 | 6.0 | 1970-01-01T00:00:10 | host2 |\
\n| 12.0 | 12.0 | 1970-01-01T00:00:15 | host2 |\
\n+------------+------------+---------------------+-------+",
);
do_range_select_test(
5_000,
5_000,
5_000,
Some(Fill::Const(ScalarValue::Float64(Some(6.0)))),
true,
true,
expected,
)
@@ -1577,7 +1760,8 @@ mod test {
#[test]
fn fill_test() {
assert!(Fill::try_from_str("Linear", &DataType::UInt8).unwrap() == Fill::Linear);
assert!(Fill::try_from_str("", &DataType::UInt8).unwrap().is_none());
assert!(Fill::try_from_str("Linear", &DataType::UInt8).unwrap() == Some(Fill::Linear));
assert_eq!(
Fill::try_from_str("Linear", &DataType::Boolean)
.unwrap_err()
@@ -1598,7 +1782,7 @@ mod test {
);
assert!(
Fill::try_from_str("8", &DataType::UInt8).unwrap()
== Fill::Const(ScalarValue::UInt8(Some(8)))
== Some(Fill::Const(ScalarValue::UInt8(Some(8))))
);
let mut test1 = vec![
ScalarValue::UInt8(Some(8)),

View File

@@ -216,7 +216,7 @@ impl<'a> TreeNodeRewriter for RangeExprRewriter<'a> {
let mut data_type = range_expr.get_type(self.input_plan.schema())?;
let mut need_cast = false;
let fill = Fill::try_from_str(parse_str_expr(&func.args, 2)?, &data_type)?;
if matches!(fill, Fill::Linear) && data_type.is_integer() {
if matches!(fill, Some(Fill::Linear)) && data_type.is_integer() {
data_type = DataType::Float64;
need_cast = true;
}
@@ -224,12 +224,20 @@ impl<'a> TreeNodeRewriter for RangeExprRewriter<'a> {
inconsistent_check!(self.align, self.align != Duration::default());
inconsistent_check!(self.align_to, self.align_to != 0);
let range_fn = RangeFn {
name: format!(
"{} RANGE {} FILL {}",
range_expr.display_name()?,
parse_expr_to_string(&func.args, 1)?,
fill
),
name: if let Some(fill) = &fill {
format!(
"{} RANGE {} FILL {}",
range_expr.display_name()?,
parse_expr_to_string(&func.args, 1)?,
fill
)
} else {
format!(
"{} RANGE {}",
range_expr.display_name()?,
parse_expr_to_string(&func.args, 1)?,
)
},
data_type,
expr: range_expr,
range,
@@ -551,7 +559,7 @@ mod test {
async fn range_no_project() {
let query = r#"SELECT timestamp, tag_0, tag_1, avg(field_0 + field_1) RANGE '5m' FROM test ALIGN '1h' by (tag_0,tag_1);"#;
let expected = String::from(
"RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8, AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL:Float64;N]\
"RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8, AVG(test.field_0 + test.field_1) RANGE 5m:Float64;N]\
\n TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]"
);
query_plan_compare(query, expected).await;
@@ -561,8 +569,8 @@ mod test {
async fn range_expr_calculation() {
let query = r#"SELECT (avg(field_0 + field_1)/4) RANGE '5m' FROM test ALIGN '1h' by (tag_0,tag_1);"#;
let expected = String::from(
"Projection: AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL / Int64(4) [AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL / Int64(4):Float64;N]\
\n RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
"Projection: AVG(test.field_0 + test.field_1) RANGE 5m / Int64(4) [AVG(test.field_0 + test.field_1) RANGE 5m / Int64(4):Float64;N]\
\n RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
\n TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]"
);
query_plan_compare(query, expected).await;
@@ -573,8 +581,8 @@ mod test {
let query =
r#"SELECT (covar(field_0 + field_1, field_1)/4) RANGE '5m' FROM test ALIGN '1h';"#;
let expected = String::from(
"Projection: COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m FILL NULL / Int64(4) [COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m FILL NULL / Int64(4):Float64;N]\
\n RangeSelect: range_exprs=[COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1, test.tag_2, test.tag_3, test.tag_4], time_index=timestamp [COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m FILL NULL:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8]\
"Projection: COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m / Int64(4) [COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m / Int64(4):Float64;N]\
\n RangeSelect: range_exprs=[COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1, test.tag_2, test.tag_3, test.tag_4], time_index=timestamp [COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8]\
\n TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]"
);
query_plan_compare(query, expected).await;
@@ -621,8 +629,8 @@ mod test {
async fn range_in_expr() {
let query = r#"SELECT sin(avg(field_0 + field_1) RANGE '5m' + 1) FROM test ALIGN '1h' by (tag_0,tag_1);"#;
let expected = String::from(
"Projection: sin(AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL + Int64(1)) [sin(AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL + Int64(1)):Float64;N]\
\n RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
"Projection: sin(AVG(test.field_0 + test.field_1) RANGE 5m + Int64(1)) [sin(AVG(test.field_0 + test.field_1) RANGE 5m + Int64(1)):Float64;N]\
\n RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
\n TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]"
);
query_plan_compare(query, expected).await;
@@ -643,8 +651,8 @@ mod test {
async fn deep_nest_range_expr() {
let query = r#"SELECT round(sin(avg(field_0 + field_1) RANGE '5m' + 1)) FROM test ALIGN '1h' by (tag_0,tag_1);"#;
let expected = String::from(
"Projection: round(sin(AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL + Int64(1))) [round(sin(AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL + Int64(1))):Float64;N]\
\n RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
"Projection: round(sin(AVG(test.field_0 + test.field_1) RANGE 5m + Int64(1))) [round(sin(AVG(test.field_0 + test.field_1) RANGE 5m + Int64(1))):Float64;N]\
\n RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
\n TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]"
);
query_plan_compare(query, expected).await;

View File

@@ -26,9 +26,10 @@ use common_function::function_registry::FUNCTION_REGISTRY;
use common_query::error::{PyUdfSnafu, UdfTempRecordBatchSnafu};
use common_query::prelude::Signature;
use common_query::{Output, OutputData};
use common_recordbatch::adapter::RecordBatchMetrics;
use common_recordbatch::error::{ExternalSnafu, Result as RecordBatchResult};
use common_recordbatch::{
RecordBatch, RecordBatchStream, RecordBatches, SendableRecordBatchStream,
OrderOption, RecordBatch, RecordBatchStream, RecordBatches, SendableRecordBatchStream,
};
use datafusion_expr::Volatility;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
@@ -255,6 +256,14 @@ impl RecordBatchStream for CoprStream {
// FIXME(discord9): use copr returns for schema
self.ret_schema.clone()
}
/// Always returns `None`: this stream reports no output ordering.
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
/// Always returns `None`: this stream reports no metrics.
fn metrics(&self) -> Option<RecordBatchMetrics> {
None
}
}
impl Stream for CoprStream {

View File

@@ -49,6 +49,7 @@ datatypes.workspace = true
derive_builder.workspace = true
digest = "0.10"
futures = "0.3"
hashbrown = "0.14"
headers = "0.3"
hex = { version = "0.4" }
hostname = "0.3.1"

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::string::ToString;
use api::prom_store::remote::Sample;
@@ -23,6 +21,8 @@ use api::v1::{
Value,
};
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
use hashbrown::hash_map::Entry;
use hashbrown::HashMap;
use crate::proto::PromLabel;
use crate::repeated_field::Clear;
@@ -86,7 +86,7 @@ impl Default for TableBuilder {
impl TableBuilder {
pub(crate) fn with_capacity(cols: usize, rows: usize) -> Self {
let mut col_indexes = HashMap::with_capacity(cols);
let mut col_indexes = HashMap::with_capacity_and_hasher(cols, Default::default());
col_indexes.insert(GREPTIME_TIMESTAMP.to_string(), 0);
col_indexes.insert(GREPTIME_VALUE.to_string(), 1);

View File

@@ -13,12 +13,13 @@
// limitations under the License.
use std::ops::Deref;
use std::slice;
use api::prom_store::remote::Sample;
use api::v1::RowInsertRequests;
use bytes::{Buf, Bytes};
use prost::encoding::message::merge;
use prost::encoding::{decode_key, decode_varint, DecodeContext, WireType};
use prost::encoding::{decode_key, decode_varint, WireType};
use prost::DecodeError;
use crate::prom_row_builder::TablesBuilder;
@@ -39,29 +40,22 @@ pub struct PromLabel {
}
impl Clear for PromLabel {
fn clear(&mut self) {
self.name.clear();
self.value.clear();
}
fn clear(&mut self) {}
}
impl PromLabel {
pub fn merge_field<B>(
pub fn merge_field(
&mut self,
tag: u32,
wire_type: WireType,
buf: &mut B,
ctx: DecodeContext,
) -> Result<(), DecodeError>
where
B: Buf,
{
buf: &mut Bytes,
) -> Result<(), DecodeError> {
const STRUCT_NAME: &str = "PromLabel";
match tag {
1u32 => {
// decode label name
let value = &mut self.name;
prost::encoding::bytes::merge(wire_type, value, buf, ctx).map_err(|mut error| {
merge_bytes(value, buf).map_err(|mut error| {
error.push(STRUCT_NAME, "name");
error
})
@@ -69,16 +63,69 @@ impl PromLabel {
2u32 => {
// decode label value
let value = &mut self.value;
prost::encoding::bytes::merge(wire_type, value, buf, ctx).map_err(|mut error| {
merge_bytes(value, buf).map_err(|mut error| {
error.push(STRUCT_NAME, "value");
error
})
}
_ => prost::encoding::skip_field(wire_type, tag, buf, ctx),
_ => prost::encoding::skip_field(wire_type, tag, buf, Default::default()),
}
}
}
/// Removes the first `len` bytes from `data` and returns them as a `Bytes`.
///
/// When `len` equals everything that remains, the whole buffer is moved out
/// and `data` is left empty. Otherwise the prefix is obtained through
/// `split_to` (so that helper's safety requirements apply to the returned
/// value) and `data` is advanced past it.
///
/// # Panics
/// `split_to` asserts that `len` does not exceed `data.len()`.
#[inline(always)]
fn copy_to_bytes(data: &mut Bytes, len: usize) -> Bytes {
    if len != data.remaining() {
        let head = split_to(data, len);
        data.advance(len);
        return head;
    }
    // Consuming the entire buffer: hand the original `Bytes` over and leave
    // an empty one in its place.
    std::mem::take(data)
}
/// Similar to `Bytes::split_to`, but directly operates on underlying memory region:
/// it returns a `Bytes` viewing the first `end` bytes of `buf` and does not modify
/// `buf` itself.
///
/// # Panics
/// Panics if `end` is greater than `buf.len()`.
///
/// # Safety
/// This function is safe as long as `buf` is backed by a consecutive region of memory,
/// for example `Vec<u8>` or `&[u8]`, and caller must ensure that `buf` outlives
/// the `Bytes` returned.
// NOTE(review): the function documents a contract the caller must uphold but is not
// declared `unsafe` — confirm this is intentional.
#[inline(always)]
fn split_to(buf: &mut Bytes, end: usize) -> Bytes {
let len = buf.len();
assert!(
end <= len,
"range end out of bounds: {:?} <= {:?}",
end,
len,
);
// Nothing to view: return an empty `Bytes` without touching raw memory.
if end == 0 {
return Bytes::new();
}
let ptr = buf.as_ptr();
// SAFETY: `ptr` comes from `buf` and `end <= len` was asserted above, so the first
// `end` bytes are readable. The resulting slice's lifetime is not tied to `buf`;
// keeping the backing memory alive is the caller's responsibility (see `# Safety`).
let x = unsafe { slice::from_raw_parts(ptr, end) };
// `Bytes::drop` does nothing when it's built via `from_static`.
Bytes::from_static(x)
}
/// Decodes a length-delimited bytes field from `buf` and stores it in `value`.
///
/// A varint length prefix is read first; that many bytes are then taken from
/// the front of `buf` with `copy_to_bytes` and replace the previous content of
/// `value`.
///
/// # Errors
/// Returns a `DecodeError` when the varint cannot be decoded or when the
/// declared length is larger than what is left in `buf`.
///
/// # Safety
/// Callers must ensure `buf` outlives `value`.
#[inline(always)]
fn merge_bytes(value: &mut Bytes, buf: &mut Bytes) -> Result<(), DecodeError> {
    let declared = decode_varint(buf)?;
    let available = buf.remaining();
    if declared > available as u64 {
        return Err(DecodeError::new(format!(
            "buffer underflow, len: {}, remaining: {}",
            declared, available
        )));
    }
    // `declared` fits in `usize` here because it is bounded by `available`.
    *value = copy_to_bytes(buf, declared as usize);
    Ok(())
}
#[derive(Default, Debug)]
pub struct PromTimeSeries {
pub table_name: String,
@@ -95,16 +142,12 @@ impl Clear for PromTimeSeries {
}
impl PromTimeSeries {
pub fn merge_field<B>(
pub fn merge_field(
&mut self,
tag: u32,
wire_type: WireType,
buf: &mut B,
ctx: DecodeContext,
) -> Result<(), DecodeError>
where
B: Buf,
{
buf: &mut Bytes,
) -> Result<(), DecodeError> {
const STRUCT_NAME: &str = "PromTimeSeries";
match tag {
1u32 => {
@@ -123,7 +166,7 @@ impl PromTimeSeries {
let limit = remaining - len as usize;
while buf.remaining() > limit {
let (tag, wire_type) = decode_key(buf)?;
label.merge_field(tag, wire_type, buf, ctx.clone())?;
label.merge_field(tag, wire_type, buf)?;
}
if buf.remaining() != limit {
return Err(DecodeError::new("delimited length exceeded"));
@@ -138,15 +181,17 @@ impl PromTimeSeries {
}
2u32 => {
let sample = self.samples.push_default();
merge(WireType::LengthDelimited, sample, buf, ctx).map_err(|mut error| {
error.push(STRUCT_NAME, "samples");
error
})?;
merge(WireType::LengthDelimited, sample, buf, Default::default()).map_err(
|mut error| {
error.push(STRUCT_NAME, "samples");
error
},
)?;
Ok(())
}
// skip exemplars
3u32 => prost::encoding::skip_field(wire_type, tag, buf, ctx),
_ => prost::encoding::skip_field(wire_type, tag, buf, ctx),
// todo(hl): exemplars are skipped temporarily
3u32 => prost::encoding::skip_field(wire_type, tag, buf, Default::default()),
_ => prost::encoding::skip_field(wire_type, tag, buf, Default::default()),
}
}
@@ -181,13 +226,9 @@ impl PromWriteRequest {
self.table_data.as_insert_requests()
}
pub fn merge<B>(&mut self, mut buf: B) -> Result<(), DecodeError>
where
B: Buf,
Self: Sized,
{
// todo(hl): using &[u8] may reduce the overhead introduced by Bytes.
pub fn merge(&mut self, mut buf: Bytes) -> Result<(), DecodeError> {
const STRUCT_NAME: &str = "PromWriteRequest";
let ctx = DecodeContext::default();
while buf.has_remaining() {
let (tag, wire_type) = decode_key(&mut buf)?;
assert_eq!(WireType::LengthDelimited, wire_type);
@@ -206,8 +247,7 @@ impl PromWriteRequest {
let limit = remaining - len as usize;
while buf.remaining() > limit {
let (tag, wire_type) = decode_key(&mut buf)?;
self.series
.merge_field(tag, wire_type, &mut buf, ctx.clone())?;
self.series.merge_field(tag, wire_type, &mut buf)?;
}
if buf.remaining() != limit {
return Err(DecodeError::new("delimited length exceeded"));
@@ -215,10 +255,10 @@ impl PromWriteRequest {
self.series.add_to_table_data(&mut self.table_data);
}
3u32 => {
// we can ignore metadata for now.
prost::encoding::skip_field(wire_type, tag, &mut buf, ctx.clone())?;
// todo(hl): metadata are skipped.
prost::encoding::skip_field(wire_type, tag, &mut buf, Default::default())?;
}
_ => prost::encoding::skip_field(wire_type, tag, &mut buf, ctx.clone())?,
_ => prost::encoding::skip_field(wire_type, tag, &mut buf, Default::default())?,
}
}
Ok(())

View File

@@ -117,7 +117,7 @@ pub enum Error {
source: datatypes::error::Error,
},
#[snafu(display("Invalid table option key: {}", key))]
#[snafu(display("Unrecognized table option key: {}", key))]
InvalidTableOption { key: String, location: Location },
#[snafu(display("Failed to serialize column default constraint"))]

View File

@@ -23,7 +23,7 @@ use sqlparser::keywords::ALL_KEYWORDS;
use sqlparser::parser::IsOptional::Mandatory;
use sqlparser::parser::{Parser, ParserError};
use sqlparser::tokenizer::{Token, TokenWithLocation, Word};
use table::requests::valid_table_option;
use table::requests::validate_table_option;
use crate::ast::{ColumnDef, Ident, TableConstraint};
use crate::error::{
@@ -62,17 +62,21 @@ impl<'a> ParserContext<'a> {
let _ = self.parser.next_token();
self.parser
.expect_keyword(Keyword::TABLE)
.context(error::SyntaxSnafu)?;
.context(SyntaxSnafu)?;
let if_not_exists =
self.parser
.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
let table_name = self.intern_parse_table_name()?;
let (columns, constraints) = self.parse_columns()?;
if !columns.is_empty() {
validate_time_index(&columns, &constraints)?;
}
let engine = self.parse_table_engine(common_catalog::consts::FILE_ENGINE)?;
let options = self
.parser
.parse_options(Keyword::WITH)
.context(error::SyntaxSnafu)?
.context(SyntaxSnafu)?
.into_iter()
.filter_map(|option| {
if let Some(v) = parse_option_string(option.value) {
@@ -84,7 +88,7 @@ impl<'a> ParserContext<'a> {
.collect::<HashMap<String, String>>();
for key in options.keys() {
ensure!(
valid_table_option(key),
validate_table_option(key),
InvalidTableOptionSnafu {
key: key.to_string()
}
@@ -140,8 +144,12 @@ impl<'a> ParserContext<'a> {
}
let (columns, constraints) = self.parse_columns()?;
validate_time_index(&columns, &constraints)?;
let partitions = self.parse_partitions()?;
if let Some(partitions) = &partitions {
validate_partitions(&columns, partitions)?;
}
let engine = self.parse_table_engine(default_engine())?;
let options = self
@@ -150,7 +158,7 @@ impl<'a> ParserContext<'a> {
.context(error::SyntaxSnafu)?;
for option in options.iter() {
ensure!(
valid_table_option(&option.name.value),
validate_table_option(&option.name.value),
InvalidTableOptionSnafu {
key: option.name.value.to_string()
}
@@ -168,7 +176,6 @@ impl<'a> ParserContext<'a> {
table_id: 0, // table id is assigned by catalog manager
partitions,
};
validate_create(&create_table)?;
Ok(Statement::CreateTable(create_table))
}
@@ -553,18 +560,8 @@ impl<'a> ParserContext<'a> {
}
}
fn validate_create(create_table: &CreateTable) -> Result<()> {
if let Some(partitions) = &create_table.partitions {
validate_partitions(&create_table.columns, partitions)?;
}
validate_time_index(create_table)?;
Ok(())
}
fn validate_time_index(create_table: &CreateTable) -> Result<()> {
let time_index_constraints: Vec<_> = create_table
.constraints
fn validate_time_index(columns: &[ColumnDef], constraints: &[TableConstraint]) -> Result<()> {
let time_index_constraints: Vec<_> = constraints
.iter()
.filter_map(|c| {
if let TableConstraint::Unique {
@@ -605,8 +602,7 @@ fn validate_time_index(create_table: &CreateTable) -> Result<()> {
// It's safe to use time_index_constraints[0][0],
// we already check the bound above.
let time_index_column_ident = &time_index_constraints[0][0];
let time_index_column = create_table
.columns
let time_index_column = columns
.iter()
.find(|c| c.name.value == *time_index_column_ident.value)
.with_context(|| InvalidTimeIndexSnafu {
@@ -753,7 +749,7 @@ mod tests {
fn test_validate_external_table_options() {
let sql = "CREATE EXTERNAL TABLE city (
host string,
ts int64,
ts timestamp,
cpu float64 default 0,
memory float64,
TIME INDEX (ts),
@@ -799,6 +795,17 @@ mod tests {
expected_engine: "foo",
expected_if_not_exist: true,
},
Test {
sql: "CREATE EXTERNAL TABLE IF NOT EXISTS city ENGINE=foo with(location='/var/data/city.csv',format='csv','compaction.type'='bar');",
expected_table_name: "city",
expected_options: HashMap::from([
("location".to_string(), "/var/data/city.csv".to_string()),
("format".to_string(), "csv".to_string()),
("compaction.type".to_string(), "bar".to_string()),
]),
expected_engine: "foo",
expected_if_not_exist: true,
},
];
for test in tests {
@@ -825,7 +832,7 @@ mod tests {
fn test_parse_create_external_table_with_schema() {
let sql = "CREATE EXTERNAL TABLE city (
host string,
ts int64,
ts timestamp,
cpu float32 default 0,
memory float64,
TIME INDEX (ts),
@@ -848,7 +855,7 @@ mod tests {
let columns = &c.columns;
assert_column_def(&columns[0], "host", "STRING");
assert_column_def(&columns[1], "ts", "BIGINT");
assert_column_def(&columns[1], "ts", "TIMESTAMP");
assert_column_def(&columns[2], "cpu", "FLOAT");
assert_column_def(&columns[3], "memory", "DOUBLE");
@@ -927,7 +934,7 @@ ENGINE=mito";
let _ = result.unwrap();
let sql = r"
CREATE TABLE rcx ( a INT, b STRING, c INT )
CREATE TABLE rcx ( ts TIMESTAMP TIME INDEX, a INT, b STRING, c INT )
PARTITION ON COLUMNS(x) ()
ENGINE=mito";
let result =
@@ -1315,7 +1322,7 @@ ENGINE=mito";
#[test]
fn test_parse_partitions_with_error_syntax() {
let sql = r"
CREATE TABLE rcx ( a INT, b STRING, c INT )
CREATE TABLE rcx ( ts TIMESTAMP TIME INDEX, a INT, b STRING, c INT )
PARTITION COLUMNS(c, a) (
a < 10,
a > 10 AND a < 20,
@@ -1344,7 +1351,7 @@ ENGINE=mito";
#[test]
fn test_parse_partitions_unreferenced_column() {
let sql = r"
CREATE TABLE rcx ( a INT, b STRING, c INT )
CREATE TABLE rcx ( ts TIMESTAMP TIME INDEX, a INT, b STRING, c INT )
PARTITION ON COLUMNS(c, a) (
b = 'foo'
)
@@ -1360,7 +1367,7 @@ ENGINE=mito";
#[test]
fn test_parse_partitions_not_binary_expr() {
let sql = r"
CREATE TABLE rcx ( a INT, b STRING, c INT )
CREATE TABLE rcx ( ts TIMESTAMP TIME INDEX, a INT, b STRING, c INT )
PARTITION ON COLUMNS(c, a) (
b
)
@@ -1495,4 +1502,25 @@ ENGINE=mito";
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default());
let _ = result.unwrap();
}
/// Regression test for issue 3479: a float literal used as a column DEFAULT
/// must be rendered unchanged after the statement is parsed.
#[test]
fn test_incorrect_default_value_issue_3479() {
    let sql = r#"CREATE TABLE `ExcePTuRi`(
non TIMESTAMP(6) TIME INDEX,
`iUSTO` DOUBLE DEFAULT 0.047318541668048164
)"#;
    let statements =
        ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
            .unwrap();
    assert_eq!(1, statements.len());

    // The single parsed statement has to be a CREATE TABLE.
    if let Statement::CreateTable(create) = &statements[0] {
        assert_eq!(
            "`iUSTO` DOUBLE DEFAULT 0.047318541668048164",
            create.columns[1].to_string()
        );
    } else {
        unreachable!()
    }
}
}

View File

@@ -904,6 +904,29 @@ mod tests {
);
}
/// Regression test for issue 3479: a float64 default constraint built from a
/// numeric literal keeps its textual value and survives an encode/decode
/// round trip.
#[test]
fn test_incorrect_default_value_issue_3479() {
    let literal = Expr::Value(SqlValue::Number("0.047318541668048164".into(), false));
    let opts = vec![ColumnOptionDef {
        name: None,
        option: ColumnOption::Default(literal),
    }];
    let float64 = ConcreteDataType::float64_datatype();
    let constraint = parse_column_default_constraint("coll", &float64, &opts, None)
        .unwrap()
        .unwrap();
    assert_eq!("0.047318541668048164", constraint.to_string());

    // Serialize the constraint to bytes and read it back; nothing may be lost.
    let encoded: Vec<u8> = constraint.clone().try_into().unwrap();
    let decoded = ColumnDefaultConstraint::try_from(encoded.as_ref()).unwrap();
    assert_eq!(decoded, constraint);
}
#[test]
pub fn test_sql_column_def_to_grpc_column_def() {
// test basic

View File

@@ -229,8 +229,10 @@ pub struct CreateTableLike {
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use crate::dialect::GreptimeDbDialect;
use crate::error::Error::InvalidTableOption;
use crate::error::Error;
use crate::parser::{ParseOptions, ParserContext};
use crate::statements::statement::Statement;
@@ -344,7 +346,29 @@ ENGINE=mito
fn test_validate_table_options() {
let sql = r"create table if not exists demo(
host string,
ts bigint,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
)
PARTITION ON COLUMNS (host) ()
engine=mito
with(regions=1, ttl='7d', 'compaction.type'='world');
";
let result =
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
.unwrap();
match &result[0] {
Statement::CreateTable(c) => {
assert_eq!(3, c.options.len());
}
_ => unreachable!(),
}
let sql = r"create table if not exists demo(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
@@ -356,6 +380,6 @@ ENGINE=mito
";
let result =
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default());
assert!(matches!(result, Err(InvalidTableOption { .. })))
assert_matches!(result, Err(Error::InvalidTableOption { .. }))
}
}

View File

@@ -20,6 +20,7 @@ pub mod logstore;
pub mod manifest;
pub mod metadata;
pub mod metric_engine_consts;
pub mod mito_engine_options;
pub mod path_utils;
pub mod region_engine;
pub mod region_request;

View File

@@ -0,0 +1,61 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Option keys for the mito engine.
//! We define them in this mod so the create parser can use it to validate table options.
use common_wal::options::WAL_OPTIONS_KEY;
/// Checks whether `key` is one of the table option keys recognized by the mito engine.
///
/// The comparison is an exact, case-sensitive string match against the keys
/// listed in the function body.
pub fn is_mito_engine_option_key(key: &str) -> bool {
    let known_keys = [
        "ttl",
        "compaction.type",
        "compaction.twcs.max_active_window_files",
        "compaction.twcs.max_inactive_window_files",
        "compaction.twcs.time_window",
        "storage",
        "index.inverted_index.ignore_column_ids",
        "index.inverted_index.segment_row_count",
        WAL_OPTIONS_KEY,
    ];
    known_keys.iter().any(|known| *known == key)
}
#[cfg(test)]
mod tests {
use super::*;
// Every key listed in `is_mito_engine_option_key` must be accepted, and an
// unrelated key must be rejected.
#[test]
fn test_is_mito_engine_option_key() {
assert!(is_mito_engine_option_key("ttl"));
assert!(is_mito_engine_option_key("compaction.type"));
assert!(is_mito_engine_option_key(
"compaction.twcs.max_active_window_files"
));
assert!(is_mito_engine_option_key(
"compaction.twcs.max_inactive_window_files"
));
assert!(is_mito_engine_option_key("compaction.twcs.time_window"));
assert!(is_mito_engine_option_key("storage"));
assert!(is_mito_engine_option_key(
"index.inverted_index.ignore_column_ids"
));
assert!(is_mito_engine_option_key(
"index.inverted_index.segment_row_count"
));
// Spelled as a literal here; the function itself lists `WAL_OPTIONS_KEY`.
assert!(is_mito_engine_option_key("wal_options"));
assert!(!is_mito_engine_option_key("foo"));
}
}

View File

@@ -26,6 +26,7 @@ use datatypes::prelude::VectorRef;
use datatypes::schema::{ColumnSchema, RawSchema};
use serde::{Deserialize, Serialize};
use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, PHYSICAL_TABLE_METADATA_KEY};
use store_api::mito_engine_options::is_mito_engine_option_key;
use store_api::storage::RegionNumber;
use crate::error;
@@ -38,6 +39,33 @@ pub const FILE_TABLE_LOCATION_KEY: &str = "location";
pub const FILE_TABLE_PATTERN_KEY: &str = "pattern";
pub const FILE_TABLE_FORMAT_KEY: &str = "format";
/// Checks whether `key` is an accepted table option key.
///
/// A key is accepted when it is supported by the S3 options, is a mito engine
/// option key, or is one of the common, file engine or metric engine keys
/// listed below. The checks run in that order and stop at the first match.
pub fn validate_table_option(key: &str) -> bool {
    let engine_keys = [
        // common keys:
        WRITE_BUFFER_SIZE_KEY,
        TTL_KEY,
        REGIONS_KEY,
        STORAGE_KEY,
        // file engine keys:
        FILE_TABLE_LOCATION_KEY,
        FILE_TABLE_FORMAT_KEY,
        FILE_TABLE_PATTERN_KEY,
        // metric engine keys:
        PHYSICAL_TABLE_METADATA_KEY,
        LOGICAL_TABLE_METADATA_KEY,
    ];

    is_supported_in_s3(key)
        || is_mito_engine_option_key(key)
        || engine_keys.iter().any(|known| *known == key)
}
#[derive(Debug, Clone)]
pub struct CreateDatabaseRequest {
pub db_name: String,
@@ -315,21 +343,6 @@ impl TruncateTableRequest {
}
}
pub fn valid_table_option(key: &str) -> bool {
matches!(
key,
FILE_TABLE_LOCATION_KEY
| FILE_TABLE_FORMAT_KEY
| FILE_TABLE_PATTERN_KEY
| WRITE_BUFFER_SIZE_KEY
| TTL_KEY
| REGIONS_KEY
| STORAGE_KEY
| PHYSICAL_TABLE_METADATA_KEY
| LOGICAL_TABLE_METADATA_KEY
) | is_supported_in_s3(key)
}
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct CopyDatabaseRequest {
pub catalog_name: String,
@@ -346,14 +359,14 @@ mod tests {
#[test]
fn test_validate_table_option() {
assert!(valid_table_option(FILE_TABLE_LOCATION_KEY));
assert!(valid_table_option(FILE_TABLE_FORMAT_KEY));
assert!(valid_table_option(FILE_TABLE_PATTERN_KEY));
assert!(valid_table_option(TTL_KEY));
assert!(valid_table_option(REGIONS_KEY));
assert!(valid_table_option(WRITE_BUFFER_SIZE_KEY));
assert!(valid_table_option(STORAGE_KEY));
assert!(!valid_table_option("foo"));
assert!(validate_table_option(FILE_TABLE_LOCATION_KEY));
assert!(validate_table_option(FILE_TABLE_FORMAT_KEY));
assert!(validate_table_option(FILE_TABLE_PATTERN_KEY));
assert!(validate_table_option(TTL_KEY));
assert!(validate_table_option(REGIONS_KEY));
assert!(validate_table_option(WRITE_BUFFER_SIZE_KEY));
assert!(validate_table_option(STORAGE_KEY));
assert!(!validate_table_option("foo"));
}
#[test]

View File

@@ -17,8 +17,9 @@ use std::sync::Arc;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_recordbatch::adapter::RecordBatchMetrics;
use common_recordbatch::error::Result as RecordBatchResult;
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use datafusion::arrow::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::array::UInt32Array;
use datatypes::data_type::ConcreteDataType;
@@ -123,6 +124,14 @@ impl RecordBatchStream for NumbersStream {
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
fn metrics(&self) -> Option<RecordBatchMetrics> {
None
}
}
impl Stream for NumbersStream {

View File

@@ -23,7 +23,7 @@ use common_query::error::Result as QueryResult;
use common_query::physical_plan::{Partitioning, PhysicalPlan, PhysicalPlanRef};
use common_recordbatch::adapter::RecordBatchMetrics;
use common_recordbatch::error::Result as RecordBatchResult;
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_telemetry::tracing::Span;
use common_telemetry::tracing_context::TracingContext;
use datafusion::execution::context::TaskContext;
@@ -157,6 +157,10 @@ impl RecordBatchStream for StreamWithMetricWrapper {
fn metrics(&self) -> Option<RecordBatchMetrics> {
self.stream.metrics()
}
fn output_ordering(&self) -> Option<&[OrderOption]> {
self.stream.output_ordering()
}
}
#[cfg(test)]

View File

@@ -17,8 +17,9 @@ use std::sync::Arc;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_recordbatch::adapter::RecordBatchMetrics;
use common_recordbatch::error::Result as RecordBatchResult;
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use datatypes::prelude::*;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::UInt32Vector;
@@ -165,6 +166,14 @@ impl RecordBatchStream for MemtableStream {
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn output_ordering(&self) -> Option<&[OrderOption]> {
None
}
fn metrics(&self) -> Option<RecordBatchMetrics> {
None
}
}
struct MemtableStream {

View File

@@ -18,6 +18,7 @@ common-macro = { workspace = true }
common-query = { workspace = true }
common-runtime = { workspace = true }
common-telemetry = { workspace = true }
common-time = { workspace = true }
datatypes = { workspace = true }
derive_builder = { workspace = true }
dotenv = "0.15"
@@ -39,13 +40,6 @@ sqlx = { version = "0.6", features = [
] }
[dev-dependencies]
dotenv = "0.15"
sqlx = { version = "0.6", features = [
"runtime-tokio-rustls",
"mysql",
"postgres",
"chrono",
] }
tokio = { workspace = true }
[[bin]]
@@ -54,3 +48,17 @@ path = "targets/fuzz_create_table.rs"
test = false
bench = false
doc = false
[[bin]]
name = "fuzz_insert"
path = "targets/fuzz_insert.rs"
test = false
bench = false
doc = false
[[bin]]
name = "fuzz_alter_table"
path = "targets/fuzz_alter_table.rs"
test = false
bench = false
doc = false

View File

@@ -14,13 +14,20 @@
use std::sync::Arc;
use common_query::AddColumnLocation;
use partition::partition::PartitionDef;
use rand::Rng;
use snafu::{ensure, OptionExt};
use crate::ir::{Column, CreateTableExpr, Ident};
use crate::error::{self, Result};
use crate::generator::Random;
use crate::ir::alter_expr::AlterTableOperation;
use crate::ir::{AlterTableExpr, Column, CreateTableExpr, Ident};
pub type TableContextRef = Arc<TableContext>;
/// TableContext stores table info.
#[derive(Debug, Clone)]
pub struct TableContext {
pub name: Ident,
pub columns: Vec<Column>,
@@ -48,3 +55,183 @@ impl From<&CreateTableExpr> for TableContext {
}
}
}
impl TableContext {
/// Applies the [AlterTableExpr].
pub fn alter(mut self, expr: AlterTableExpr) -> Result<TableContext> {
match expr.alter_options {
AlterTableOperation::AddColumn { column, location } => {
ensure!(
!self.columns.iter().any(|col| col.name == column.name),
error::UnexpectedSnafu {
violated: format!("Column {} exists", column.name),
}
);
match location {
Some(AddColumnLocation::First) => {
let mut columns = Vec::with_capacity(self.columns.len() + 1);
columns.push(column);
columns.extend(self.columns);
self.columns = columns;
}
Some(AddColumnLocation::After { column_name }) => {
let index = self
.columns
.iter()
// TODO(weny): find a better way?
.position(|col| col.name.to_string() == column_name)
.context(error::UnexpectedSnafu {
violated: format!("Column: {column_name} not found"),
})?;
self.columns.insert(index + 1, column);
}
None => self.columns.push(column),
}
// Re-generates the primary_keys
self.primary_keys = self
.columns
.iter()
.enumerate()
.flat_map(|(idx, col)| {
if col.is_primary_key() {
Some(idx)
} else {
None
}
})
.collect();
Ok(self)
}
AlterTableOperation::DropColumn { name } => {
self.columns.retain(|col| col.name != name);
// Re-generates the primary_keys
self.primary_keys = self
.columns
.iter()
.enumerate()
.flat_map(|(idx, col)| {
if col.is_primary_key() {
Some(idx)
} else {
None
}
})
.collect();
Ok(self)
}
AlterTableOperation::RenameTable { new_table_name } => {
ensure!(
new_table_name != self.name,
error::UnexpectedSnafu {
violated: "The new table name is equal the current name",
}
);
self.name = new_table_name;
Ok(self)
}
}
}
pub fn generate_unique_column_name<R: Rng>(
&self,
rng: &mut R,
generator: &dyn Random<Ident, R>,
) -> Ident {
let mut name = generator.gen(rng);
while self.columns.iter().any(|col| col.name.value == name.value) {
name = generator.gen(rng);
}
name
}
pub fn generate_unique_table_name<R: Rng>(
&self,
rng: &mut R,
generator: &dyn Random<Ident, R>,
) -> Ident {
let mut name = generator.gen(rng);
while self.name.value == name.value {
name = generator.gen(rng);
}
name
}
}
#[cfg(test)]
mod tests {
use common_query::AddColumnLocation;
use datatypes::data_type::ConcreteDataType;
use super::TableContext;
use crate::ir::alter_expr::AlterTableOperation;
use crate::ir::create_expr::ColumnOption;
use crate::ir::{AlterTableExpr, Column, Ident};
// Walks a table context through a sequence of ALTER operations and checks the
// column order and the recomputed primary key indices after each step.
#[test]
fn test_table_context_alter() {
// Start from an empty table.
let table_ctx = TableContext {
name: "foo".into(),
columns: vec![],
partition: None,
primary_keys: vec![],
};
// Add a column
let expr = AlterTableExpr {
table_name: "foo".into(),
alter_options: AlterTableOperation::AddColumn {
column: Column {
name: "a".into(),
column_type: ConcreteDataType::timestamp_microsecond_datatype(),
options: vec![ColumnOption::PrimaryKey],
},
location: None,
},
};
let table_ctx = table_ctx.alter(expr).unwrap();
// Columns: [a]
assert_eq!(table_ctx.columns[0].name, Ident::new("a"));
assert_eq!(table_ctx.primary_keys, vec![0]);
// Add a column at first
let expr = AlterTableExpr {
table_name: "foo".into(),
alter_options: AlterTableOperation::AddColumn {
column: Column {
name: "b".into(),
column_type: ConcreteDataType::timestamp_microsecond_datatype(),
options: vec![ColumnOption::PrimaryKey],
},
location: Some(AddColumnLocation::First),
},
};
let table_ctx = table_ctx.alter(expr).unwrap();
// Columns: [b, a]
assert_eq!(table_ctx.columns[0].name, Ident::new("b"));
assert_eq!(table_ctx.primary_keys, vec![0, 1]);
// Add a column after "b"
let expr = AlterTableExpr {
table_name: "foo".into(),
alter_options: AlterTableOperation::AddColumn {
column: Column {
name: "c".into(),
column_type: ConcreteDataType::timestamp_microsecond_datatype(),
options: vec![ColumnOption::PrimaryKey],
},
location: Some(AddColumnLocation::After {
column_name: "b".into(),
}),
},
};
let table_ctx = table_ctx.alter(expr).unwrap();
// Columns: [b, c, a]
assert_eq!(table_ctx.columns[1].name, Ident::new("c"));
assert_eq!(table_ctx.primary_keys, vec![0, 1, 2]);
// Drop the column "b"
let expr = AlterTableExpr {
table_name: "foo".into(),
alter_options: AlterTableOperation::DropColumn { name: "b".into() },
};
let table_ctx = table_ctx.alter(expr).unwrap();
// Columns: [c, a]
assert_eq!(table_ctx.columns[1].name, Ident::new("a"));
assert_eq!(table_ctx.primary_keys, vec![0, 1]);
}
}

View File

@@ -46,4 +46,7 @@ pub enum Error {
error: sqlx::error::Error,
location: Location,
},
#[snafu(display("Failed to assert: {}", reason))]
Assert { reason: String, location: Location },
}

View File

@@ -15,6 +15,7 @@
use std::marker::PhantomData;
use common_query::AddColumnLocation;
use datatypes::data_type::ConcreteDataType;
use derive_builder::Builder;
use rand::Rng;
use snafu::ensure;
@@ -24,10 +25,38 @@ use crate::error::{self, Error, Result};
use crate::fake::WordGenerator;
use crate::generator::{ColumnOptionGenerator, ConcreteDataTypeGenerator, Generator, Random};
use crate::ir::alter_expr::{AlterTableExpr, AlterTableOperation};
use crate::ir::create_expr::ColumnOption;
use crate::ir::{
column_options_generator, droppable_columns, generate_columns, ColumnTypeGenerator, Ident,
droppable_columns, generate_columns, generate_random_value, ColumnTypeGenerator, Ident,
};
/// Randomly picks the column options for a column that is being added to a table.
///
/// One of three shapes is chosen with equal probability:
/// - `NULL`
/// - a random default value
/// - `PRIMARY KEY` together with a random default value
fn add_column_options_generator<R: Rng>(
    rng: &mut R,
    column_type: &ConcreteDataType,
) -> Vec<ColumnOption> {
    let choice = rng.gen_range(0..3);
    match choice {
        // NULL
        0 => vec![ColumnOption::Null],
        // DEFAULT VALUE
        1 => {
            let default_value = generate_random_value(rng, column_type, None);
            vec![ColumnOption::DefaultValue(default_value)]
        }
        // PRIMARY KEY + DEFAULT VALUE
        2 => {
            let default_value = generate_random_value(rng, column_type, None);
            vec![
                ColumnOption::PrimaryKey,
                ColumnOption::DefaultValue(default_value),
            ]
        }
        _ => unreachable!(),
    }
}
/// Generates the [AlterTableOperation::AddColumn] of [AlterTableExpr].
#[derive(Builder)]
#[builder(pattern = "owned")]
@@ -37,7 +66,7 @@ pub struct AlterExprAddColumnGenerator<R: Rng + 'static> {
location: bool,
#[builder(default = "Box::new(WordGenerator)")]
name_generator: Box<dyn Random<Ident, R>>,
#[builder(default = "Box::new(column_options_generator)")]
#[builder(default = "Box::new(add_column_options_generator)")]
column_options_generator: ColumnOptionGenerator<R>,
#[builder(default = "Box::new(ColumnTypeGenerator)")]
column_type_generator: ConcreteDataTypeGenerator<R>,
@@ -65,7 +94,9 @@ impl<R: Rng + 'static> Generator<AlterTableExpr, R> for AlterExprAddColumnGenera
None
};
let name = self.name_generator.gen(rng);
let name = self
.table_ctx
.generate_unique_column_name(rng, self.name_generator.as_ref());
let column = generate_columns(
rng,
vec![name],
@@ -116,7 +147,9 @@ impl<R: Rng> Generator<AlterTableExpr, R> for AlterExprRenameGenerator<R> {
type Error = Error;
fn generate(&self, rng: &mut R) -> Result<AlterTableExpr> {
let new_table_name = self.name_generator.gen(rng);
let new_table_name = self
.table_ctx
.generate_unique_table_name(rng, self.name_generator.as_ref());
Ok(AlterTableExpr {
table_name: self.table_ctx.name.clone(),
alter_options: AlterTableOperation::RenameTable { new_table_name },
@@ -153,7 +186,7 @@ mod tests {
.generate(&mut rng)
.unwrap();
let serialized = serde_json::to_string(&expr).unwrap();
let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"AddColumn":{"column":{"name":{"value":"velit","quote_style":null},"column_type":{"Int32":{}},"options":[{"DefaultValue":{"Int32":853246610}}]},"location":null}}}"#;
let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"AddColumn":{"column":{"name":{"value":"velit","quote_style":null},"column_type":{"Int32":{}},"options":[{"DefaultValue":{"Int32":1606462472}}]},"location":null}}}"#;
assert_eq!(expected, serialized);
let expr = AlterExprRenameGeneratorBuilder::default()
@@ -163,7 +196,7 @@ mod tests {
.generate(&mut rng)
.unwrap();
let serialized = serde_json::to_string(&expr).unwrap();
let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"RenameTable":{"new_table_name":{"value":"iure","quote_style":null}}}}"#;
let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"RenameTable":{"new_table_name":{"value":"nihil","quote_style":null}}}}"#;
assert_eq!(expected, serialized);
let expr = AlterExprDropColumnGeneratorBuilder::default()
@@ -173,7 +206,7 @@ mod tests {
.generate(&mut rng)
.unwrap();
let serialized = serde_json::to_string(&expr).unwrap();
let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"DropColumn":{"name":{"value":"toTAm","quote_style":null}}}}"#;
let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"DropColumn":{"name":{"value":"cUmquE","quote_style":null}}}}"#;
assert_eq!(expected, serialized);
}
}

View File

@@ -189,10 +189,19 @@ impl<R: Rng + 'static> Generator<CreateTableExpr, R> for CreateTableExprGenerato
#[cfg(test)]
mod tests {
use datatypes::value::Value;
use rand::SeedableRng;
use super::*;
// Checks that a float64 with many significant digits renders back to the same
// text, both through `Value` and through a plain `f64` parsed from a string.
#[test]
fn test_float64() {
let value = Value::from(0.047318541668048164);
assert_eq!("0.047318541668048164", value.to_string());
let value: f64 = "0.047318541668048164".parse().unwrap();
assert_eq!("0.047318541668048164", value.to_string());
}
#[test]
fn test_create_table_expr_generator() {
let mut rng = rand::thread_rng();

View File

@@ -14,6 +14,7 @@
use std::marker::PhantomData;
use datatypes::value::Value;
use derive_builder::Builder;
use rand::seq::SliceRandom;
use rand::Rng;
@@ -22,7 +23,7 @@ use crate::context::TableContextRef;
use crate::error::{Error, Result};
use crate::fake::WordGenerator;
use crate::generator::{Generator, Random};
use crate::ir::insert_expr::InsertIntoExpr;
use crate::ir::insert_expr::{InsertIntoExpr, RowValue};
use crate::ir::{generate_random_value, Ident};
/// Generates [InsertIntoExpr].
@@ -41,30 +42,64 @@ pub struct InsertExprGenerator<R: Rng + 'static> {
impl<R: Rng + 'static> Generator<InsertIntoExpr, R> for InsertExprGenerator<R> {
type Error = Error;
/// Generates the [CreateTableExpr].
/// Generates the [InsertIntoExpr].
fn generate(&self, rng: &mut R) -> Result<InsertIntoExpr> {
let mut columns = self.table_ctx.columns.clone();
columns.shuffle(rng);
// Whether to omit all columns, i.e. INSERT INTO table_name VALUES (...)
let omit_column_list = rng.gen_bool(0.2);
let mut rows = Vec::with_capacity(self.rows);
let mut values_columns = vec![];
if omit_column_list {
// If omit column list, then all columns are required in the values list
values_columns = self.table_ctx.columns.clone();
} else {
for column in &self.table_ctx.columns {
let can_omit = column.is_nullable() || column.has_default_value();
// 50% chance to omit a column if it's not required
if !can_omit || rng.gen_bool(0.5) {
values_columns.push(column.clone());
}
}
values_columns.shuffle(rng);
// If all columns are omitted, pick a random column
if values_columns.is_empty() {
values_columns.push(self.table_ctx.columns.choose(rng).unwrap().clone());
}
}
let mut values_list = Vec::with_capacity(self.rows);
for _ in 0..self.rows {
let mut row = Vec::with_capacity(columns.len());
for column in &columns {
// TODO(weny): generates the special cases
row.push(generate_random_value(
let mut row = Vec::with_capacity(values_columns.len());
for column in &values_columns {
if column.is_nullable() && rng.gen_bool(0.2) {
row.push(RowValue::Value(Value::Null));
continue;
}
if column.has_default_value() && rng.gen_bool(0.2) {
row.push(RowValue::Default);
continue;
}
row.push(RowValue::Value(generate_random_value(
rng,
&column.column_type,
Some(self.word_generator.as_ref()),
));
)));
}
rows.push(row);
values_list.push(row);
}
Ok(InsertIntoExpr {
table_name: self.table_ctx.name.to_string(),
columns,
rows,
columns: if omit_column_list {
vec![]
} else {
values_columns
},
values_list,
})
}
}

View File

@@ -22,10 +22,13 @@ pub(crate) mod select_expr;
use core::fmt;
pub use alter_expr::AlterTableExpr;
use common_time::{Date, DateTime, Timestamp};
pub use create_expr::CreateTableExpr;
use datatypes::data_type::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use derive_builder::Builder;
pub use insert_expr::InsertIntoExpr;
use lazy_static::lazy_static;
use rand::seq::SliceRandom;
use rand::Rng;
@@ -91,14 +94,62 @@ pub fn generate_random_value<R: Rng>(
Some(random) => Value::from(random.gen(rng).value),
None => Value::from(rng.gen::<char>().to_string()),
},
ConcreteDataType::Date(_) => Value::from(rng.gen::<i32>()),
ConcreteDataType::DateTime(_) => Value::from(rng.gen::<i64>()),
&ConcreteDataType::Timestamp(_) => Value::from(rng.gen::<u64>()),
ConcreteDataType::Date(_) => generate_random_date(rng),
ConcreteDataType::DateTime(_) => generate_random_datetime(rng),
&ConcreteDataType::Timestamp(ts_type) => generate_random_timestamp(rng, ts_type),
_ => unimplemented!("unsupported type: {datatype}"),
}
}
/// Generates a random timestamp value in the unit given by `ts_type`.
///
/// The raw value is drawn uniformly from the inclusive range between the
/// `Timestamp::MIN_*` and `Timestamp::MAX_*` constants of that unit.
fn generate_random_timestamp<R: Rng>(rng: &mut R, ts_type: TimestampType) -> Value {
    let timestamp = match ts_type {
        TimestampType::Second(_) => {
            let range = i64::from(Timestamp::MIN_SECOND)..=i64::from(Timestamp::MAX_SECOND);
            Timestamp::new_second(rng.gen_range(range))
        }
        TimestampType::Millisecond(_) => {
            let range =
                i64::from(Timestamp::MIN_MILLISECOND)..=i64::from(Timestamp::MAX_MILLISECOND);
            Timestamp::new_millisecond(rng.gen_range(range))
        }
        TimestampType::Microsecond(_) => {
            let range =
                i64::from(Timestamp::MIN_MICROSECOND)..=i64::from(Timestamp::MAX_MICROSECOND);
            Timestamp::new_microsecond(rng.gen_range(range))
        }
        TimestampType::Nanosecond(_) => {
            let range =
                i64::from(Timestamp::MIN_NANOSECOND)..=i64::from(Timestamp::MAX_NANOSECOND);
            Timestamp::new_nanosecond(rng.gen_range(range))
        }
    };
    Value::from(timestamp)
}
/// Generates a random datetime value.
///
/// A millisecond timestamp is drawn from the inclusive
/// `Timestamp::MIN_MILLISECOND..=Timestamp::MAX_MILLISECOND` range and then
/// converted to a [DateTime] through its chrono representation.
fn generate_random_datetime<R: Rng>(rng: &mut R) -> Value {
    let range = i64::from(Timestamp::MIN_MILLISECOND)..=i64::from(Timestamp::MAX_MILLISECOND);
    let millis = rng.gen_range(range);
    let chrono_datetime = Timestamp::new_millisecond(millis)
        .to_chrono_datetime()
        .unwrap();
    Value::from(DateTime::from(chrono_datetime))
}
/// Generates a random date value.
///
/// A millisecond timestamp is drawn from the inclusive
/// `Timestamp::MIN_MILLISECOND..=Timestamp::MAX_MILLISECOND` range and then
/// converted to a [Date] through its chrono representation.
fn generate_random_date<R: Rng>(rng: &mut R) -> Value {
    let range = i64::from(Timestamp::MIN_MILLISECOND)..=i64::from(Timestamp::MAX_MILLISECOND);
    let millis = rng.gen_range(range);
    let chrono_date = Timestamp::new_millisecond(millis)
        .to_chrono_date()
        .unwrap();
    Value::from(Date::from(chrono_date))
}
/// An identifier.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord, Hash)]
pub struct Ident {
@@ -181,6 +232,24 @@ impl Column {
.iter()
.any(|opt| opt == &ColumnOption::PrimaryKey)
}
/// Returns true if the column accepts NULL, i.e. none of its options is
/// `NotNull` or `TimeIndex`.
pub fn is_nullable(&self) -> bool {
    self.options
        .iter()
        .all(|opt| !matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
}
// Returns true if it has default value.
pub fn has_default_value(&self) -> bool {
self.options.iter().any(|opt| {
matches!(
opt,
ColumnOption::DefaultValue(_) | ColumnOption::DefaultFn(_)
)
})
}
}
/// Returns droppable columns. i.e., non-primary key columns, non-ts columns.

View File

@@ -22,7 +22,7 @@ use serde::{Deserialize, Serialize};
use crate::ir::{Column, Ident};
// The column options
/// The column options
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub enum ColumnOption {
Null,

View File

@@ -12,14 +12,37 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use datatypes::value::Value;
use crate::ir::Column;
pub type RowValue = Vec<Value>;
pub struct InsertIntoExpr {
pub table_name: String,
pub columns: Vec<Column>,
pub rows: Vec<RowValue>,
pub values_list: Vec<RowValues>,
}
pub type RowValues = Vec<RowValue>;
/// A single cell of an `INSERT` values row.
pub enum RowValue {
/// A concrete value; rendered through its `Display` implementation.
Value(Value),
/// Rendered as the `DEFAULT` keyword.
Default,
}
impl Display for RowValue {
    /// Renders the cell as it appears inside a `VALUES (...)` list.
    ///
    /// `Default` becomes `DEFAULT`, a null value becomes `NULL`, string, timestamp,
    /// datetime and date values are wrapped in single quotes, and every other
    /// value is written as-is.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            RowValue::Default => write!(f, "DEFAULT"),
            RowValue::Value(Value::Null) => write!(f, "NULL"),
            RowValue::Value(
                quoted @ (Value::String(_)
                | Value::Timestamp(_)
                | Value::DateTime(_)
                | Value::Date(_)),
            ) => write!(f, "'{}'", quoted),
            RowValue::Value(plain) => write!(f, "{}", plain),
        }
    }
}

View File

@@ -22,6 +22,7 @@ pub mod generator;
pub mod ir;
pub mod translator;
pub mod utils;
pub mod validator;
#[cfg(test)]
pub mod test_utils;

View File

@@ -22,33 +22,42 @@ impl DslTranslator<InsertIntoExpr, String> for InsertIntoExprTranslator {
type Error = Error;
fn translate(&self, input: &InsertIntoExpr) -> Result<String> {
let columns = input
.columns
.iter()
.map(|c| c.name.to_string())
.collect::<Vec<_>>()
.join(", ")
.to_string();
Ok(format!(
"INSERT INTO {} ({})\nVALUES\n{};",
"INSERT INTO {} {} VALUES\n{};",
input.table_name,
columns,
Self::format_columns(input),
Self::format_values(input)
))
}
}
impl InsertIntoExprTranslator {
/// Formats the explicit column list of the statement, e.g. `(a, b, c)`.
///
/// Returns an empty string when `input.columns` is empty, so the caller emits
/// an `INSERT` without a column list.
fn format_columns(input: &InsertIntoExpr) -> String {
    if input.columns.is_empty() {
        return String::new();
    }

    // `join` already yields an owned `String`; no further conversion is needed.
    let list = input
        .columns
        .iter()
        .map(|c| c.name.to_string())
        .collect::<Vec<_>>()
        .join(", ");
    format!("({list})")
}
fn format_values(input: &InsertIntoExpr) -> String {
input
.rows
.values_list
.iter()
.map(|row| {
.map(|value| {
format!(
"({})",
row.iter()
.map(|v| format!("'{v}'"))
value
.iter()
.map(|v| v.to_string())
.collect::<Vec<_>>()
.join(", ")
)
@@ -64,7 +73,7 @@ mod tests {
use rand::SeedableRng;
use super::InsertIntoExprTranslator;
use super::*;
use crate::generator::insert_expr::InsertExprGeneratorBuilder;
use crate::generator::Generator;
use crate::test_utils;
@@ -84,10 +93,23 @@ mod tests {
let insert_expr = insert_expr_generator.generate(&mut rng).unwrap();
let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap();
let expected = r#"INSERT INTO test (host, idc, memory_util, ts, cpu_util, disk_util)
VALUES
('adipisci', 'debitis', '0.5495312687894465', '15292064470292927036', '0.9354265029131291', '0.8037816422279636'),
('ut', 'sequi', '0.8807117723618908', '14214208091261382505', '0.5240550121500691', '0.350785883750684');"#;
let expected = r#"INSERT INTO test (ts, host, cpu_util) VALUES
('+199601-11-07 21:32:56.695+0000', 'corrupti', 0.051130243193075464),
('+40822-03-25 02:17:34.328+0000', NULL, 0.6552502332327004);"#;
assert_eq!(output, expected);
let insert_expr = insert_expr_generator.generate(&mut rng).unwrap();
let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap();
let expected = r#"INSERT INTO test (cpu_util, disk_util, ts) VALUES
(0.7074194466620976, 0.661288102315126, '-47252-05-08 07:33:49.567+0000'),
(0.8266101224213618, 0.7947724277743285, '-224292-12-07 02:51:53.371+0000');"#;
assert_eq!(output, expected);
let insert_expr = insert_expr_generator.generate(&mut rng).unwrap();
let output = InsertIntoExprTranslator.translate(&insert_expr).unwrap();
let expected = r#"INSERT INTO test VALUES
('odio', NULL, 0.48809950435391647, 0.5228925709595407, 0.9091528874275897, '+241156-12-16 20:52:15.185+0000'),
('dignissimos', 'labore', NULL, 0.12983559048685023, 0.6362040919831425, '-30691-06-17 23:41:09.938+0000');"#;
assert_eq!(output, expected);
}
}

View File

@@ -12,10 +12,4 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod merge_tree_bench;
use criterion::criterion_main;
criterion_main! {
merge_tree_bench::benches
}
pub mod column;

View File

@@ -0,0 +1,291 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_telemetry::debug;
use datatypes::data_type::DataType;
use snafu::{ensure, ResultExt};
use sqlx::database::HasArguments;
use sqlx::{ColumnIndex, Database, Decode, Encode, Executor, IntoArguments, Type};
use crate::error::{self, Result};
use crate::ir::create_expr::ColumnOption;
use crate::ir::{Column, Ident};
#[derive(Debug, sqlx::FromRow)]
pub struct ColumnEntry {
pub table_schema: String,
pub table_name: String,
pub column_name: String,
pub data_type: String,
pub semantic_type: String,
pub column_default: Option<String>,
pub is_nullable: String,
}
/// Returns true if `str` spells "YES", ignoring ASCII case.
///
/// Used to interpret the `is_nullable` text of a [ColumnEntry]. The comparison
/// is done in place, without allocating an upper-cased copy of the input.
fn is_nullable(str: &str) -> bool {
    str.eq_ignore_ascii_case("YES")
}
/// The semantic role of a column, as parsed from the `semantic_type` text of a
/// [ColumnEntry].
enum SemanticType {
    Timestamp,
    Field,
    Tag,
}

/// Parses the semantic type text.
///
/// Matching is exact and case-sensitive; any text other than `"TIMESTAMP"`,
/// `"FIELD"` or `"TAG"` yields `None`.
fn semantic_type(str: &str) -> Option<SemanticType> {
    let parsed = match str {
        "TAG" => SemanticType::Tag,
        "FIELD" => SemanticType::Field,
        "TIMESTAMP" => SemanticType::Timestamp,
        _ => return None,
    };
    Some(parsed)
}
impl PartialEq<Column> for ColumnEntry {
    /// Compares a fetched [ColumnEntry] against the expected [Column].
    ///
    /// The column name, data type name, default value, nullability and semantic
    /// type are checked in that order. The first mismatch is reported through
    /// `debug!` and makes the comparison return `false`.
    fn eq(&self, other: &Column) -> bool {
        // Checks `column_name`
        if other.name.value != self.column_name {
            debug!(
                "expected name: {}, got: {}",
                other.name.value, self.column_name
            );
            return false;
        }
        // Checks `data_type`
        if other.column_type.name() != self.data_type {
            debug!(
                "expected column_type: {}, got: {}",
                other.column_type.name(),
                self.data_type
            );
            return false;
        }
        // Checks `column_default`
        // The first default-like option of the expected column, rendered as text.
        let expected_default = other.options.iter().find_map(|opt| match opt {
            ColumnOption::DefaultValue(v) => Some(v.to_string()),
            ColumnOption::DefaultFn(f) => Some(f.to_string()),
            _ => None,
        });
        match (&self.column_default, expected_default) {
            (Some(value), Some(default_value)) => {
                if &default_value != value {
                    debug!("expected default value: {default_value}, got: {value}");
                    return false;
                }
            }
            (Some(_), None) => {
                debug!("default value options is not found");
                return false;
            }
            (None, Some(default_value)) => {
                debug!("expected default value: {default_value}, got none");
                return false;
            }
            (None, None) => {}
        }
        // Checks `is_nullable`
        if is_nullable(&self.is_nullable) {
            // Null is the default value. Therefore, we only ensure there is no `ColumnOption::NotNull` option.
            if other
                .options
                .iter()
                .any(|opt| matches!(opt, ColumnOption::NotNull))
            {
                debug!("ColumnOption::NotNull is found");
                return false;
            }
        } else {
            // `ColumnOption::TimeIndex` implies that the field is not nullable.
            if !other
                .options
                .iter()
                .any(|opt| matches!(opt, ColumnOption::NotNull | ColumnOption::TimeIndex))
            {
                debug!("ColumnOption::NotNull or ColumnOption::TimeIndex is not found");
                return false;
            }
        }
        // Checks `semantic_type`
        match semantic_type(&self.semantic_type) {
            Some(SemanticType::Tag) => {
                if !other
                    .options
                    .iter()
                    .any(|opt| matches!(opt, ColumnOption::PrimaryKey))
                {
                    debug!("ColumnOption::PrimaryKey is not found");
                    return false;
                }
            }
            Some(SemanticType::Field) => {
                if other
                    .options
                    .iter()
                    .any(|opt| matches!(opt, ColumnOption::PrimaryKey | ColumnOption::TimeIndex))
                {
                    debug!("unexpected ColumnOption::PrimaryKey or ColumnOption::TimeIndex");
                    return false;
                }
            }
            Some(SemanticType::Timestamp) => {
                if !other
                    .options
                    .iter()
                    .any(|opt| matches!(opt, ColumnOption::TimeIndex))
                {
                    debug!("ColumnOption::TimeIndex is not found");
                    return false;
                }
            }
            None => {
                debug!("unknown semantic type: {}", self.semantic_type);
                return false;
            }
        };

        true
    }
}
/// Verifies that the fetched column entries match the expected columns, position by position.
///
/// # Errors
///
/// Returns an `Assert` error when the two slices differ in length, or at the
/// first position where the entry does not equal the expected column.
pub fn assert_eq(fetched_columns: &[ColumnEntry], columns: &[Column]) -> Result<()> {
    let expected_len = columns.len();
    let fetched_len = fetched_columns.len();
    ensure!(
        expected_len == fetched_len,
        error::AssertSnafu {
            reason: format!(
                "Expected columns length: {}, got: {}",
                expected_len, fetched_len,
            )
        }
    );

    // Lengths are equal here, so zipping visits every pair.
    for (fetched, expected) in fetched_columns.iter().zip(columns) {
        ensure!(
            fetched == expected,
            error::AssertSnafu {
                reason: format!(
                    "ColumnEntry {fetched:?} is not equal to Column {:?}",
                    expected
                )
            }
        );
    }

    Ok(())
}
/// Loads every [ColumnEntry] that `information_schema.columns` reports for a table.
///
/// The `value` of `schema_name` and of `table_name` are bound as the two query
/// parameters, and the query runs on the executor `e`. A failing query is wrapped
/// with `error::ExecuteQuerySnafu`, which carries the SQL text.
pub async fn fetch_columns<'a, DB, E>(
    e: E,
    schema_name: Ident,
    table_name: Ident,
) -> Result<Vec<ColumnEntry>>
where
    DB: Database,
    <DB as HasArguments<'a>>::Arguments: IntoArguments<'a, DB>,
    for<'c> E: 'a + Executor<'c, Database = DB>,
    for<'c> String: Decode<'c, DB> + Type<DB>,
    for<'c> String: Encode<'c, DB> + Type<DB>,
    for<'c> &'c str: ColumnIndex<<DB as Database>::Row>,
{
    let sql = "SELECT * FROM information_schema.columns WHERE table_schema = ? AND table_name = ?";

    // Owned copies of the identifier values serve as the bound parameters.
    let schema = schema_name.value.to_string();
    let table = table_name.value.to_string();

    let query = sqlx::query_as::<_, ColumnEntry>(sql).bind(schema).bind(table);
    query
        .fetch_all(e)
        .await
        .context(error::ExecuteQuerySnafu { sql })
}
#[cfg(test)]
mod tests {
    use datatypes::data_type::{ConcreteDataType, DataType};
    use datatypes::value::Value;

    use super::ColumnEntry;
    use crate::ir::create_expr::ColumnOption;
    use crate::ir::{Column, Ident};

    /// Builds a `FIELD` entry named `test` with `is_nullable` set to `"Yes"`,
    /// using the given type name and optional default.
    fn field_entry(data_type: String, column_default: Option<&str>) -> ColumnEntry {
        ColumnEntry {
            table_schema: String::new(),
            table_name: String::new(),
            column_name: "test".to_string(),
            data_type,
            semantic_type: "FIELD".to_string(),
            column_default: column_default.map(str::to_string),
            is_nullable: "Yes".to_string(),
        }
    }

    /// Builds an `int8` column with the given name and options.
    fn int8_column(name: Ident, options: Vec<ColumnOption>) -> Column {
        Column {
            name,
            column_type: ConcreteDataType::int8_datatype(),
            options,
        }
    }

    /// Exercises `ColumnEntry == Column` for plain, quoted, and defaulted columns.
    #[test]
    fn test_column_eq() {
        common_telemetry::init_default_ut_logging();
        let quoted_name = || Ident::with_quote('\'', "test");

        // Naive: no options, unquoted identifier.
        let plain_entry = field_entry(ConcreteDataType::int8_datatype().name(), None);
        assert!(plain_entry == int8_column(Ident::new("test"), vec![]));

        // With quote: the same entry must also match a quoted identifier.
        assert!(plain_entry == int8_column(quoted_name(), vec![]));

        // With default value
        let value_entry = field_entry(ConcreteDataType::int8_datatype().to_string(), Some("1"));
        let value_column = int8_column(
            quoted_name(),
            vec![ColumnOption::DefaultValue(Value::from(1))],
        );
        assert!(value_entry == value_column);

        // With default function
        let fn_entry = field_entry(
            ConcreteDataType::int8_datatype().to_string(),
            Some("Hello()"),
        );
        let fn_column = int8_column(
            quoted_name(),
            vec![ColumnOption::DefaultFn("Hello()".to_string())],
        );
        assert!(fn_entry == fn_column);
    }
}

View File

@@ -0,0 +1,185 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![no_main]
use std::sync::Arc;
use arbitrary::{Arbitrary, Unstructured};
use common_telemetry::info;
use libfuzzer_sys::fuzz_target;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaChaRng;
use snafu::ResultExt;
use sqlx::{MySql, Pool};
use tests_fuzz::context::{TableContext, TableContextRef};
use tests_fuzz::error::{self, Result};
use tests_fuzz::fake::{
merge_two_word_map_fn, random_capitalize_map, uppercase_and_keyword_backtick_map,
MappedGenerator, WordGenerator,
};
use tests_fuzz::generator::alter_expr::{
AlterExprAddColumnGeneratorBuilder, AlterExprDropColumnGeneratorBuilder,
AlterExprRenameGeneratorBuilder,
};
use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder;
use tests_fuzz::generator::Generator;
use tests_fuzz::ir::{droppable_columns, AlterTableExpr, CreateTableExpr};
use tests_fuzz::translator::mysql::alter_expr::AlterTableExprTranslator;
use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator;
use tests_fuzz::translator::DslTranslator;
use tests_fuzz::utils::{init_greptime_connections, Connections};
use tests_fuzz::validator;
/// Resources shared by one fuzz run: the MySQL connection pool that every
/// statement in this target is executed against.
struct FuzzContext {
greptime: Pool<MySql>,
}
impl FuzzContext {
/// Consumes the context and closes the connection pool.
async fn close(self) {
self.greptime.close().await;
}
}
/// Parameters of a single fuzz run.
#[derive(Clone, Debug)]
struct FuzzInput {
// Seed for the `ChaChaRng` that drives all random generation in the run.
seed: u64,
// Number of `ALTER TABLE` statements to generate and execute.
actions: usize,
}
/// Produces a random `CREATE TABLE` expression for the `mito` engine.
///
/// The column count is drawn from `2..30`, and table/column names come from a
/// word generator mapped through random capitalization and keyword back-ticking.
fn generate_create_table_expr<R: Rng + 'static>(rng: &mut R) -> Result<CreateTableExpr> {
    // Drawn before the generator runs so the RNG stream is consumed in the same order.
    let column_count = rng.gen_range(2..30);

    CreateTableExprGeneratorBuilder::default()
        .name_generator(Box::new(MappedGenerator::new(
            WordGenerator,
            merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
        )))
        .columns(column_count)
        .engine("mito")
        .build()
        .unwrap()
        .generate(rng)
}
/// Produces one random `ALTER TABLE` expression for the table described by `table_ctx`.
///
/// With probability 0.2 the table is renamed. Otherwise a coin flip picks between
/// dropping a column (only when at least one column is droppable) and adding a
/// column, where a second coin flip is passed to the generator as `location`.
fn generate_alter_table_expr<R: Rng + 'static>(
    table_ctx: TableContextRef,
    rng: &mut R,
) -> Result<AlterTableExpr> {
    if rng.gen_bool(0.2) {
        return AlterExprRenameGeneratorBuilder::default()
            .table_ctx(table_ctx)
            .name_generator(Box::new(MappedGenerator::new(
                WordGenerator,
                merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
            )))
            .build()
            .unwrap()
            .generate(rng);
    }

    // The coin is flipped before the droppable check, so the RNG advances
    // regardless of whether any column can be dropped.
    let wants_drop = rng.gen_bool(0.5);
    if wants_drop && !droppable_columns(&table_ctx.columns).is_empty() {
        return AlterExprDropColumnGeneratorBuilder::default()
            .table_ctx(table_ctx)
            .build()
            .unwrap()
            .generate(rng);
    }

    let with_location = rng.gen_bool(0.5);
    AlterExprAddColumnGeneratorBuilder::default()
        .table_ctx(table_ctx)
        .location(with_location)
        .build()
        .unwrap()
        .generate(rng)
}
impl Arbitrary<'_> for FuzzInput {
    /// Reads a `u64` seed from the fuzzer bytes and derives the action count
    /// (`1..256`) from a `ChaChaRng` seeded with it, so the seed alone
    /// determines the whole input.
    fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result<Self> {
        let seed = u.int_in_range(u64::MIN..=u64::MAX)?;
        let actions = ChaChaRng::seed_from_u64(seed).gen_range(1..256);
        Ok(Self { seed, actions })
    }
}
/// Runs one fuzz case against the database behind `ctx`.
///
/// Steps:
/// 1. Creates a randomly generated table.
/// 2. Executes `input.actions` randomly generated `ALTER TABLE` statements. After
///    each one the same change is applied to the in-memory table context, and the
///    columns reported by `information_schema` are compared with it.
/// 3. Drops the table and closes the connection pool.
///
/// # Errors
///
/// Returns the first generation, translation, query, or validation error.
/// Generator failures are propagated with `?` rather than panicking, matching
/// how the other steps in this function report errors.
async fn execute_alter_table(ctx: FuzzContext, input: FuzzInput) -> Result<()> {
    info!("input: {input:?}");
    let mut rng = ChaChaRng::seed_from_u64(input.seed);

    // Create table
    let expr = generate_create_table_expr(&mut rng)?;
    let translator = CreateTableExprTranslator;
    let sql = translator.translate(&expr)?;
    let result = sqlx::query(&sql)
        .execute(&ctx.greptime)
        .await
        .context(error::ExecuteQuerySnafu { sql: &sql })?;
    info!("Create table: {sql}, result: {result:?}");

    // Alter table actions
    let mut table_ctx = Arc::new(TableContext::from(&expr));
    for _ in 0..input.actions {
        let expr = generate_alter_table_expr(table_ctx.clone(), &mut rng)?;
        let translator = AlterTableExprTranslator;
        let sql = translator.translate(&expr)?;
        let result = sqlx::query(&sql)
            .execute(&ctx.greptime)
            .await
            .context(error::ExecuteQuerySnafu { sql: &sql })?;
        info!("Alter table: {sql}, result: {result:?}");

        // Applies changes to the in-memory context so it mirrors the database.
        // NOTE(review): `alter`'s return type is not visible here, so the
        // `unwrap` is kept as is; switch to `?` if it returns this crate's `Result`.
        table_ctx = Arc::new(Arc::unwrap_or_clone(table_ctx).alter(expr).unwrap());

        // Validates columns. Both sides are sorted by column name because
        // `assert_eq` compares position by position.
        let mut column_entries = validator::column::fetch_columns(
            &ctx.greptime,
            "public".into(),
            table_ctx.name.clone(),
        )
        .await?;
        column_entries.sort_by(|a, b| a.column_name.cmp(&b.column_name));
        let mut columns = table_ctx.columns.clone();
        columns.sort_by(|a, b| a.name.value.cmp(&b.name.value));
        validator::column::assert_eq(&column_entries, &columns)?;
    }

    // Cleans up. The name is read from the context because a rename action
    // may have changed it since creation.
    let table_name = table_ctx.name.clone();
    let sql = format!("DROP TABLE {}", table_name);
    let result = sqlx::query(&sql)
        .execute(&ctx.greptime)
        .await
        .context(error::ExecuteQuerySnafu { sql })?;
    info!("Drop table: {}, result: {result:?}", table_name);
    ctx.close().await;

    Ok(())
}
// libFuzzer entry point: for each generated `FuzzInput`, connect to the database
// and run the alter-table scenario. Any error is turned into a panic so the
// fuzzer records the input as a failure.
fuzz_target!(|input: FuzzInput| {
common_telemetry::init_default_ut_logging();
// The async scenario is handed to `common_runtime::block_on_write`.
common_runtime::block_on_write(async {
let Connections { mysql } = init_greptime_connections().await;
let ctx = FuzzContext {
greptime: mysql.expect("mysql connection init must be succeed"),
};
execute_alter_table(ctx, input)
.await
.unwrap_or_else(|err| panic!("fuzz test must be succeed: {err:?}"));
})
});

View File

@@ -32,6 +32,7 @@ use tests_fuzz::ir::CreateTableExpr;
use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator;
use tests_fuzz::translator::DslTranslator;
use tests_fuzz::utils::{init_greptime_connections, Connections};
use tests_fuzz::validator;
struct FuzzContext {
greptime: Pool<MySql>,
@@ -52,7 +53,8 @@ struct FuzzInput {
impl Arbitrary<'_> for FuzzInput {
fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result<Self> {
let seed = u.int_in_range(u64::MIN..=u64::MAX)?;
let columns = u.int_in_range(2..=10)?;
let mut rng = ChaChaRng::seed_from_u64(seed);
let columns = rng.gen_range(2..30);
Ok(FuzzInput { columns, seed })
}
}
@@ -64,7 +66,7 @@ fn generate_expr(input: FuzzInput) -> Result<CreateTableExpr> {
WordGenerator,
merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
)))
.columns(rng.gen_range(1..input.columns))
.columns(input.columns)
.engine("mito")
.build()
.unwrap();
@@ -82,6 +84,15 @@ async fn execute_create_table(ctx: FuzzContext, input: FuzzInput) -> Result<()>
.context(error::ExecuteQuerySnafu { sql: &sql })?;
info!("Create table: {sql}, result: {result:?}");
// Validates columns
let mut column_entries =
validator::column::fetch_columns(&ctx.greptime, "public".into(), expr.table_name.clone())
.await?;
column_entries.sort_by(|a, b| a.column_name.cmp(&b.column_name));
let mut columns = expr.columns.clone();
columns.sort_by(|a, b| a.name.value.cmp(&b.name.value));
validator::column::assert_eq(&column_entries, &columns)?;
// Cleans up
let sql = format!("DROP TABLE {}", expr.table_name);
let result = sqlx::query(&sql)

View File

@@ -0,0 +1,163 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![no_main]
use std::sync::Arc;
use common_telemetry::info;
use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured};
use libfuzzer_sys::fuzz_target;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaChaRng;
use snafu::{ensure, ResultExt};
use sqlx::{Executor, MySql, Pool};
use tests_fuzz::context::{TableContext, TableContextRef};
use tests_fuzz::error::{self, Result};
use tests_fuzz::fake::{
merge_two_word_map_fn, random_capitalize_map, uppercase_and_keyword_backtick_map,
MappedGenerator, WordGenerator,
};
use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder;
use tests_fuzz::generator::insert_expr::InsertExprGeneratorBuilder;
use tests_fuzz::generator::Generator;
use tests_fuzz::ir::{CreateTableExpr, InsertIntoExpr};
use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator;
use tests_fuzz::translator::mysql::insert_expr::InsertIntoExprTranslator;
use tests_fuzz::translator::DslTranslator;
use tests_fuzz::utils::{init_greptime_connections, Connections};
/// Resources shared by one fuzz run: the MySQL connection pool that every
/// statement in this target is executed against.
struct FuzzContext {
greptime: Pool<MySql>,
}
impl FuzzContext {
/// Consumes the context and closes the connection pool.
async fn close(self) {
self.greptime.close().await;
}
}
/// Parameters of a single insert fuzz run.
#[derive(Copy, Clone, Debug)]
struct FuzzInput {
// Seed for the `ChaChaRng` that drives all random generation in the run.
seed: u64,
// Number of columns requested from the create-table generator.
columns: usize,
// Number of rows requested from the insert generator; also the expected affected-row count.
rows: usize,
}
impl Arbitrary<'_> for FuzzInput {
    /// Reads a `u64` seed from the fuzzer bytes, then derives the column count
    /// (`2..30`) and row count (`1..4096`) from a `ChaChaRng` seeded with it.
    fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result<Self> {
        let seed = u.int_in_range(u64::MIN..=u64::MAX)?;
        let mut rng = ChaChaRng::seed_from_u64(seed);
        // Field expressions are evaluated in the order written: columns first, then rows.
        Ok(Self {
            seed,
            columns: rng.gen_range(2..30),
            rows: rng.gen_range(1..4096),
        })
    }
}
/// Produces a random `CREATE TABLE` expression with `input.columns` columns for
/// the `mito` engine. Names come from a word generator mapped through random
/// capitalization and keyword back-ticking.
fn generate_create_expr<R: Rng + 'static>(
    input: FuzzInput,
    rng: &mut R,
) -> Result<CreateTableExpr> {
    CreateTableExprGeneratorBuilder::default()
        .name_generator(Box::new(MappedGenerator::new(
            WordGenerator,
            merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
        )))
        .columns(input.columns)
        .engine("mito")
        .build()
        .unwrap()
        .generate(rng)
}
/// Produces a random `INSERT INTO` expression with `input.rows` rows for the
/// table described by `table_ctx`.
fn generate_insert_expr<R: Rng + 'static>(
    input: FuzzInput,
    rng: &mut R,
    table_ctx: TableContextRef,
) -> Result<InsertIntoExpr> {
    InsertExprGeneratorBuilder::default()
        .table_ctx(table_ctx)
        .rows(input.rows)
        .build()
        .unwrap()
        .generate(rng)
}
async fn execute_insert(ctx: FuzzContext, input: FuzzInput) -> Result<()> {
info!("input: {input:?}");
let mut rng = ChaChaRng::seed_from_u64(input.seed);
let create_expr = generate_create_expr(input, &mut rng)?;
let translator = CreateTableExprTranslator;
let sql = translator.translate(&create_expr)?;
let _result = sqlx::query(&sql)
.execute(&ctx.greptime)
.await
.context(error::ExecuteQuerySnafu { sql: &sql })?;
let table_ctx = Arc::new(TableContext::from(&create_expr));
let insert_expr = generate_insert_expr(input, &mut rng, table_ctx)?;
let translator = InsertIntoExprTranslator;
let sql = translator.translate(&insert_expr)?;
let result = ctx
.greptime
// unprepared query, see <https://github.com/GreptimeTeam/greptimedb/issues/3500>
.execute(sql.as_str())
.await
.context(error::ExecuteQuerySnafu { sql: &sql })?;
ensure!(
result.rows_affected() == input.rows as u64,
error::AssertSnafu {
reason: format!(
"expected rows affected: {}, actual: {}",
input.rows,
result.rows_affected(),
)
}
);
// TODO: Validate inserted rows
// Cleans up
let sql = format!("DROP TABLE {}", create_expr.table_name);
let result = sqlx::query(&sql)
.execute(&ctx.greptime)
.await
.context(error::ExecuteQuerySnafu { sql })?;
info!(
"Drop table: {}\n\nResult: {result:?}\n\n",
create_expr.table_name
);
ctx.close().await;
Ok(())
}
// libFuzzer entry point: for each generated `FuzzInput`, connect to the database
// and run the insert scenario. Any error is turned into a panic so the fuzzer
// records the input as a failure.
fuzz_target!(|input: FuzzInput| {
common_telemetry::init_default_ut_logging();
// The async scenario is handed to `common_runtime::block_on_write`.
common_runtime::block_on_write(async {
let Connections { mysql } = init_greptime_connections().await;
let ctx = FuzzContext {
greptime: mysql.expect("mysql connection init must be succeed"),
};
execute_insert(ctx, input)
.await
.unwrap_or_else(|err| panic!("fuzz test must be succeed: {err:?}"));
})
});

View File

@@ -1,7 +1,7 @@
version: '3.8'
services:
etcd:
image: public.ecr.aws/bitnami/etcd:3.5
image: ghcr.io/zcube/bitnami-compat/etcd:3.5
ports:
- "2379:2379"
- "2380:2380"

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(assert_matches)]
pub mod cluster;
mod grpc;
mod influxdb;

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::assert_matches::assert_matches;
use std::env;
use std::sync::Arc;
@@ -160,7 +161,7 @@ PARTITION ON COLUMNS (n) (
}
#[apply(both_instances_cases)]
async fn test_validate_external_table_options(instance: Arc<dyn MockInstance>) {
async fn test_extra_external_table_options(instance: Arc<dyn MockInstance>) {
let frontend = instance.frontend();
let format = "json";
let location = find_testing_resource("/tests/data/json/various_type.json");
@@ -639,7 +640,7 @@ async fn test_execute_external_create_without_ts(instance: Arc<dyn MockInstance>
),
)
.await;
assert!(matches!(result, Err(Error::TableOperation { .. })));
assert_matches!(result, Err(Error::ParseSql { .. }));
}
#[apply(both_instances_cases)]

View File

@@ -789,7 +789,7 @@ intermediate_path = ""
[datanode.region_engine.mito.memtable]
type = "experimental"
index_max_keys_per_shard = 8192
data_freeze_threshold = 32768
data_freeze_threshold = 131072
dedup = true
fork_dictionary_bytes = "1GiB"

View File

@@ -155,6 +155,23 @@ CREATE TABLE test_like_2 LIKE test_like_1;
Error: 4000(TableAlreadyExists), Table already exists: `greptime.public.test_like_2`
CREATE TABLE `ExcePTuRi`(
non TIMESTAMP(6) TIME INDEX,
`iUSTO` DOUBLE DEFAULT 0.047318541668048164
)
ENGINE=mito;
Affected Rows: 0
DESC table `ExcePTuRi`;
+--------+----------------------+-----+------+----------------------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+--------+----------------------+-----+------+----------------------+---------------+
| non | TimestampMicrosecond | PRI | NO | | TIMESTAMP |
| iUSTO | Float64 | | YES | 0.047318541668048164 | FIELD |
+--------+----------------------+-----+------+----------------------+---------------+
DESC TABLE test_like_1;
+--------+----------------------+-----+------+---------+---------------+
@@ -183,3 +200,7 @@ DROP TABLE test_like_2;
Affected Rows: 0
DROP table `ExcePTuRi`;
Affected Rows: 0

View File

@@ -64,6 +64,14 @@ CREATE TABLE test_like_2 LIKE test_like_1;
CREATE TABLE test_like_2 LIKE test_like_1;
CREATE TABLE `ExcePTuRi`(
non TIMESTAMP(6) TIME INDEX,
`iUSTO` DOUBLE DEFAULT 0.047318541668048164
)
ENGINE=mito;
DESC table `ExcePTuRi`;
DESC TABLE test_like_1;
DESC TABLE test_like_2;
@@ -71,3 +79,5 @@ DESC TABLE test_like_2;
DROP TABLE test_like_1;
DROP TABLE test_like_2;
DROP table `ExcePTuRi`;

View File

@@ -0,0 +1,84 @@
CREATE TABLE not_supported_table_options_keys (
id INT UNSIGNED,
host STRING,
cpu DOUBLE,
disk FLOAT,
ts TIMESTAMP NOT NULL DEFAULT current_timestamp(),
TIME INDEX (ts),
PRIMARY KEY (id, host)
)
PARTITION ON COLUMNS (id) (
id < 5,
id >= 5 AND id < 9,
id >= 9
)
ENGINE=mito
WITH(
foo = 123,
ttl = '7d',
write_buffer_size = 1024
);
Error: 1004(InvalidArguments), Unrecognized table option key: foo
create table if not exists test_opts(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
)
engine=mito
with(regions=1, ttl='7d', 'compaction.type'='twcs', 'compaction.twcs.time_window'='1d');
Affected Rows: 0
drop table test_opts;
Affected Rows: 0
create table if not exists test_opts(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
)
engine=mito
with('regions'=1, 'ttl'='7d', 'compaction.type'='twcs', 'compaction.twcs.time_window'='1d');
Affected Rows: 0
drop table test_opts;
Affected Rows: 0
create table if not exists test_mito_options(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
)
engine=mito
with(
'regions'=1,
'ttl'='7d',
'compaction.type'='twcs',
'compaction.twcs.max_active_window_files'='8',
'compaction.twcs.max_inactive_window_files'='2',
'compaction.twcs.time_window'='1d',
'index.inverted_index.ignore_column_ids'='1,2,3',
'index.inverted_index.segment_row_count'='512',
'wal_options'='{"wal.provider":"raft_engine"}',
);
Affected Rows: 0
drop table test_mito_options;
Affected Rows: 0

View File

@@ -0,0 +1,69 @@
CREATE TABLE not_supported_table_options_keys (
id INT UNSIGNED,
host STRING,
cpu DOUBLE,
disk FLOAT,
ts TIMESTAMP NOT NULL DEFAULT current_timestamp(),
TIME INDEX (ts),
PRIMARY KEY (id, host)
)
PARTITION ON COLUMNS (id) (
id < 5,
id >= 5 AND id < 9,
id >= 9
)
ENGINE=mito
WITH(
foo = 123,
ttl = '7d',
write_buffer_size = 1024
);
create table if not exists test_opts(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
)
engine=mito
with(regions=1, ttl='7d', 'compaction.type'='twcs', 'compaction.twcs.time_window'='1d');
drop table test_opts;
create table if not exists test_opts(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
)
engine=mito
with('regions'=1, 'ttl'='7d', 'compaction.type'='twcs', 'compaction.twcs.time_window'='1d');
drop table test_opts;
create table if not exists test_mito_options(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
)
engine=mito
with(
'regions'=1,
'ttl'='7d',
'compaction.type'='twcs',
'compaction.twcs.max_active_window_files'='8',
'compaction.twcs.max_inactive_window_files'='2',
'compaction.twcs.time_window'='1d',
'index.inverted_index.ignore_column_ids'='1,2,3',
'index.inverted_index.segment_row_count'='512',
'wal_options'='{"wal.provider":"raft_engine"}',
);
drop table test_mito_options;

View File

@@ -20,3 +20,39 @@ select GREATEST('2000-02-11'::Date, '2020-12-30'::Date);
| 2020-12-30 |
+-------------------------------------------------+
select to_timezone('2022-09-20T14:16:43.012345+08:00', 'Europe/Berlin');
+-----------------------------------------------------------------------------+
| to_timezone(Utf8("2022-09-20T14:16:43.012345+08:00"),Utf8("Europe/Berlin")) |
+-----------------------------------------------------------------------------+
| 2022-09-20 08:16:43.012345 |
+-----------------------------------------------------------------------------+
select to_timezone('2022-09-20T14:16:43.012345+08:00'::Timestamp, 'Europe/Berlin');
+-----------------------------------------------------------------------------+
| to_timezone(Utf8("2022-09-20T14:16:43.012345+08:00"),Utf8("Europe/Berlin")) |
+-----------------------------------------------------------------------------+
| 2022-09-20 08:16:43.012 |
+-----------------------------------------------------------------------------+
select to_timezone('2024-03-29T14:16:43.012345Z', 'Asia/Shanghai');
+------------------------------------------------------------------------+
| to_timezone(Utf8("2024-03-29T14:16:43.012345Z"),Utf8("Asia/Shanghai")) |
+------------------------------------------------------------------------+
| 2024-03-29 22:16:43.012345 |
+------------------------------------------------------------------------+
select to_timezone('2024-03-29T14:16:43.012345Z'::Timestamp, 'Asia/Shanghai');
+------------------------------------------------------------------------+
| to_timezone(Utf8("2024-03-29T14:16:43.012345Z"),Utf8("Asia/Shanghai")) |
+------------------------------------------------------------------------+
| 2024-03-29 22:16:43.012 |
+------------------------------------------------------------------------+
select to_timezone(1709992225, 'Asia/Shanghai');
Error: 3001(EngineExecuteQuery), DataFusion error: Error during planning: Coercion from [Int64, Utf8] to the signature OneOf([Exact([Timestamp(Second, None), Utf8]), Exact([Timestamp(Millisecond, None), Utf8]), Exact([Timestamp(Microsecond, None), Utf8]), Exact([Timestamp(Nanosecond, None), Utf8])]) failed.

View File

@@ -3,5 +3,11 @@
select current_time();
select GREATEST('1999-01-30', '2023-03-01');
select GREATEST('2000-02-11'::Date, '2020-12-30'::Date);
select to_timezone('2022-09-20T14:16:43.012345+08:00', 'Europe/Berlin');
select to_timezone('2022-09-20T14:16:43.012345+08:00'::Timestamp, 'Europe/Berlin');
select to_timezone('2024-03-29T14:16:43.012345Z', 'Asia/Shanghai');
select to_timezone('2024-03-29T14:16:43.012345Z'::Timestamp, 'Asia/Shanghai');
select to_timezone(1709992225, 'Asia/Shanghai');

View File

@@ -23,47 +23,47 @@ Affected Rows: 10
-- Test by calculate
SELECT ts, length(host), max(val) RANGE '5s' FROM host ALIGN '20s' BY (length(host)) ORDER BY ts;
+---------------------+-----------------------------+----------------------------------+
| ts | character_length(host.host) | MAX(host.val) RANGE 5s FILL NULL |
+---------------------+-----------------------------+----------------------------------+
| 1970-01-01T00:00:00 | 5 | 3 |
| 1970-01-01T00:00:20 | 5 | 5 |
+---------------------+-----------------------------+----------------------------------+
+---------------------+-----------------------------+------------------------+
| ts | character_length(host.host) | MAX(host.val) RANGE 5s |
+---------------------+-----------------------------+------------------------+
| 1970-01-01T00:00:00 | 5 | 3 |
| 1970-01-01T00:00:20 | 5 | 5 |
+---------------------+-----------------------------+------------------------+
SELECT ts, max(val) RANGE '5s' FROM host ALIGN '20s' BY (2) ORDER BY ts;
+---------------------+----------------------------------+
| ts | MAX(host.val) RANGE 5s FILL NULL |
+---------------------+----------------------------------+
| 1970-01-01T00:00:00 | 3 |
| 1970-01-01T00:00:20 | 5 |
+---------------------+----------------------------------+
+---------------------+------------------------+
| ts | MAX(host.val) RANGE 5s |
+---------------------+------------------------+
| 1970-01-01T00:00:00 | 3 |
| 1970-01-01T00:00:20 | 5 |
+---------------------+------------------------+
-- The user explicitly specifies that the aggregation key is empty. In this case, there is no aggregation key. All data will be aggregated into a group.
-- Implement by rewrite `BY()` to `BY(1)` automatically through sqlparser. They are semantically equivalent.
SELECT ts, max(val) RANGE '5s' FROM host ALIGN '20s' BY () ORDER BY ts;
+---------------------+----------------------------------+
| ts | MAX(host.val) RANGE 5s FILL NULL |
+---------------------+----------------------------------+
| 1970-01-01T00:00:00 | 3 |
| 1970-01-01T00:00:20 | 5 |
+---------------------+----------------------------------+
+---------------------+------------------------+
| ts | MAX(host.val) RANGE 5s |
+---------------------+------------------------+
| 1970-01-01T00:00:00 | 3 |
| 1970-01-01T00:00:20 | 5 |
+---------------------+------------------------+
SELECT ts, length(host)::INT64 + 2, max(val) RANGE '5s' FROM host ALIGN '20s' BY (length(host)::INT64 + 2) ORDER BY ts;
+---------------------+----------------------------------------+----------------------------------+
| ts | character_length(host.host) + Int64(2) | MAX(host.val) RANGE 5s FILL NULL |
+---------------------+----------------------------------------+----------------------------------+
| 1970-01-01T00:00:00 | 7 | 3 |
| 1970-01-01T00:00:20 | 7 | 5 |
+---------------------+----------------------------------------+----------------------------------+
+---------------------+----------------------------------------+------------------------+
| ts | character_length(host.host) + Int64(2) | MAX(host.val) RANGE 5s |
+---------------------+----------------------------------------+------------------------+
| 1970-01-01T00:00:00 | 7 | 3 |
| 1970-01-01T00:00:20 | 7 | 5 |
+---------------------+----------------------------------------+------------------------+
-- Test error
-- project non-aggregation key
SELECT ts, host, max(val) RANGE '5s' FROM host ALIGN '20s' BY () ORDER BY ts;
Error: 3001(EngineExecuteQuery), DataFusion error: No field named host.host. Valid fields are "MAX(host.val) RANGE 5s FILL NULL", host.ts, "Int64(1)".
Error: 3001(EngineExecuteQuery), DataFusion error: No field named host.host. Valid fields are "MAX(host.val) RANGE 5s", host.ts, "Int64(1)".
DROP TABLE host;
@@ -94,12 +94,12 @@ Affected Rows: 10
SELECT ts, max(val) RANGE '5s' FROM host ALIGN '20s' ORDER BY ts;
+---------------------+----------------------------------+
| ts | MAX(host.val) RANGE 5s FILL NULL |
+---------------------+----------------------------------+
| 1970-01-01T00:00:00 | 3 |
| 1970-01-01T00:00:20 | 5 |
+---------------------+----------------------------------+
+---------------------+------------------------+
| ts | MAX(host.val) RANGE 5s |
+---------------------+------------------------+
| 1970-01-01T00:00:00 | 3 |
| 1970-01-01T00:00:20 | 5 |
+---------------------+------------------------+
DROP TABLE host;

View File

@@ -23,151 +23,151 @@ Affected Rows: 10
-- Test range expr calculate
SELECT ts, host, covar(val::DOUBLE, val::DOUBLE) RANGE '20s' FROM host ALIGN '10s' ORDER BY host, ts;
+---------------------+-------+---------------------------------------------------+
| ts | host | COVARIANCE(host.val,host.val) RANGE 20s FILL NULL |
+---------------------+-------+---------------------------------------------------+
| 1969-12-31T23:59:50 | host1 | |
| 1970-01-01T00:00:00 | host1 | 0.5 |
| 1970-01-01T00:00:10 | host1 | 0.5 |
| 1970-01-01T00:00:20 | host1 | |
| 1969-12-31T23:59:50 | host2 | |
| 1970-01-01T00:00:00 | host2 | 0.5 |
| 1970-01-01T00:00:10 | host2 | 0.5 |
| 1970-01-01T00:00:20 | host2 | |
+---------------------+-------+---------------------------------------------------+
+---------------------+-------+-----------------------------------------+
| ts | host | COVARIANCE(host.val,host.val) RANGE 20s |
+---------------------+-------+-----------------------------------------+
| 1969-12-31T23:59:50 | host1 | |
| 1970-01-01T00:00:00 | host1 | 0.5 |
| 1970-01-01T00:00:10 | host1 | 0.5 |
| 1970-01-01T00:00:20 | host1 | |
| 1969-12-31T23:59:50 | host2 | |
| 1970-01-01T00:00:00 | host2 | 0.5 |
| 1970-01-01T00:00:10 | host2 | 0.5 |
| 1970-01-01T00:00:20 | host2 | |
+---------------------+-------+-----------------------------------------+
SELECT ts, host, 2 * min(val) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+---------------------------------------------+
| ts | host | Int64(2) * MIN(host.val) RANGE 5s FILL NULL |
+---------------------+-------+---------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 2 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 4 |
| 1970-01-01T00:00:00 | host2 | 6 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 8 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 10 |
+---------------------+-------+---------------------------------------------+
+---------------------+-------+-----------------------------------+
| ts | host | Int64(2) * MIN(host.val) RANGE 5s |
+---------------------+-------+-----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 2 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 4 |
| 1970-01-01T00:00:00 | host2 | 6 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 8 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 10 |
+---------------------+-------+-----------------------------------+
SELECT ts, host, min(val * 2) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+---------------------------------------------+
| ts | host | MIN(host.val * Int64(2)) RANGE 5s FILL NULL |
+---------------------+-------+---------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 2 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 4 |
| 1970-01-01T00:00:00 | host2 | 6 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 8 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 10 |
+---------------------+-------+---------------------------------------------+
+---------------------+-------+-----------------------------------+
| ts | host | MIN(host.val * Int64(2)) RANGE 5s |
+---------------------+-------+-----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 2 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 4 |
| 1970-01-01T00:00:00 | host2 | 6 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 8 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 10 |
+---------------------+-------+-----------------------------------+
SELECT ts, host, min(val::DOUBLE) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 5s FILL NULL |
+---------------------+-------+----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0.0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1.0 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2.0 |
| 1970-01-01T00:00:00 | host2 | 3.0 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 4.0 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 5.0 |
+---------------------+-------+----------------------------------+
+---------------------+-------+------------------------+
| ts | host | MIN(host.val) RANGE 5s |
+---------------------+-------+------------------------+
| 1970-01-01T00:00:00 | host1 | 0.0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1.0 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2.0 |
| 1970-01-01T00:00:00 | host2 | 3.0 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 4.0 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 5.0 |
+---------------------+-------+------------------------+
SELECT ts, host, min(floor(val::DOUBLE)) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+-----------------------------------------+
| ts | host | MIN(floor(host.val)) RANGE 5s FILL NULL |
+---------------------+-------+-----------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0.0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1.0 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2.0 |
| 1970-01-01T00:00:00 | host2 | 3.0 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 4.0 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 5.0 |
+---------------------+-------+-----------------------------------------+
+---------------------+-------+-------------------------------+
| ts | host | MIN(floor(host.val)) RANGE 5s |
+---------------------+-------+-------------------------------+
| 1970-01-01T00:00:00 | host1 | 0.0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1.0 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2.0 |
| 1970-01-01T00:00:00 | host2 | 3.0 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 4.0 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 5.0 |
+---------------------+-------+-------------------------------+
SELECT ts, host, floor(min(val) RANGE '5s') FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+-----------------------------------------+
| ts | host | floor(MIN(host.val) RANGE 5s FILL NULL) |
+---------------------+-------+-----------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0.0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1.0 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2.0 |
| 1970-01-01T00:00:00 | host2 | 3.0 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 4.0 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 5.0 |
+---------------------+-------+-----------------------------------------+
+---------------------+-------+-------------------------------+
| ts | host | floor(MIN(host.val) RANGE 5s) |
+---------------------+-------+-------------------------------+
| 1970-01-01T00:00:00 | host1 | 0.0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1.0 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2.0 |
| 1970-01-01T00:00:00 | host2 | 3.0 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 4.0 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 5.0 |
+---------------------+-------+-------------------------------+
-- Test complex range expr calculate
SELECT ts, host, (min(val) + max(val)) RANGE '20s' + 1.0 FROM host ALIGN '10s' ORDER BY host, ts;
+---------------------+-------+------------------------------------------------------------------------------------+
| ts | host | MIN(host.val) RANGE 20s FILL NULL + MAX(host.val) RANGE 20s FILL NULL + Float64(1) |
+---------------------+-------+------------------------------------------------------------------------------------+
| 1969-12-31T23:59:50 | host1 | 1.0 |
| 1970-01-01T00:00:00 | host1 | 2.0 |
| 1970-01-01T00:00:10 | host1 | 4.0 |
| 1970-01-01T00:00:20 | host1 | 5.0 |
| 1969-12-31T23:59:50 | host2 | 7.0 |
| 1970-01-01T00:00:00 | host2 | 8.0 |
| 1970-01-01T00:00:10 | host2 | 10.0 |
| 1970-01-01T00:00:20 | host2 | 11.0 |
+---------------------+-------+------------------------------------------------------------------------------------+
+---------------------+-------+----------------------------------------------------------------+
| ts | host | MIN(host.val) RANGE 20s + MAX(host.val) RANGE 20s + Float64(1) |
+---------------------+-------+----------------------------------------------------------------+
| 1969-12-31T23:59:50 | host1 | 1.0 |
| 1970-01-01T00:00:00 | host1 | 2.0 |
| 1970-01-01T00:00:10 | host1 | 4.0 |
| 1970-01-01T00:00:20 | host1 | 5.0 |
| 1969-12-31T23:59:50 | host2 | 7.0 |
| 1970-01-01T00:00:00 | host2 | 8.0 |
| 1970-01-01T00:00:10 | host2 | 10.0 |
| 1970-01-01T00:00:20 | host2 | 11.0 |
+---------------------+-------+----------------------------------------------------------------+
SELECT ts, host, covar(ceil(val::DOUBLE), floor(val::DOUBLE)) RANGE '20s' FROM host ALIGN '10s' ORDER BY host, ts;
+---------------------+-------+----------------------------------------------------------------+
| ts | host | COVARIANCE(ceil(host.val),floor(host.val)) RANGE 20s FILL NULL |
+---------------------+-------+----------------------------------------------------------------+
| 1969-12-31T23:59:50 | host1 | |
| 1970-01-01T00:00:00 | host1 | 0.5 |
| 1970-01-01T00:00:10 | host1 | 0.5 |
| 1970-01-01T00:00:20 | host1 | |
| 1969-12-31T23:59:50 | host2 | |
| 1970-01-01T00:00:00 | host2 | 0.5 |
| 1970-01-01T00:00:10 | host2 | 0.5 |
| 1970-01-01T00:00:20 | host2 | |
+---------------------+-------+----------------------------------------------------------------+
+---------------------+-------+------------------------------------------------------+
| ts | host | COVARIANCE(ceil(host.val),floor(host.val)) RANGE 20s |
+---------------------+-------+------------------------------------------------------+
| 1969-12-31T23:59:50 | host1 | |
| 1970-01-01T00:00:00 | host1 | 0.5 |
| 1970-01-01T00:00:10 | host1 | 0.5 |
| 1970-01-01T00:00:20 | host1 | |
| 1969-12-31T23:59:50 | host2 | |
| 1970-01-01T00:00:00 | host2 | 0.5 |
| 1970-01-01T00:00:10 | host2 | 0.5 |
| 1970-01-01T00:00:20 | host2 | |
+---------------------+-------+------------------------------------------------------+
SELECT ts, host, floor(cos(ceil(sin(min(val) RANGE '5s')))) FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+---------------------------------------------------------+
| ts | host | floor(cos(ceil(sin(MIN(host.val) RANGE 5s FILL NULL)))) |
+---------------------+-------+---------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 1.0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 0.0 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 0.0 |
| 1970-01-01T00:00:00 | host2 | 0.0 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 1.0 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 1.0 |
+---------------------+-------+---------------------------------------------------------+
+---------------------+-------+-----------------------------------------------+
| ts | host | floor(cos(ceil(sin(MIN(host.val) RANGE 5s)))) |
+---------------------+-------+-----------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 1.0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 0.0 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 0.0 |
| 1970-01-01T00:00:00 | host2 | 0.0 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 1.0 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 1.0 |
+---------------------+-------+-----------------------------------------------+
SELECT ts, host, gcd(CAST(max(floor(val::DOUBLE)) RANGE '10s' FILL PREV as INT64) * 4, max(val * 4) RANGE '10s' FILL PREV) * length(host) + 1 FROM host ALIGN '5s' ORDER BY host, ts;

View File

@@ -41,7 +41,7 @@ Error: 2000(InvalidSyntax), sql parser error: Illegal Range select, no RANGE key
SELECT min(val) RANGE '10s', max(val) FROM host ALIGN '5s';
Error: 3001(EngineExecuteQuery), DataFusion error: No field named "MAX(host.val)". Valid fields are "MIN(host.val) RANGE 10s FILL NULL", host.ts, host.host.
Error: 3001(EngineExecuteQuery), DataFusion error: No field named "MAX(host.val)". Valid fields are "MIN(host.val) RANGE 10s", host.ts, host.host.
SELECT min(val) * 2 RANGE '10s' FROM host ALIGN '5s';
@@ -73,10 +73,6 @@ SELECT rank() OVER (PARTITION BY host ORDER BY ts DESC) RANGE '10s' FROM host AL
Error: 2000(InvalidSyntax), Range Query: Window functions is not allowed in Range Query
-- 2.6 invalid fill
SELECT min(val) RANGE '5s', min(val) RANGE '5s' FILL NULL FROM host ALIGN '5s';
Error: 3001(EngineExecuteQuery), DataFusion error: Schema contains duplicate unqualified field name "MIN(host.val) RANGE 5s FILL NULL"
SELECT min(val) RANGE '5s' FROM host ALIGN '5s' FILL 3.0;
Error: 3000(PlanQuery), DataFusion error: Error during planning: 3.0 is not a valid fill option, fail to convert to a const value. { Arrow error: Cast error: Cannot cast string '3.0' to value of Int64 type }
@@ -96,7 +92,7 @@ Error: 2000(InvalidSyntax), Range Query: Can't use 0 as align in Range Query
SELECT min(val) RANGE (INTERVAL '0' day) FROM host ALIGN '5s';
Error: 2000(InvalidSyntax), Range Query: Invalid Range expr `MIN(host.val) RANGE IntervalMonthDayNano("0") FILL NULL`, Can't use 0 as range in Range Query
Error: 2000(InvalidSyntax), Range Query: Invalid Range expr `MIN(host.val) RANGE IntervalMonthDayNano("0")`, Can't use 0 as range in Range Query
DROP TABLE host;

View File

@@ -54,8 +54,6 @@ SELECT rank() OVER (PARTITION BY host ORDER BY ts DESC) RANGE '10s' FROM host AL
-- 2.6 invalid fill
SELECT min(val) RANGE '5s', min(val) RANGE '5s' FILL NULL FROM host ALIGN '5s';
SELECT min(val) RANGE '5s' FROM host ALIGN '5s' FILL 3.0;
-- 2.7 zero align/range

View File

@@ -20,23 +20,23 @@ INSERT INTO TABLE host VALUES
Affected Rows: 10
-- Test Fill
-- Test Fill when aggregate result is null
SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 5s FILL NULL |
+---------------------+-------+----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 3 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 4 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 5 |
+---------------------+-------+----------------------------------+
+---------------------+-------+------------------------+
| ts | host | MIN(host.val) RANGE 5s |
+---------------------+-------+------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 3 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 4 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 5 |
+---------------------+-------+------------------------+
SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s' FILL NULL ORDER BY host, ts;
@@ -57,54 +57,231 @@ SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s' FILL NULL ORDER BY hos
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL 6 FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+----------------------------------+-------------------------------+
| ts | host | MIN(host.val) RANGE 5s FILL NULL | MIN(host.val) RANGE 5s FILL 6 |
+---------------------+-------+----------------------------------+-------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 0 |
| 1970-01-01T00:00:05 | host1 | | 6 |
| 1970-01-01T00:00:10 | host1 | 1 | 1 |
| 1970-01-01T00:00:15 | host1 | | 6 |
| 1970-01-01T00:00:20 | host1 | 2 | 2 |
| 1970-01-01T00:00:00 | host2 | 3 | 3 |
| 1970-01-01T00:00:05 | host2 | | 6 |
| 1970-01-01T00:00:10 | host2 | 4 | 4 |
| 1970-01-01T00:00:15 | host2 | | 6 |
| 1970-01-01T00:00:20 | host2 | 5 | 5 |
+---------------------+-------+----------------------------------+-------------------------------+
+---------------------+-------+------------------------+-------------------------------+
| ts | host | MIN(host.val) RANGE 5s | MIN(host.val) RANGE 5s FILL 6 |
+---------------------+-------+------------------------+-------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 0 |
| 1970-01-01T00:00:05 | host1 | | 6 |
| 1970-01-01T00:00:10 | host1 | 1 | 1 |
| 1970-01-01T00:00:15 | host1 | | 6 |
| 1970-01-01T00:00:20 | host1 | 2 | 2 |
| 1970-01-01T00:00:00 | host2 | 3 | 3 |
| 1970-01-01T00:00:05 | host2 | | 6 |
| 1970-01-01T00:00:10 | host2 | 4 | 4 |
| 1970-01-01T00:00:15 | host2 | | 6 |
| 1970-01-01T00:00:20 | host2 | 5 | 5 |
+---------------------+-------+------------------------+-------------------------------+
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL PREV FROM host ALIGN '5s'ORDER BY host, ts;
+---------------------+-------+----------------------------------+----------------------------------+
| ts | host | MIN(host.val) RANGE 5s FILL NULL | MIN(host.val) RANGE 5s FILL PREV |
+---------------------+-------+----------------------------------+----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 0 |
| 1970-01-01T00:00:05 | host1 | | 0 |
| 1970-01-01T00:00:10 | host1 | 1 | 1 |
| 1970-01-01T00:00:15 | host1 | | 1 |
| 1970-01-01T00:00:20 | host1 | 2 | 2 |
| 1970-01-01T00:00:00 | host2 | 3 | 3 |
| 1970-01-01T00:00:05 | host2 | | 3 |
| 1970-01-01T00:00:10 | host2 | 4 | 4 |
| 1970-01-01T00:00:15 | host2 | | 4 |
| 1970-01-01T00:00:20 | host2 | 5 | 5 |
+---------------------+-------+----------------------------------+----------------------------------+
+---------------------+-------+------------------------+----------------------------------+
| ts | host | MIN(host.val) RANGE 5s | MIN(host.val) RANGE 5s FILL PREV |
+---------------------+-------+------------------------+----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 0 |
| 1970-01-01T00:00:05 | host1 | | 0 |
| 1970-01-01T00:00:10 | host1 | 1 | 1 |
| 1970-01-01T00:00:15 | host1 | | 1 |
| 1970-01-01T00:00:20 | host1 | 2 | 2 |
| 1970-01-01T00:00:00 | host2 | 3 | 3 |
| 1970-01-01T00:00:05 | host2 | | 3 |
| 1970-01-01T00:00:10 | host2 | 4 | 4 |
| 1970-01-01T00:00:15 | host2 | | 4 |
| 1970-01-01T00:00:20 | host2 | 5 | 5 |
+---------------------+-------+------------------------+----------------------------------+
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL LINEAR FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+----------------------------------+------------------------------------+
| ts | host | MIN(host.val) RANGE 5s FILL NULL | MIN(host.val) RANGE 5s FILL LINEAR |
+---------------------+-------+----------------------------------+------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 0.0 |
| 1970-01-01T00:00:05 | host1 | | 0.5 |
| 1970-01-01T00:00:10 | host1 | 1 | 1.0 |
| 1970-01-01T00:00:15 | host1 | | 1.5 |
| 1970-01-01T00:00:20 | host1 | 2 | 2.0 |
| 1970-01-01T00:00:00 | host2 | 3 | 3.0 |
| 1970-01-01T00:00:05 | host2 | | 3.5 |
| 1970-01-01T00:00:10 | host2 | 4 | 4.0 |
| 1970-01-01T00:00:15 | host2 | | 4.5 |
| 1970-01-01T00:00:20 | host2 | 5 | 5.0 |
+---------------------+-------+----------------------------------+------------------------------------+
+---------------------+-------+------------------------+------------------------------------+
| ts | host | MIN(host.val) RANGE 5s | MIN(host.val) RANGE 5s FILL LINEAR |
+---------------------+-------+------------------------+------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 0.0 |
| 1970-01-01T00:00:05 | host1 | | 0.5 |
| 1970-01-01T00:00:10 | host1 | 1 | 1.0 |
| 1970-01-01T00:00:15 | host1 | | 1.5 |
| 1970-01-01T00:00:20 | host1 | 2 | 2.0 |
| 1970-01-01T00:00:00 | host2 | 3 | 3.0 |
| 1970-01-01T00:00:05 | host2 | | 3.5 |
| 1970-01-01T00:00:10 | host2 | 4 | 4.0 |
| 1970-01-01T00:00:15 | host2 | | 4.5 |
| 1970-01-01T00:00:20 | host2 | 5 | 5.0 |
+---------------------+-------+------------------------+------------------------------------+
DROP TABLE host;
Affected Rows: 0
-- Test Fill when time slot data is missing
CREATE TABLE host (
ts timestamp(3) time index,
host STRING PRIMARY KEY,
val BIGINT,
);
Affected Rows: 0
INSERT INTO TABLE host VALUES
(0, 'host1', 0),
(1000, 'host1', 1),
(2000, 'host1', 2),
-- missing data for 5000, 10000
(15000, 'host1', 6),
(16000, 'host1', 7),
(17000, 'host1', 8),
(0, 'host2', 6),
(1000, 'host2', 7),
(2000, 'host2', 8),
-- missing data for 5000, 10000
(15000, 'host2', 12),
(16000, 'host2', 13),
(17000, 'host2', 14);
Affected Rows: 12
SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+------------------------+
| ts | host | MIN(host.val) RANGE 5s |
+---------------------+-------+------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:15 | host1 | 6 |
| 1970-01-01T00:00:00 | host2 | 6 |
| 1970-01-01T00:00:15 | host2 | 12 |
+---------------------+-------+------------------------+
SELECT ts, host, min(val) RANGE '5s' FILL NULL FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 5s FILL NULL |
+---------------------+-------+----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | |
| 1970-01-01T00:00:15 | host1 | 6 |
| 1970-01-01T00:00:00 | host2 | 6 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | |
| 1970-01-01T00:00:15 | host2 | 12 |
+---------------------+-------+----------------------------------+
SELECT ts, host, min(val) RANGE '5s' FILL PREV FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 5s FILL PREV |
+---------------------+-------+----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | 0 |
| 1970-01-01T00:00:10 | host1 | 0 |
| 1970-01-01T00:00:15 | host1 | 6 |
| 1970-01-01T00:00:00 | host2 | 6 |
| 1970-01-01T00:00:05 | host2 | 6 |
| 1970-01-01T00:00:10 | host2 | 6 |
| 1970-01-01T00:00:15 | host2 | 12 |
+---------------------+-------+----------------------------------+
SELECT ts, host, min(val) RANGE '5s' FILL LINEAR FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+------------------------------------+
| ts | host | MIN(host.val) RANGE 5s FILL LINEAR |
+---------------------+-------+------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0.0 |
| 1970-01-01T00:00:05 | host1 | 2.0 |
| 1970-01-01T00:00:10 | host1 | 4.0 |
| 1970-01-01T00:00:15 | host1 | 6.0 |
| 1970-01-01T00:00:00 | host2 | 6.0 |
| 1970-01-01T00:00:05 | host2 | 8.0 |
| 1970-01-01T00:00:10 | host2 | 10.0 |
| 1970-01-01T00:00:15 | host2 | 12.0 |
+---------------------+-------+------------------------------------+
SELECT ts, host, min(val) RANGE '5s' FILL 6 FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+-------------------------------+
| ts | host | MIN(host.val) RANGE 5s FILL 6 |
+---------------------+-------+-------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | 6 |
| 1970-01-01T00:00:10 | host1 | 6 |
| 1970-01-01T00:00:15 | host1 | 6 |
| 1970-01-01T00:00:00 | host2 | 6 |
| 1970-01-01T00:00:05 | host2 | 6 |
| 1970-01-01T00:00:10 | host2 | 6 |
| 1970-01-01T00:00:15 | host2 | 12 |
+---------------------+-------+-------------------------------+
SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s' FILL NULL ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 5s FILL NULL |
+---------------------+-------+----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | |
| 1970-01-01T00:00:15 | host1 | 6 |
| 1970-01-01T00:00:00 | host2 | 6 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | |
| 1970-01-01T00:00:15 | host2 | 12 |
+---------------------+-------+----------------------------------+
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL NULL FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+------------------------+----------------------------------+
| ts | host | MIN(host.val) RANGE 5s | MIN(host.val) RANGE 5s FILL NULL |
+---------------------+-------+------------------------+----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 0 |
| 1970-01-01T00:00:05 | host1 | | |
| 1970-01-01T00:00:10 | host1 | | |
| 1970-01-01T00:00:15 | host1 | 6 | 6 |
| 1970-01-01T00:00:00 | host2 | 6 | 6 |
| 1970-01-01T00:00:05 | host2 | | |
| 1970-01-01T00:00:10 | host2 | | |
| 1970-01-01T00:00:15 | host2 | 12 | 12 |
+---------------------+-------+------------------------+----------------------------------+
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL 6 FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+------------------------+-------------------------------+
| ts | host | MIN(host.val) RANGE 5s | MIN(host.val) RANGE 5s FILL 6 |
+---------------------+-------+------------------------+-------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 0 |
| 1970-01-01T00:00:05 | host1 | | 6 |
| 1970-01-01T00:00:10 | host1 | | 6 |
| 1970-01-01T00:00:15 | host1 | 6 | 6 |
| 1970-01-01T00:00:00 | host2 | 6 | 6 |
| 1970-01-01T00:00:05 | host2 | | 6 |
| 1970-01-01T00:00:10 | host2 | | 6 |
| 1970-01-01T00:00:15 | host2 | 12 | 12 |
+---------------------+-------+------------------------+-------------------------------+
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL PREV FROM host ALIGN '5s'ORDER BY host, ts;
+---------------------+-------+------------------------+----------------------------------+
| ts | host | MIN(host.val) RANGE 5s | MIN(host.val) RANGE 5s FILL PREV |
+---------------------+-------+------------------------+----------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 0 |
| 1970-01-01T00:00:05 | host1 | | 0 |
| 1970-01-01T00:00:10 | host1 | | 0 |
| 1970-01-01T00:00:15 | host1 | 6 | 6 |
| 1970-01-01T00:00:00 | host2 | 6 | 6 |
| 1970-01-01T00:00:05 | host2 | | 6 |
| 1970-01-01T00:00:10 | host2 | | 6 |
| 1970-01-01T00:00:15 | host2 | 12 | 12 |
+---------------------+-------+------------------------+----------------------------------+
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL LINEAR FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+------------------------+------------------------------------+
| ts | host | MIN(host.val) RANGE 5s | MIN(host.val) RANGE 5s FILL LINEAR |
+---------------------+-------+------------------------+------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 0.0 |
| 1970-01-01T00:00:05 | host1 | | 2.0 |
| 1970-01-01T00:00:10 | host1 | | 4.0 |
| 1970-01-01T00:00:15 | host1 | 6 | 6.0 |
| 1970-01-01T00:00:00 | host2 | 6 | 6.0 |
| 1970-01-01T00:00:05 | host2 | | 8.0 |
| 1970-01-01T00:00:10 | host2 | | 10.0 |
| 1970-01-01T00:00:15 | host2 | 12 | 12.0 |
+---------------------+-------+------------------------+------------------------------------+
DROP TABLE host;

View File

@@ -16,7 +16,7 @@ INSERT INTO TABLE host VALUES
(15000, 'host2', null),
(20000, 'host2', 5);
-- Test Fill
-- Test Fill when aggregate result is null
SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
@@ -29,3 +29,50 @@ SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL PREV FROM host AL
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL LINEAR FROM host ALIGN '5s' ORDER BY host, ts;
DROP TABLE host;
-- Test Fill when time slot data is missing
CREATE TABLE host (
ts timestamp(3) time index,
host STRING PRIMARY KEY,
val BIGINT,
);
INSERT INTO TABLE host VALUES
(0, 'host1', 0),
(1000, 'host1', 1),
(2000, 'host1', 2),
-- missing data for 5000, 10000
(15000, 'host1', 6),
(16000, 'host1', 7),
(17000, 'host1', 8),
(0, 'host2', 6),
(1000, 'host2', 7),
(2000, 'host2', 8),
-- missing data for 5000, 10000
(15000, 'host2', 12),
(16000, 'host2', 13),
(17000, 'host2', 14);
SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '5s' FILL NULL FROM host ALIGN '5s' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '5s' FILL PREV FROM host ALIGN '5s' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '5s' FILL LINEAR FROM host ALIGN '5s' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '5s' FILL 6 FROM host ALIGN '5s' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s' FILL NULL ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL NULL FROM host ALIGN '5s' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL 6 FROM host ALIGN '5s' ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL PREV FROM host ALIGN '5s'ORDER BY host, ts;
SELECT ts, host, min(val) RANGE '5s', min(val) RANGE '5s' FILL LINEAR FROM host ALIGN '5s' ORDER BY host, ts;
DROP TABLE host;

View File

@@ -20,25 +20,25 @@ Affected Rows: 8
SELECT ts, host, min(val) RANGE (INTERVAL '1 year') FROM host ALIGN (INTERVAL '1 year') ORDER BY host, ts;
+---------------------+-------+--------------------------------------------------------------------------------------+
| ts | host | MIN(host.val) RANGE IntervalMonthDayNano("950737950171172051122527404032") FILL NULL |
+---------------------+-------+--------------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-12-27T00:00:00 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 4 |
| 1970-12-27T00:00:00 | host2 | 6 |
+---------------------+-------+--------------------------------------------------------------------------------------+
+---------------------+-------+----------------------------------------------------------------------------+
| ts | host | MIN(host.val) RANGE IntervalMonthDayNano("950737950171172051122527404032") |
+---------------------+-------+----------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-12-27T00:00:00 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 4 |
| 1970-12-27T00:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------------------------------------------------+
SELECT ts, host, min(val) RANGE (INTERVAL '1' year) FROM host ALIGN (INTERVAL '1' year) ORDER BY host, ts;
+---------------------+-------+--------------------------------------------------------------------------------------+
| ts | host | MIN(host.val) RANGE IntervalMonthDayNano("950737950171172051122527404032") FILL NULL |
+---------------------+-------+--------------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-12-27T00:00:00 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 4 |
| 1970-12-27T00:00:00 | host2 | 6 |
+---------------------+-------+--------------------------------------------------------------------------------------+
+---------------------+-------+----------------------------------------------------------------------------+
| ts | host | MIN(host.val) RANGE IntervalMonthDayNano("950737950171172051122527404032") |
+---------------------+-------+----------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-12-27T00:00:00 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 4 |
| 1970-12-27T00:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------------------------------------------------+
DROP TABLE host;

View File

@@ -35,15 +35,15 @@ SELECT ts, host, foo FROM (SELECT ts, host, min(val) RANGE '5s' AS foo FROM host
SELECT ts, b, min(c) RANGE '5s' FROM (SELECT ts, host AS b, val AS c FROM host WHERE host = 'host1') ALIGN '5s' BY (b) ORDER BY b, ts;
+---------------------+-------+---------------------------+
| ts | b | MIN(c) RANGE 5s FILL NULL |
+---------------------+-------+---------------------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2 |
+---------------------+-------+---------------------------+
+---------------------+-------+-----------------+
| ts | b | MIN(c) RANGE 5s |
+---------------------+-------+-----------------+
| 1970-01-01T00:00:00 | host1 | 0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2 |
+---------------------+-------+-----------------+
-- Test EXPLAIN and ANALYZE
-- SQLNESS REPLACE (-+) -
@@ -55,9 +55,9 @@ EXPLAIN SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s';
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| RangeSelect: range_exprs=[MIN(host.val) RANGE 5s FILL NULL], align=5000ms, align_to=0ms, align_by=[host.host], time_index=ts |
| logical_plan_| RangeSelect: range_exprs=[MIN(host.val) RANGE 5s], align=5000ms, align_to=0ms, align_by=[host.host], time_index=ts |
|_|_MergeScan [is_placeholder=false]_|
| physical_plan | RangeSelectExec: range_expr=[MIN(host.val) RANGE 5s FILL NULL], align=5000ms, align_to=0ms, align_by=[host@1], time_index=ts |
| physical_plan | RangeSelectExec: range_expr=[MIN(host.val) RANGE 5s], align=5000ms, align_to=0ms, align_by=[host@1], time_index=ts |
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+
@@ -71,7 +71,7 @@ EXPLAIN ANALYZE SELECT ts, host, min(val) RANGE '5s' FROM host ALIGN '5s';
+-+-+
| plan_type_| plan_|
+-+-+
| Plan with Metrics | RangeSelectExec: range_expr=[MIN(host.val) RANGE 5s FILL NULL], align=5000ms, align_to=0ms, align_by=[host@1], time_index=ts, REDACTED
| Plan with Metrics | RangeSelectExec: range_expr=[MIN(host.val) RANGE 5s], align=5000ms, align_to=0ms, align_by=[host@1], time_index=ts, REDACTED
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+

View File

@@ -23,20 +23,20 @@ Affected Rows: 10
-- Test on Timestamps of different precisions
SELECT ts, host, min(val) RANGE '5s' FROM host_sec ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+--------------------------------------+
| ts | host | MIN(host_sec.val) RANGE 5s FILL NULL |
+---------------------+-------+--------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0.0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1.0 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2.0 |
| 1970-01-01T00:00:00 | host2 | 3.0 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 4.0 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 5.0 |
+---------------------+-------+--------------------------------------+
+---------------------+-------+----------------------------+
| ts | host | MIN(host_sec.val) RANGE 5s |
+---------------------+-------+----------------------------+
| 1970-01-01T00:00:00 | host1 | 0.0 |
| 1970-01-01T00:00:05 | host1 | |
| 1970-01-01T00:00:10 | host1 | 1.0 |
| 1970-01-01T00:00:15 | host1 | |
| 1970-01-01T00:00:20 | host1 | 2.0 |
| 1970-01-01T00:00:00 | host2 | 3.0 |
| 1970-01-01T00:00:05 | host2 | |
| 1970-01-01T00:00:10 | host2 | 4.0 |
| 1970-01-01T00:00:15 | host2 | |
| 1970-01-01T00:00:20 | host2 | 5.0 |
+---------------------+-------+----------------------------+
DROP TABLE host_sec;

View File

@@ -43,173 +43,173 @@ Affected Rows: 30
SELECT ts, host, first_value(val) RANGE '5s', last_value(val) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+------------------------------------------+-----------------------------------------+
| ts | host | FIRST_VALUE(host.val) RANGE 5s FILL NULL | LAST_VALUE(host.val) RANGE 5s FILL NULL |
+---------------------+-------+------------------------------------------+-----------------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 2 |
| 1970-01-01T00:00:05 | host1 | | |
| 1970-01-01T00:00:10 | host1 | | 5 |
| 1970-01-01T00:00:15 | host1 | 6 | 7 |
| 1970-01-01T00:00:20 | host1 | 8 | |
| 1970-01-01T00:00:00 | host2 | 0 | 2 |
| 1970-01-01T00:00:05 | host2 | | |
| 1970-01-01T00:00:10 | host2 | | 5 |
| 1970-01-01T00:00:15 | host2 | 6 | 7 |
| 1970-01-01T00:00:20 | host2 | 8 | |
+---------------------+-------+------------------------------------------+-----------------------------------------+
+---------------------+-------+--------------------------------+-------------------------------+
| ts | host | FIRST_VALUE(host.val) RANGE 5s | LAST_VALUE(host.val) RANGE 5s |
+---------------------+-------+--------------------------------+-------------------------------+
| 1970-01-01T00:00:00 | host1 | 0 | 2 |
| 1970-01-01T00:00:05 | host1 | | |
| 1970-01-01T00:00:10 | host1 | | 5 |
| 1970-01-01T00:00:15 | host1 | 6 | 7 |
| 1970-01-01T00:00:20 | host1 | 8 | |
| 1970-01-01T00:00:00 | host2 | 0 | 2 |
| 1970-01-01T00:00:05 | host2 | | |
| 1970-01-01T00:00:10 | host2 | | 5 |
| 1970-01-01T00:00:15 | host2 | 6 | 7 |
| 1970-01-01T00:00:20 | host2 | 8 | |
+---------------------+-------+--------------------------------+-------------------------------+
SELECT ts, host, first_value(addon ORDER BY val DESC) RANGE '5s', last_value(addon ORDER BY val DESC) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+---------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
| ts | host | FIRST_VALUE(host.addon) ORDER BY [host.val DESC NULLS FIRST] RANGE 5s FILL NULL | LAST_VALUE(host.addon) ORDER BY [host.val DESC NULLS FIRST] RANGE 5s FILL NULL |
+---------------------+-------+---------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 3 | 1 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:10 | host1 | 7 | 8 |
| 1970-01-01T00:00:15 | host1 | 11 | 10 |
| 1970-01-01T00:00:20 | host1 | 15 | 13 |
| 1970-01-01T00:00:00 | host2 | 18 | 16 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:10 | host2 | 22 | 23 |
| 1970-01-01T00:00:15 | host2 | 26 | 25 |
| 1970-01-01T00:00:20 | host2 | 30 | 28 |
+---------------------+-------+---------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+---------------------+-------+-----------------------------------------------------------------------+----------------------------------------------------------------------+
| ts | host | FIRST_VALUE(host.addon) ORDER BY [host.val DESC NULLS FIRST] RANGE 5s | LAST_VALUE(host.addon) ORDER BY [host.val DESC NULLS FIRST] RANGE 5s |
+---------------------+-------+-----------------------------------------------------------------------+----------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 3 | 1 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:10 | host1 | 7 | 8 |
| 1970-01-01T00:00:15 | host1 | 11 | 10 |
| 1970-01-01T00:00:20 | host1 | 15 | 13 |
| 1970-01-01T00:00:00 | host2 | 18 | 16 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:10 | host2 | 22 | 23 |
| 1970-01-01T00:00:15 | host2 | 26 | 25 |
| 1970-01-01T00:00:20 | host2 | 30 | 28 |
+---------------------+-------+-----------------------------------------------------------------------+----------------------------------------------------------------------+
SELECT ts, host, first_value(addon ORDER BY val DESC NULLS LAST) RANGE '5s', last_value(addon ORDER BY val DESC NULLS LAST) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+--------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
| ts | host | FIRST_VALUE(host.addon) ORDER BY [host.val DESC NULLS LAST] RANGE 5s FILL NULL | LAST_VALUE(host.addon) ORDER BY [host.val DESC NULLS LAST] RANGE 5s FILL NULL |
+---------------------+-------+--------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 3 | 1 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:10 | host1 | 9 | 7 |
| 1970-01-01T00:00:15 | host1 | 12 | 11 |
| 1970-01-01T00:00:20 | host1 | 14 | 15 |
| 1970-01-01T00:00:00 | host2 | 18 | 16 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:10 | host2 | 24 | 22 |
| 1970-01-01T00:00:15 | host2 | 27 | 26 |
| 1970-01-01T00:00:20 | host2 | 29 | 30 |
+---------------------+-------+--------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
+---------------------+-------+----------------------------------------------------------------------+---------------------------------------------------------------------+
| ts | host | FIRST_VALUE(host.addon) ORDER BY [host.val DESC NULLS LAST] RANGE 5s | LAST_VALUE(host.addon) ORDER BY [host.val DESC NULLS LAST] RANGE 5s |
+---------------------+-------+----------------------------------------------------------------------+---------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 3 | 1 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:10 | host1 | 9 | 7 |
| 1970-01-01T00:00:15 | host1 | 12 | 11 |
| 1970-01-01T00:00:20 | host1 | 14 | 15 |
| 1970-01-01T00:00:00 | host2 | 18 | 16 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:10 | host2 | 24 | 22 |
| 1970-01-01T00:00:15 | host2 | 27 | 26 |
| 1970-01-01T00:00:20 | host2 | 29 | 30 |
+---------------------+-------+----------------------------------------------------------------------+---------------------------------------------------------------------+
SELECT ts, host, first_value(addon ORDER BY val ASC) RANGE '5s', last_value(addon ORDER BY val ASC) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+-------------------------------------------------------------------------------+------------------------------------------------------------------------------+
| ts | host | FIRST_VALUE(host.addon) ORDER BY [host.val ASC NULLS LAST] RANGE 5s FILL NULL | LAST_VALUE(host.addon) ORDER BY [host.val ASC NULLS LAST] RANGE 5s FILL NULL |
+---------------------+-------+-------------------------------------------------------------------------------+------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 1 | 3 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:10 | host1 | 8 | 7 |
| 1970-01-01T00:00:15 | host1 | 10 | 11 |
| 1970-01-01T00:00:20 | host1 | 13 | 15 |
| 1970-01-01T00:00:00 | host2 | 16 | 18 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:10 | host2 | 23 | 22 |
| 1970-01-01T00:00:15 | host2 | 25 | 26 |
| 1970-01-01T00:00:20 | host2 | 28 | 30 |
+---------------------+-------+-------------------------------------------------------------------------------+------------------------------------------------------------------------------+
+---------------------+-------+---------------------------------------------------------------------+--------------------------------------------------------------------+
| ts | host | FIRST_VALUE(host.addon) ORDER BY [host.val ASC NULLS LAST] RANGE 5s | LAST_VALUE(host.addon) ORDER BY [host.val ASC NULLS LAST] RANGE 5s |
+---------------------+-------+---------------------------------------------------------------------+--------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 1 | 3 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:10 | host1 | 8 | 7 |
| 1970-01-01T00:00:15 | host1 | 10 | 11 |
| 1970-01-01T00:00:20 | host1 | 13 | 15 |
| 1970-01-01T00:00:00 | host2 | 16 | 18 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:10 | host2 | 23 | 22 |
| 1970-01-01T00:00:15 | host2 | 25 | 26 |
| 1970-01-01T00:00:20 | host2 | 28 | 30 |
+---------------------+-------+---------------------------------------------------------------------+--------------------------------------------------------------------+
SELECT ts, host, first_value(addon ORDER BY val ASC NULLS FIRST) RANGE '5s', last_value(addon ORDER BY val ASC NULLS FIRST) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+--------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
| ts | host | FIRST_VALUE(host.addon) ORDER BY [host.val ASC NULLS FIRST] RANGE 5s FILL NULL | LAST_VALUE(host.addon) ORDER BY [host.val ASC NULLS FIRST] RANGE 5s FILL NULL |
+---------------------+-------+--------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 1 | 3 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:10 | host1 | 7 | 9 |
| 1970-01-01T00:00:15 | host1 | 11 | 12 |
| 1970-01-01T00:00:20 | host1 | 15 | 14 |
| 1970-01-01T00:00:00 | host2 | 16 | 18 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:10 | host2 | 22 | 24 |
| 1970-01-01T00:00:15 | host2 | 26 | 27 |
| 1970-01-01T00:00:20 | host2 | 30 | 29 |
+---------------------+-------+--------------------------------------------------------------------------------+-------------------------------------------------------------------------------+
+---------------------+-------+----------------------------------------------------------------------+---------------------------------------------------------------------+
| ts | host | FIRST_VALUE(host.addon) ORDER BY [host.val ASC NULLS FIRST] RANGE 5s | LAST_VALUE(host.addon) ORDER BY [host.val ASC NULLS FIRST] RANGE 5s |
+---------------------+-------+----------------------------------------------------------------------+---------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 1 | 3 |
| 1970-01-01T00:00:05 | host1 | 4 | 4 |
| 1970-01-01T00:00:10 | host1 | 7 | 9 |
| 1970-01-01T00:00:15 | host1 | 11 | 12 |
| 1970-01-01T00:00:20 | host1 | 15 | 14 |
| 1970-01-01T00:00:00 | host2 | 16 | 18 |
| 1970-01-01T00:00:05 | host2 | 19 | 19 |
| 1970-01-01T00:00:10 | host2 | 22 | 24 |
| 1970-01-01T00:00:15 | host2 | 26 | 27 |
| 1970-01-01T00:00:20 | host2 | 30 | 29 |
+---------------------+-------+----------------------------------------------------------------------+---------------------------------------------------------------------+
SELECT ts, host, first_value(addon ORDER BY val ASC, ts ASC) RANGE '5s', last_value(addon ORDER BY val ASC, ts ASC) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+-------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+
| ts | host | FIRST_VALUE(host.addon) ORDER BY [host.val ASC NULLS LAST, host.ts ASC NULLS LAST] RANGE 5s FILL NULL | LAST_VALUE(host.addon) ORDER BY [host.val ASC NULLS LAST, host.ts ASC NULLS LAST] RANGE 5s FILL NULL |
+---------------------+-------+-------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 1 | 3 |
| 1970-01-01T00:00:05 | host1 | 4 | 6 |
| 1970-01-01T00:00:10 | host1 | 8 | 7 |
| 1970-01-01T00:00:15 | host1 | 10 | 11 |
| 1970-01-01T00:00:20 | host1 | 13 | 15 |
| 1970-01-01T00:00:00 | host2 | 16 | 18 |
| 1970-01-01T00:00:05 | host2 | 19 | 21 |
| 1970-01-01T00:00:10 | host2 | 23 | 22 |
| 1970-01-01T00:00:15 | host2 | 25 | 26 |
| 1970-01-01T00:00:20 | host2 | 28 | 30 |
+---------------------+-------+-------------------------------------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------+
+---------------------+-------+---------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------+
| ts | host | FIRST_VALUE(host.addon) ORDER BY [host.val ASC NULLS LAST, host.ts ASC NULLS LAST] RANGE 5s | LAST_VALUE(host.addon) ORDER BY [host.val ASC NULLS LAST, host.ts ASC NULLS LAST] RANGE 5s |
+---------------------+-------+---------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 1 | 3 |
| 1970-01-01T00:00:05 | host1 | 4 | 6 |
| 1970-01-01T00:00:10 | host1 | 8 | 7 |
| 1970-01-01T00:00:15 | host1 | 10 | 11 |
| 1970-01-01T00:00:20 | host1 | 13 | 15 |
| 1970-01-01T00:00:00 | host2 | 16 | 18 |
| 1970-01-01T00:00:05 | host2 | 19 | 21 |
| 1970-01-01T00:00:10 | host2 | 23 | 22 |
| 1970-01-01T00:00:15 | host2 | 25 | 26 |
| 1970-01-01T00:00:20 | host2 | 28 | 30 |
+---------------------+-------+---------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------+
SELECT ts, host, count(val) RANGE '5s'FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+------------------------------------+
| ts | host | COUNT(host.val) RANGE 5s FILL NULL |
+---------------------+-------+------------------------------------+
| 1970-01-01T00:00:00 | host1 | 3 |
| 1970-01-01T00:00:05 | host1 | 0 |
| 1970-01-01T00:00:10 | host1 | 2 |
| 1970-01-01T00:00:15 | host1 | 2 |
| 1970-01-01T00:00:20 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 3 |
| 1970-01-01T00:00:05 | host2 | 0 |
| 1970-01-01T00:00:10 | host2 | 2 |
| 1970-01-01T00:00:15 | host2 | 2 |
| 1970-01-01T00:00:20 | host2 | 2 |
+---------------------+-------+------------------------------------+
+---------------------+-------+--------------------------+
| ts | host | COUNT(host.val) RANGE 5s |
+---------------------+-------+--------------------------+
| 1970-01-01T00:00:00 | host1 | 3 |
| 1970-01-01T00:00:05 | host1 | 0 |
| 1970-01-01T00:00:10 | host1 | 2 |
| 1970-01-01T00:00:15 | host1 | 2 |
| 1970-01-01T00:00:20 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 3 |
| 1970-01-01T00:00:05 | host2 | 0 |
| 1970-01-01T00:00:10 | host2 | 2 |
| 1970-01-01T00:00:15 | host2 | 2 |
| 1970-01-01T00:00:20 | host2 | 2 |
+---------------------+-------+--------------------------+
SELECT ts, host, count(distinct val) RANGE '5s'FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+---------------------------------------------+
| ts | host | COUNT(DISTINCT host.val) RANGE 5s FILL NULL |
+---------------------+-------+---------------------------------------------+
| 1970-01-01T00:00:00 | host1 | 3 |
| 1970-01-01T00:00:05 | host1 | 0 |
| 1970-01-01T00:00:10 | host1 | 2 |
| 1970-01-01T00:00:15 | host1 | 2 |
| 1970-01-01T00:00:20 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 3 |
| 1970-01-01T00:00:05 | host2 | 0 |
| 1970-01-01T00:00:10 | host2 | 2 |
| 1970-01-01T00:00:15 | host2 | 2 |
| 1970-01-01T00:00:20 | host2 | 2 |
+---------------------+-------+---------------------------------------------+
+---------------------+-------+-----------------------------------+
| ts | host | COUNT(DISTINCT host.val) RANGE 5s |
+---------------------+-------+-----------------------------------+
| 1970-01-01T00:00:00 | host1 | 3 |
| 1970-01-01T00:00:05 | host1 | 0 |
| 1970-01-01T00:00:10 | host1 | 2 |
| 1970-01-01T00:00:15 | host1 | 2 |
| 1970-01-01T00:00:20 | host1 | 2 |
| 1970-01-01T00:00:00 | host2 | 3 |
| 1970-01-01T00:00:05 | host2 | 0 |
| 1970-01-01T00:00:10 | host2 | 2 |
| 1970-01-01T00:00:15 | host2 | 2 |
| 1970-01-01T00:00:20 | host2 | 2 |
+---------------------+-------+-----------------------------------+
SELECT ts, host, count(*) RANGE '5s'FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+-----------------------------+
| ts | host | COUNT(*) RANGE 5s FILL NULL |
+---------------------+-------+-----------------------------+
| 1970-01-01T00:00:00 | host1 | 3 |
| 1970-01-01T00:00:05 | host1 | 3 |
| 1970-01-01T00:00:10 | host1 | 3 |
| 1970-01-01T00:00:15 | host1 | 3 |
| 1970-01-01T00:00:20 | host1 | 3 |
| 1970-01-01T00:00:00 | host2 | 3 |
| 1970-01-01T00:00:05 | host2 | 3 |
| 1970-01-01T00:00:10 | host2 | 3 |
| 1970-01-01T00:00:15 | host2 | 3 |
| 1970-01-01T00:00:20 | host2 | 3 |
+---------------------+-------+-----------------------------+
+---------------------+-------+-------------------+
| ts | host | COUNT(*) RANGE 5s |
+---------------------+-------+-------------------+
| 1970-01-01T00:00:00 | host1 | 3 |
| 1970-01-01T00:00:05 | host1 | 3 |
| 1970-01-01T00:00:10 | host1 | 3 |
| 1970-01-01T00:00:15 | host1 | 3 |
| 1970-01-01T00:00:20 | host1 | 3 |
| 1970-01-01T00:00:00 | host2 | 3 |
| 1970-01-01T00:00:05 | host2 | 3 |
| 1970-01-01T00:00:10 | host2 | 3 |
| 1970-01-01T00:00:15 | host2 | 3 |
| 1970-01-01T00:00:20 | host2 | 3 |
+---------------------+-------+-------------------+
SELECT ts, host, count(distinct *) RANGE '5s'FROM host ALIGN '5s' ORDER BY host, ts;
+---------------------+-------+--------------------------------------+
| ts | host | COUNT(DISTINCT *) RANGE 5s FILL NULL |
+---------------------+-------+--------------------------------------+
| 1970-01-01T00:00:00 | host1 | 3 |
| 1970-01-01T00:00:05 | host1 | 3 |
| 1970-01-01T00:00:10 | host1 | 3 |
| 1970-01-01T00:00:15 | host1 | 3 |
| 1970-01-01T00:00:20 | host1 | 3 |
| 1970-01-01T00:00:00 | host2 | 3 |
| 1970-01-01T00:00:05 | host2 | 3 |
| 1970-01-01T00:00:10 | host2 | 3 |
| 1970-01-01T00:00:15 | host2 | 3 |
| 1970-01-01T00:00:20 | host2 | 3 |
+---------------------+-------+--------------------------------------+
+---------------------+-------+----------------------------+
| ts | host | COUNT(DISTINCT *) RANGE 5s |
+---------------------+-------+----------------------------+
| 1970-01-01T00:00:00 | host1 | 3 |
| 1970-01-01T00:00:05 | host1 | 3 |
| 1970-01-01T00:00:10 | host1 | 3 |
| 1970-01-01T00:00:15 | host1 | 3 |
| 1970-01-01T00:00:20 | host1 | 3 |
| 1970-01-01T00:00:00 | host2 | 3 |
| 1970-01-01T00:00:05 | host2 | 3 |
| 1970-01-01T00:00:10 | host2 | 3 |
| 1970-01-01T00:00:15 | host2 | 3 |
| 1970-01-01T00:00:20 | host2 | 3 |
+---------------------+-------+----------------------------+
-- Test error first_value/last_value
SELECT ts, host, first_value(val, val) RANGE '5s' FROM host ALIGN '5s' ORDER BY host, ts;
@@ -241,11 +241,11 @@ Affected Rows: 3
SELECT ts, first_value(val ORDER BY addon ASC) RANGE '5s', last_value(val ORDER BY addon ASC) RANGE '5s' FROM host ALIGN '5s';
+---------------------+-------------------------------------------------------------------------------+------------------------------------------------------------------------------+
| ts | FIRST_VALUE(host.val) ORDER BY [host.addon ASC NULLS LAST] RANGE 5s FILL NULL | LAST_VALUE(host.val) ORDER BY [host.addon ASC NULLS LAST] RANGE 5s FILL NULL |
+---------------------+-------------------------------------------------------------------------------+------------------------------------------------------------------------------+
| 1970-01-01T00:00:00 | 2 | 0 |
+---------------------+-------------------------------------------------------------------------------+------------------------------------------------------------------------------+
+---------------------+---------------------------------------------------------------------+--------------------------------------------------------------------+
| ts | FIRST_VALUE(host.val) ORDER BY [host.addon ASC NULLS LAST] RANGE 5s | LAST_VALUE(host.val) ORDER BY [host.addon ASC NULLS LAST] RANGE 5s |
+---------------------+---------------------------------------------------------------------+--------------------------------------------------------------------+
| 1970-01-01T00:00:00 | 2 | 0 |
+---------------------+---------------------------------------------------------------------+--------------------------------------------------------------------+
DROP TABLE host;

View File

@@ -20,14 +20,14 @@ Affected Rows: 8
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 2024-01-23T00:00:00 | host1 | 0 |
| 2024-01-24T00:00:00 | host1 | 2 |
| 2024-01-23T00:00:00 | host2 | 4 |
| 2024-01-24T00:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------+
+---------------------+-------+------------------------+
| ts | host | MIN(host.val) RANGE 1d |
+---------------------+-------+------------------------+
| 2024-01-23T00:00:00 | host1 | 0 |
| 2024-01-24T00:00:00 | host1 | 2 |
| 2024-01-23T00:00:00 | host2 | 4 |
| 2024-01-24T00:00:00 | host2 | 6 |
+---------------------+-------+------------------------+
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO UNKNOWN ORDER BY host, ts;
@@ -35,52 +35,52 @@ Error: 3000(PlanQuery), DataFusion error: Error during planning: Illegal `align
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '1900-01-01T00:00:00+01:00' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+----------------------------------+
+---------------------+-------+------------------------+
| ts | host | MIN(host.val) RANGE 1d |
+---------------------+-------+------------------------+
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+------------------------+
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2024-01-23T00:00:00+01:00' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+----------------------------------+
+---------------------+-------+------------------------+
| ts | host | MIN(host.val) RANGE 1d |
+---------------------+-------+------------------------+
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+------------------------+
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' TO '2023-01-01T00:00:00+01:00' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+----------------------------------+
+---------------------+-------+------------------------+
| ts | host | MIN(host.val) RANGE 1d |
+---------------------+-------+------------------------+
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+------------------------+
SELECT ts, min(val) RANGE (INTERVAL '1' day) FROM host ALIGN (INTERVAL '1' day) TO '1900-01-01T00:00:00+01:00' by (1) ORDER BY ts;
+---------------------+----------------------------------------------------------------------------+
| ts | MIN(host.val) RANGE IntervalMonthDayNano("18446744073709551616") FILL NULL |
+---------------------+----------------------------------------------------------------------------+
| 2024-01-22T23:00:00 | 0 |
| 2024-01-23T23:00:00 | 1 |
| 2024-01-24T23:00:00 | 3 |
+---------------------+----------------------------------------------------------------------------+
+---------------------+------------------------------------------------------------------+
| ts | MIN(host.val) RANGE IntervalMonthDayNano("18446744073709551616") |
+---------------------+------------------------------------------------------------------+
| 2024-01-22T23:00:00 | 0 |
| 2024-01-23T23:00:00 | 1 |
| 2024-01-24T23:00:00 | 3 |
+---------------------+------------------------------------------------------------------+
--- ALIGN TO with time zone ---
set time_zone='Asia/Shanghai';
@@ -90,14 +90,14 @@ Affected Rows: 0
---- align to 'Asia/Shanghai' unix epoch 0 ----
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 2024-01-23T08:00:00 | host1 | 0 |
| 2024-01-24T08:00:00 | host1 | 2 |
| 2024-01-23T08:00:00 | host2 | 4 |
| 2024-01-24T08:00:00 | host2 | 6 |
+---------------------+-------+----------------------------------+
+---------------------+-------+------------------------+
| ts | host | MIN(host.val) RANGE 1d |
+---------------------+-------+------------------------+
| 2024-01-23T08:00:00 | host1 | 0 |
| 2024-01-24T08:00:00 | host1 | 2 |
| 2024-01-23T08:00:00 | host2 | 4 |
| 2024-01-24T08:00:00 | host2 | 6 |
+---------------------+-------+------------------------+
set time_zone='+23:00';
@@ -106,16 +106,16 @@ Affected Rows: 0
---- align to '+23:00' unix epoch 0 ----
SELECT ts, host, min(val) RANGE '1d' FROM host ALIGN '1d' ORDER BY host, ts;
+---------------------+-------+----------------------------------+
| ts | host | MIN(host.val) RANGE 1d FILL NULL |
+---------------------+-------+----------------------------------+
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+----------------------------------+
+---------------------+-------+------------------------+
| ts | host | MIN(host.val) RANGE 1d |
+---------------------+-------+------------------------+
| 2024-01-22T23:00:00 | host1 | 0 |
| 2024-01-23T23:00:00 | host1 | 1 |
| 2024-01-24T23:00:00 | host1 | 3 |
| 2024-01-22T23:00:00 | host2 | 4 |
| 2024-01-23T23:00:00 | host2 | 5 |
| 2024-01-24T23:00:00 | host2 | 7 |
+---------------------+-------+------------------------+
set time_zone='UTC';

View File

@@ -77,29 +77,6 @@ drop table table_without_partition;
Affected Rows: 0
CREATE TABLE not_supported_table_options_keys (
id INT UNSIGNED,
host STRING,
cpu DOUBLE,
disk FLOAT,
ts TIMESTAMP NOT NULL DEFAULT current_timestamp(),
TIME INDEX (ts),
PRIMARY KEY (id, host)
)
PARTITION ON COLUMNS (id) (
id < 5,
id >= 5 AND id < 9,
id >= 9
)
ENGINE=mito
WITH(
foo = 123,
ttl = '7d',
write_buffer_size = 1024
);
Error: 1004(InvalidArguments), Invalid table option key: foo
CREATE TABLE not_supported_table_storage_option (
id INT UNSIGNED,
host STRING,

View File

@@ -30,26 +30,6 @@ show create table table_without_partition;
drop table table_without_partition;
CREATE TABLE not_supported_table_options_keys (
id INT UNSIGNED,
host STRING,
cpu DOUBLE,
disk FLOAT,
ts TIMESTAMP NOT NULL DEFAULT current_timestamp(),
TIME INDEX (ts),
PRIMARY KEY (id, host)
)
PARTITION ON COLUMNS (id) (
id < 5,
id >= 5 AND id < 9,
id >= 9
)
ENGINE=mito
WITH(
foo = 123,
ttl = '7d',
write_buffer_size = 1024
);
CREATE TABLE not_supported_table_storage_option (
id INT UNSIGNED,
host STRING,