mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-04 12:22:55 +00:00
* rename symbol Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * handle binary op Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * update test results Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * Update src/query/src/log_query/planner.rs Co-authored-by: Yingwen <realevenyag@gmail.com> * fix format Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * reduce duplication Signed-off-by: Ruihang Xia <waynestxia@gmail.com> --------- Signed-off-by: Ruihang Xia <waynestxia@gmail.com> Co-authored-by: Yingwen <realevenyag@gmail.com>
457 lines
16 KiB
Rust
457 lines
16 KiB
Rust
// Copyright 2023 Greptime Team
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
use chrono::{DateTime, Datelike, Duration, NaiveDate, NaiveTime, TimeZone, Utc};
|
|
use serde::{Deserialize, Serialize};
|
|
use table::table_name::TableName;
|
|
|
|
use crate::error::{
|
|
EndBeforeStartSnafu, InvalidDateFormatSnafu, InvalidSpanFormatSnafu, InvalidTimeFilterSnafu,
|
|
Result,
|
|
};
|
|
|
|
/// GreptimeDB's log query request.
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
pub struct LogQuery {
|
|
// Global query parameters
|
|
/// A fully qualified table name to query logs from.
|
|
pub table: TableName,
|
|
/// Specifies the time range for the log query. See [`TimeFilter`] for more details.
|
|
pub time_filter: TimeFilter,
|
|
/// Controls row skipping and fetch on the result set.
|
|
pub limit: Limit,
|
|
/// Columns to return in the result set.
|
|
///
|
|
/// The columns can be either from the original log or derived from processing exprs.
|
|
/// Default (empty) means all columns.
|
|
///
|
|
/// TODO(ruihang): Do we need negative select?
|
|
pub columns: Vec<String>,
|
|
|
|
// Filters
|
|
/// Conjunction of filters to apply for the raw logs.
|
|
///
|
|
/// Filters here can apply to any LogExpr.
|
|
pub filters: Vec<ColumnFilters>,
|
|
/// Adjacent lines to return. Applies to all filters above.
|
|
///
|
|
/// TODO(ruihang): Do we need per-filter context?
|
|
pub context: Context,
|
|
|
|
// Processors
|
|
/// Expressions to calculate after filter.
|
|
pub exprs: Vec<LogExpr>,
|
|
}
|
|
|
|
/// Expression to calculate on log after filtering.
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub enum LogExpr {
|
|
NamedIdent(String),
|
|
PositionalIdent(usize),
|
|
Literal(String),
|
|
ScalarFunc {
|
|
name: String,
|
|
args: Vec<LogExpr>,
|
|
alias: Option<String>,
|
|
},
|
|
AggrFunc {
|
|
name: String,
|
|
args: Vec<LogExpr>,
|
|
/// Optional range function parameter. Stands for the time range for both step and align.
|
|
range: Option<String>,
|
|
by: Vec<LogExpr>,
|
|
alias: Option<String>,
|
|
},
|
|
Decompose {
|
|
expr: Box<LogExpr>,
|
|
/// JSON, CSV, etc.
|
|
schema: String,
|
|
/// Fields with type name to extract from the decomposed value.
|
|
fields: Vec<(String, String)>,
|
|
},
|
|
BinaryOp {
|
|
left: Box<LogExpr>,
|
|
op: BinaryOperator,
|
|
right: Box<LogExpr>,
|
|
},
|
|
Alias {
|
|
expr: Box<LogExpr>,
|
|
alias: String,
|
|
},
|
|
Filter {
|
|
filter: ColumnFilters,
|
|
},
|
|
}
|
|
|
|
impl Default for LogQuery {
|
|
fn default() -> Self {
|
|
Self {
|
|
table: TableName::new("", "", ""),
|
|
time_filter: Default::default(),
|
|
filters: vec![],
|
|
limit: Limit::default(),
|
|
context: Default::default(),
|
|
columns: vec![],
|
|
exprs: vec![],
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Represents a time range for log query.
|
|
///
|
|
/// This struct allows various formats to express a time range from the user side
|
|
/// for best flexibility:
|
|
/// - Only `start` is provided: the `start` string can be any valid "date" or vaguer
|
|
/// content. For example: "2024-12-01", "2024-12", "2024", etc. It will be treated
|
|
/// as an time range corresponding to the provided date. E.g., "2024-12-01" refers
|
|
/// to the entire 24 hours in that day. In this case, the `start` field cannot be a
|
|
/// timestamp (like "2024-12-01T12:00:00Z").
|
|
/// - Both `start` and `end` are provided: the `start` and `end` strings can be either
|
|
/// a date or a timestamp. The `end` field is exclusive (`[start, end)`). When
|
|
/// `start` is a date it implies the start of the day, and when `end` is a date it
|
|
/// implies the end of the day.
|
|
/// - `span` with `start` OR `end`: the `span` string can be any valid "interval"
|
|
/// For example: "1024s", "1 week", "1 month", etc. The `span` field is applied to
|
|
/// the `start` or `end` field to calculate the other one correspondingly. If `start`
|
|
/// is provided, `end` is calculated as `start + span` and vice versa.
|
|
/// - Only `span` is provided: the `span` string can be any valid "interval" as mentioned
|
|
/// above. In this case, the current time (on the server side) is considered as the `end`.
|
|
/// - All fields are provided: in this case, the `start` and `end` fields are considered
|
|
/// with higher priority, and the `span` field is ignored.
|
|
///
|
|
/// This struct doesn't require a timezone to be presented. When the timezone is not
|
|
/// provided, it will fill the default timezone with the same rules akin to other queries.
|
|
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
pub struct TimeFilter {
|
|
pub start: Option<String>,
|
|
pub end: Option<String>,
|
|
pub span: Option<String>,
|
|
}
|
|
|
|
impl TimeFilter {
|
|
/// Validate and canonicalize the time filter.
|
|
///
|
|
/// This function will try to fill the missing fields and convert all dates to timestamps
|
|
#[allow(unused_assignments)] // false positive
|
|
pub fn canonicalize(&mut self) -> Result<()> {
|
|
let mut start_dt = None;
|
|
let mut end_dt = None;
|
|
|
|
if self.start.is_some() && self.end.is_none() && self.span.is_none() {
|
|
// Only 'start' is provided
|
|
let s = self.start.as_ref().unwrap();
|
|
let (start, end_opt) = Self::parse_datetime(s)?;
|
|
if end_opt.is_none() {
|
|
return Err(InvalidTimeFilterSnafu {
|
|
filter: self.clone(),
|
|
}
|
|
.build());
|
|
}
|
|
start_dt = Some(start);
|
|
end_dt = end_opt;
|
|
} else if self.start.is_some() && self.end.is_some() {
|
|
// Both 'start' and 'end' are provided
|
|
let (start, _) = Self::parse_datetime(self.start.as_ref().unwrap())?;
|
|
let (end, _) = Self::parse_datetime(self.end.as_ref().unwrap())?;
|
|
start_dt = Some(start);
|
|
end_dt = Some(end);
|
|
} else if self.span.is_some() && (self.start.is_some() || self.end.is_some()) {
|
|
// 'span' with 'start' or 'end'
|
|
let span = Self::parse_span(self.span.as_ref().unwrap())?;
|
|
if self.start.is_some() {
|
|
let (start, _) = Self::parse_datetime(self.start.as_ref().unwrap())?;
|
|
let end = start + span;
|
|
start_dt = Some(start);
|
|
end_dt = Some(end);
|
|
} else {
|
|
let (end, _) = Self::parse_datetime(self.end.as_ref().unwrap())?;
|
|
let start = end - span;
|
|
start_dt = Some(start);
|
|
end_dt = Some(end);
|
|
}
|
|
} else if self.span.is_some() && self.start.is_none() && self.end.is_none() {
|
|
// Only 'span' is provided
|
|
let span = Self::parse_span(self.span.as_ref().unwrap())?;
|
|
let end = Utc::now();
|
|
let start = end - span;
|
|
start_dt = Some(start);
|
|
end_dt = Some(end);
|
|
} else if self.start.is_some() && self.span.is_some() && self.end.is_some() {
|
|
// All fields are provided; 'start' and 'end' take priority
|
|
let (start, _) = Self::parse_datetime(self.start.as_ref().unwrap())?;
|
|
let (end, _) = Self::parse_datetime(self.end.as_ref().unwrap())?;
|
|
start_dt = Some(start);
|
|
end_dt = Some(end);
|
|
} else {
|
|
// Exception
|
|
return Err(InvalidTimeFilterSnafu {
|
|
filter: self.clone(),
|
|
}
|
|
.build());
|
|
}
|
|
|
|
// Validate that end is after start
|
|
if let (Some(start), Some(end)) = (&start_dt, &end_dt) {
|
|
if end <= start {
|
|
return Err(EndBeforeStartSnafu {
|
|
start: start.to_rfc3339(),
|
|
end: end.to_rfc3339(),
|
|
}
|
|
.build());
|
|
}
|
|
}
|
|
|
|
// Update the fields with canonicalized timestamps
|
|
if let Some(start) = start_dt {
|
|
self.start = Some(start.to_rfc3339());
|
|
}
|
|
|
|
if let Some(end) = end_dt {
|
|
self.end = Some(end.to_rfc3339());
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Util function returns a start and optional end DateTime
|
|
fn parse_datetime(s: &str) -> Result<(DateTime<Utc>, Option<DateTime<Utc>>)> {
|
|
if let Ok(dt) = DateTime::parse_from_rfc3339(s) {
|
|
Ok((dt.with_timezone(&Utc), None))
|
|
} else {
|
|
let formats = ["%Y-%m-%d", "%Y-%m", "%Y"];
|
|
for format in &formats {
|
|
if let Ok(naive_date) = NaiveDate::parse_from_str(s, format) {
|
|
let start = Utc.from_utc_datetime(
|
|
&naive_date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap()),
|
|
);
|
|
let end = match *format {
|
|
"%Y-%m-%d" => start + Duration::days(1),
|
|
"%Y-%m" => {
|
|
let next_month = if naive_date.month() == 12 {
|
|
NaiveDate::from_ymd_opt(naive_date.year() + 1, 1, 1).unwrap()
|
|
} else {
|
|
NaiveDate::from_ymd_opt(
|
|
naive_date.year(),
|
|
naive_date.month() + 1,
|
|
1,
|
|
)
|
|
.unwrap()
|
|
};
|
|
Utc.from_utc_datetime(&next_month.and_hms_opt(0, 0, 0).unwrap())
|
|
}
|
|
"%Y" => {
|
|
let next_year =
|
|
NaiveDate::from_ymd_opt(naive_date.year() + 1, 1, 1).unwrap();
|
|
Utc.from_utc_datetime(&next_year.and_hms_opt(0, 0, 0).unwrap())
|
|
}
|
|
_ => unreachable!(),
|
|
};
|
|
return Ok((start, Some(end)));
|
|
}
|
|
}
|
|
Err(InvalidDateFormatSnafu {
|
|
input: s.to_string(),
|
|
}
|
|
.build())
|
|
}
|
|
}
|
|
|
|
/// Util function handles durations like "1 week", "1 month", etc (unimplemented).
|
|
fn parse_span(s: &str) -> Result<Duration> {
|
|
// Simplified parsing logic
|
|
if let Ok(seconds) = s.parse::<i64>() {
|
|
Ok(Duration::seconds(seconds))
|
|
} else {
|
|
Err(InvalidSpanFormatSnafu {
|
|
input: s.to_string(),
|
|
}
|
|
.build())
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Represents an expression with filters to query.
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ColumnFilters {
|
|
/// Expression to apply filters to. Can be a column reference or any other LogExpr.
|
|
pub expr: Box<LogExpr>,
|
|
/// Filters to apply to the expression result. Can be empty.
|
|
pub filters: Vec<ContentFilter>,
|
|
}
|
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub enum ContentFilter {
|
|
// Search-based filters
|
|
/// Only match the exact content.
|
|
///
|
|
/// For example, if the content is "pale blue dot", the filter "pale" or "pale blue" will match.
|
|
Exact(String),
|
|
/// Match the content with a prefix.
|
|
///
|
|
/// For example, if the content is "error message", the filter "err" or "error mess" will match.
|
|
Prefix(String),
|
|
/// Match the content with a postfix. Similar to `Prefix`.
|
|
Postfix(String),
|
|
/// Match the content with a substring.
|
|
Contains(String),
|
|
/// Match the content with a regex pattern. The pattern should be a valid Rust regex.
|
|
Regex(String),
|
|
|
|
// Value-based filters
|
|
/// Content exists, a.k.a. not null.
|
|
Exist,
|
|
Between {
|
|
start: String,
|
|
end: String,
|
|
start_inclusive: bool,
|
|
end_inclusive: bool,
|
|
},
|
|
GreatThan {
|
|
value: String,
|
|
inclusive: bool,
|
|
},
|
|
LessThan {
|
|
value: String,
|
|
inclusive: bool,
|
|
},
|
|
In(Vec<String>),
|
|
IsTrue,
|
|
IsFalse,
|
|
|
|
// Compound filters
|
|
Compound(Vec<ContentFilter>, ConjunctionOperator),
|
|
}
|
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub enum ConjunctionOperator {
|
|
And,
|
|
Or,
|
|
}
|
|
|
|
/// Binary operators for LogExpr::BinaryOp.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub enum BinaryOperator {
|
|
// Comparison operators
|
|
Eq,
|
|
Ne,
|
|
Lt,
|
|
Le,
|
|
Gt,
|
|
Ge,
|
|
|
|
// Arithmetic operators
|
|
Plus,
|
|
Minus,
|
|
Multiply,
|
|
Divide,
|
|
Modulo,
|
|
|
|
// Logical operators
|
|
And,
|
|
Or,
|
|
}
|
|
|
|
/// Controls how many adjacent lines to return.
|
|
#[derive(Debug, Default, Serialize, Deserialize)]
|
|
pub enum Context {
|
|
#[default]
|
|
None,
|
|
/// Specify the number of lines before and after the matched line separately.
|
|
Lines(usize, usize),
|
|
/// Specify the number of seconds before and after the matched line occurred.
|
|
Seconds(usize, usize),
|
|
}
|
|
|
|
/// Represents limit and offset parameters for query pagination.
|
|
#[derive(Debug, Default, Serialize, Deserialize)]
|
|
pub struct Limit {
|
|
/// Optional number of items to skip before starting to return results
|
|
pub skip: Option<usize>,
|
|
/// Optional number of items to return after skipping
|
|
pub fetch: Option<usize>,
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::error::Error;
|
|
|
|
#[test]
|
|
fn test_canonicalize() {
|
|
// with 'start' only
|
|
let mut tf = TimeFilter {
|
|
start: Some("2023-10-01".to_string()),
|
|
end: None,
|
|
span: None,
|
|
};
|
|
tf.canonicalize().unwrap();
|
|
assert!(tf.end.is_some());
|
|
|
|
// with 'start' and 'span'
|
|
let mut tf = TimeFilter {
|
|
start: Some("2023-10-01T00:00:00Z".to_string()),
|
|
end: None,
|
|
span: Some("86400".to_string()), // 1 day in seconds
|
|
};
|
|
tf.canonicalize().unwrap();
|
|
assert_eq!(tf.end.as_ref().unwrap(), "2023-10-02T00:00:00+00:00");
|
|
|
|
// with 'end' and 'span'
|
|
let mut tf = TimeFilter {
|
|
start: None,
|
|
end: Some("2023-10-02T00:00:00Z".to_string()),
|
|
span: Some("86400".to_string()), // 1 day in seconds
|
|
};
|
|
tf.canonicalize().unwrap();
|
|
assert_eq!(tf.start.as_ref().unwrap(), "2023-10-01T00:00:00+00:00");
|
|
|
|
// with both 'start' and 'end'
|
|
let mut tf = TimeFilter {
|
|
start: Some("2023-10-01T00:00:00Z".to_string()),
|
|
end: Some("2023-10-02T00:00:00Z".to_string()),
|
|
span: None,
|
|
};
|
|
tf.canonicalize().unwrap();
|
|
assert_eq!(tf.start.as_ref().unwrap(), "2023-10-01T00:00:00+00:00");
|
|
assert_eq!(tf.end.as_ref().unwrap(), "2023-10-02T00:00:00+00:00");
|
|
|
|
// with invalid date format
|
|
let mut tf = TimeFilter {
|
|
start: Some("invalid-date".to_string()),
|
|
end: None,
|
|
span: None,
|
|
};
|
|
let result = tf.canonicalize();
|
|
assert!(matches!(result, Err(Error::InvalidDateFormat { .. })));
|
|
|
|
// with missing 'start' and 'end'
|
|
let mut tf = TimeFilter {
|
|
start: None,
|
|
end: None,
|
|
span: None,
|
|
};
|
|
let result = tf.canonicalize();
|
|
assert!(matches!(result, Err(Error::InvalidTimeFilter { .. })));
|
|
|
|
// 'end' is before 'start'
|
|
let mut tf = TimeFilter {
|
|
start: Some("2023-10-02T00:00:00Z".to_string()),
|
|
end: Some("2023-10-01T00:00:00Z".to_string()),
|
|
span: None,
|
|
};
|
|
let result = tf.canonicalize();
|
|
assert!(matches!(result, Err(Error::EndBeforeStart { .. })));
|
|
}
|
|
}
|