feat: define basic structures and implement TimeFilter (#5086)

* feat: define basic structures and implement TimeFilter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* document column filter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* define context

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change variable name to avoid typo checker

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change error referring style

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refine context definition

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2024-12-04 15:39:33 +08:00
committed by GitHub
parent ff4c153d4b
commit 5092f5f451
6 changed files with 413 additions and 0 deletions

11
Cargo.lock generated
View File

@@ -6091,6 +6091,17 @@ version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "log-query"
version = "0.11.0"
dependencies = [
"chrono",
"common-error",
"common-macro",
"snafu 0.8.5",
"table",
]
[[package]]
name = "log-store"
version = "0.11.0"

View File

@@ -40,6 +40,7 @@ members = [
"src/flow",
"src/frontend",
"src/index",
"src/log-query",
"src/log-store",
"src/meta-client",
"src/meta-srv",

15
src/log-query/Cargo.toml Normal file
View File

@@ -0,0 +1,15 @@
[package]
name = "log-query"
version.workspace = true
edition.workspace = true
license.workspace = true
[lints]
workspace = true
[dependencies]
chrono.workspace = true
common-error.workspace = true
common-macro.workspace = true
snafu.workspace = true
table.workspace = true

View File

@@ -0,0 +1,46 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use common_error::ext::ErrorExt;
use common_macro::stack_trace_debug;
use snafu::Snafu;
use crate::TimeFilter;
#[derive(Snafu)]
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("Invalid time filter: {filter:?}"))]
InvalidTimeFilter { filter: TimeFilter },
#[snafu(display("Invalid date format: {input}"))]
InvalidDateFormat { input: String },
#[snafu(display("Invalid span format: {input}"))]
InvalidSpanFormat { input: String },
#[snafu(display("End time {end} is before start time {start}"))]
EndBeforeStart { start: String, end: String },
}
impl ErrorExt for Error {
fn as_any(&self) -> &dyn Any {
self
}
}
pub type Result<T> = std::result::Result<T, Error>;

18
src/log-query/src/lib.rs Normal file
View File

@@ -0,0 +1,18 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod error;
mod log_query;
pub use log_query::*;

View File

@@ -0,0 +1,322 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use chrono::{DateTime, Datelike, Duration, NaiveDate, NaiveTime, TimeZone, Utc};
use table::table_name::TableName;
use crate::error::{
EndBeforeStartSnafu, InvalidDateFormatSnafu, InvalidSpanFormatSnafu, InvalidTimeFilterSnafu,
Result,
};
/// GreptimeDB's log query request.
pub struct LogQuery {
/// A fully qualified table name to query logs from.
pub table_name: TableName,
/// Specifies the time range for the log query. See [`TimeFilter`] for more details.
pub time_filter: TimeFilter,
/// Columns with filters to query.
pub columns: Vec<ColumnFilters>,
/// Maximum number of logs to return. If not provided, it will return all matched logs.
pub limit: Option<usize>,
/// Adjacent lines to return.
pub context: Context,
}
/// Represents a time range for log query.
///
/// This struct allows various formats to express a time range from the user side
/// for best flexibility:
/// - Only `start` is provided: the `start` string can be any valid "date" or vaguer
/// content. For example: "2024-12-01", "2024-12", "2024", etc. It will be treated
/// as an time range corresponding to the provided date. E.g., "2024-12-01" refers
/// to the entire 24 hours in that day. In this case, the `start` field cannot be a
/// timestamp (like "2024-12-01T12:00:00Z").
/// - Both `start` and `end` are provided: the `start` and `end` strings can be either
/// a date or a timestamp. The `end` field is exclusive (`[start, end)`). When
/// `start` is a date it implies the start of the day, and when `end` is a date it
/// implies the end of the day.
/// - `span` with `start` OR `end`: the `span` string can be any valid "interval"
/// For example: "1024s", "1 week", "1 month", etc. The `span` field is applied to
/// the `start` or `end` field to calculate the other one correspondingly. If `start`
/// is provided, `end` is calculated as `start + span` and vice versa.
/// - Only `span` is provided: the `span` string can be any valid "interval" as mentioned
/// above. In this case, the current time (on the server side) is considered as the `end`.
/// - All fields are provided: in this case, the `start` and `end` fields are considered
/// with higher priority, and the `span` field is ignored.
///
/// This struct doesn't require a timezone to be presented. When the timezone is not
/// provided, it will fill the default timezone with the same rules akin to other queries.
#[derive(Debug, Clone)]
pub struct TimeFilter {
pub start: Option<String>,
pub end: Option<String>,
pub span: Option<String>,
}
impl TimeFilter {
/// Validate and canonicalize the time filter.
///
/// This function will try to fill the missing fields and convert all dates to timestamps
// false positive
#[allow(unused_assignments)]
pub fn canonicalize(&mut self) -> Result<()> {
let mut start_dt = None;
let mut end_dt = None;
if self.start.is_some() && self.end.is_none() && self.span.is_none() {
// Only 'start' is provided
let s = self.start.as_ref().unwrap();
let (start, end_opt) = Self::parse_datetime(s)?;
if end_opt.is_none() {
return Err(InvalidTimeFilterSnafu {
filter: self.clone(),
}
.build());
}
start_dt = Some(start);
end_dt = end_opt;
} else if self.start.is_some() && self.end.is_some() {
// Both 'start' and 'end' are provided
let (start, _) = Self::parse_datetime(self.start.as_ref().unwrap())?;
let (end, _) = Self::parse_datetime(self.end.as_ref().unwrap())?;
start_dt = Some(start);
end_dt = Some(end);
} else if self.span.is_some() && (self.start.is_some() || self.end.is_some()) {
// 'span' with 'start' or 'end'
let span = Self::parse_span(self.span.as_ref().unwrap())?;
if self.start.is_some() {
let (start, _) = Self::parse_datetime(self.start.as_ref().unwrap())?;
let end = start + span;
start_dt = Some(start);
end_dt = Some(end);
} else {
let (end, _) = Self::parse_datetime(self.end.as_ref().unwrap())?;
let start = end - span;
start_dt = Some(start);
end_dt = Some(end);
}
} else if self.span.is_some() && self.start.is_none() && self.end.is_none() {
// Only 'span' is provided
let span = Self::parse_span(self.span.as_ref().unwrap())?;
let end = Utc::now();
let start = end - span;
start_dt = Some(start);
end_dt = Some(end);
} else if self.start.is_some() && self.span.is_some() && self.end.is_some() {
// All fields are provided; 'start' and 'end' take priority
let (start, _) = Self::parse_datetime(self.start.as_ref().unwrap())?;
let (end, _) = Self::parse_datetime(self.end.as_ref().unwrap())?;
start_dt = Some(start);
end_dt = Some(end);
} else {
// Exception
return Err(InvalidTimeFilterSnafu {
filter: self.clone(),
}
.build());
}
// Validate that end is after start
if let (Some(start), Some(end)) = (&start_dt, &end_dt) {
if end <= start {
return Err(EndBeforeStartSnafu {
start: start.to_rfc3339(),
end: end.to_rfc3339(),
}
.build());
}
}
// Update the fields with canonicalized timestamps
if let Some(start) = start_dt {
self.start = Some(start.to_rfc3339());
}
if let Some(end) = end_dt {
self.end = Some(end.to_rfc3339());
}
Ok(())
}
/// Util function returns a start and optional end DateTime
fn parse_datetime(s: &str) -> Result<(DateTime<Utc>, Option<DateTime<Utc>>)> {
if let Ok(dt) = DateTime::parse_from_rfc3339(s) {
Ok((dt.with_timezone(&Utc), None))
} else {
let formats = ["%Y-%m-%d", "%Y-%m", "%Y"];
for format in &formats {
if let Ok(naive_date) = NaiveDate::parse_from_str(s, format) {
let start = Utc.from_utc_datetime(
&naive_date.and_time(NaiveTime::from_hms_opt(0, 0, 0).unwrap()),
);
let end = match *format {
"%Y-%m-%d" => start + Duration::days(1),
"%Y-%m" => {
let next_month = if naive_date.month() == 12 {
NaiveDate::from_ymd_opt(naive_date.year() + 1, 1, 1).unwrap()
} else {
NaiveDate::from_ymd_opt(
naive_date.year(),
naive_date.month() + 1,
1,
)
.unwrap()
};
Utc.from_utc_datetime(&next_month.and_hms_opt(0, 0, 0).unwrap())
}
"%Y" => {
let next_year =
NaiveDate::from_ymd_opt(naive_date.year() + 1, 1, 1).unwrap();
Utc.from_utc_datetime(&next_year.and_hms_opt(0, 0, 0).unwrap())
}
_ => unreachable!(),
};
return Ok((start, Some(end)));
}
}
Err(InvalidDateFormatSnafu {
input: s.to_string(),
}
.build())
}
}
/// Util function handles durations like "1 week", "1 month", etc (unimplemented).
fn parse_span(s: &str) -> Result<Duration> {
// Simplified parsing logic
if let Ok(seconds) = s.parse::<i64>() {
Ok(Duration::seconds(seconds))
} else {
Err(InvalidSpanFormatSnafu {
input: s.to_string(),
}
.build())
}
}
}
/// Represents a column with filters to query.
pub struct ColumnFilters {
/// Case-sensitive column name to query.
pub column_name: String,
/// Filters to apply to the column. Can be empty.
pub filters: Vec<ContentFilter>,
}
pub enum ContentFilter {
/// Only match the exact content.
///
/// For example, if the content is "pale blue dot", the filter "pale" or "pale blue" will match.
Exact(String),
/// Match the content with a prefix.
///
/// For example, if the content is "error message", the filter "err" or "error mess" will match.
Prefix(String),
/// Match the content with a postfix. Similar to `Prefix`.
Postfix(String),
/// Match the content with a substring.
Contains(String),
/// Match the content with a regex pattern. The pattern should be a valid Rust regex.
Regex(String),
Compound(Vec<ContentFilter>, BinaryOperator),
}
pub enum BinaryOperator {
And,
Or,
}
/// Controls how many adjacent lines to return.
pub enum Context {
None,
/// Specify the number of lines before and after the matched line separately.
Lines(usize, usize),
/// Specify the number of seconds before and after the matched line occurred.
Seconds(usize, usize),
}
#[cfg(test)]
mod tests {
use super::*;
use crate::error::Error;
#[test]
fn test_canonicalize() {
// with 'start' only
let mut tf = TimeFilter {
start: Some("2023-10-01".to_string()),
end: None,
span: None,
};
tf.canonicalize().unwrap();
assert!(tf.end.is_some());
// with 'start' and 'span'
let mut tf = TimeFilter {
start: Some("2023-10-01T00:00:00Z".to_string()),
end: None,
span: Some("86400".to_string()), // 1 day in seconds
};
tf.canonicalize().unwrap();
assert_eq!(tf.end.as_ref().unwrap(), "2023-10-02T00:00:00+00:00");
// with 'end' and 'span'
let mut tf = TimeFilter {
start: None,
end: Some("2023-10-02T00:00:00Z".to_string()),
span: Some("86400".to_string()), // 1 day in seconds
};
tf.canonicalize().unwrap();
assert_eq!(tf.start.as_ref().unwrap(), "2023-10-01T00:00:00+00:00");
// with both 'start' and 'end'
let mut tf = TimeFilter {
start: Some("2023-10-01T00:00:00Z".to_string()),
end: Some("2023-10-02T00:00:00Z".to_string()),
span: None,
};
tf.canonicalize().unwrap();
assert_eq!(tf.start.as_ref().unwrap(), "2023-10-01T00:00:00+00:00");
assert_eq!(tf.end.as_ref().unwrap(), "2023-10-02T00:00:00+00:00");
// with invalid date format
let mut tf = TimeFilter {
start: Some("invalid-date".to_string()),
end: None,
span: None,
};
let result = tf.canonicalize();
assert!(matches!(result, Err(Error::InvalidDateFormat { .. })));
// with missing 'start' and 'end'
let mut tf = TimeFilter {
start: None,
end: None,
span: None,
};
let result = tf.canonicalize();
assert!(matches!(result, Err(Error::InvalidTimeFilter { .. })));
// 'end' is before 'start'
let mut tf = TimeFilter {
start: Some("2023-10-02T00:00:00Z".to_string()),
end: Some("2023-10-01T00:00:00Z".to_string()),
span: None,
};
let result = tf.canonicalize();
assert!(matches!(result, Err(Error::EndBeforeStart { .. })));
}
}