mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-07 13:52:59 +00:00
refactor: replace pipeline::value with vrl::value (#6430)
* chore: pass compile Signed-off-by: shuiyisong <xixing.sys@gmail.com> * fix: default case Signed-off-by: shuiyisong <xixing.sys@gmail.com> * fix: test Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: remove and move code Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: remove serde_value to vrlvalue conversion Signed-off-by: shuiyisong <xixing.sys@gmail.com> * refactor: optimized vrl value related code Signed-off-by: shuiyisong <xixing.sys@gmail.com> * refactor: loki transform using vrl Signed-off-by: shuiyisong <xixing.sys@gmail.com> * fix: remove unused error Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: fix cr issue Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: use from_utf8_lossy_owned Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: CR issue Signed-off-by: shuiyisong <xixing.sys@gmail.com> --------- Signed-off-by: shuiyisong <xixing.sys@gmail.com>
This commit is contained in:
48
Cargo.lock
generated
48
Cargo.lock
generated
@@ -2996,9 +2996,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crc"
|
||||
version = "3.2.1"
|
||||
version = "3.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636"
|
||||
checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
|
||||
dependencies = [
|
||||
"crc-catalog",
|
||||
]
|
||||
@@ -3830,7 +3830,7 @@ dependencies = [
|
||||
"jsonb",
|
||||
"num",
|
||||
"num-traits",
|
||||
"ordered-float 3.9.2",
|
||||
"ordered-float 4.3.0",
|
||||
"paste",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -4151,12 +4151,16 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "domain"
|
||||
version = "0.10.4"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c84070523f8ba0f9127ff156920f27eb27b302b425efe60bf5f41ec244d1c60"
|
||||
checksum = "a11dd7f04a6a6d2aea0153c6e31f5ea7af8b2efdf52cdaeea7a9a592c7fefef9"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"bytes",
|
||||
"domain-macros",
|
||||
"futures-util",
|
||||
"hashbrown 0.14.5",
|
||||
"log",
|
||||
"moka",
|
||||
"octseq",
|
||||
"rand 0.8.5",
|
||||
@@ -4167,6 +4171,17 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "domain-macros"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e197fdfd2cdb5fdeb7f8ddcf3aed5d5d04ecde2890d448b14ffb716f7376b70"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dotenv"
|
||||
version = "0.15.0"
|
||||
@@ -8566,17 +8581,6 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ordered-float"
|
||||
version = "3.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"rand 0.8.5",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ordered-float"
|
||||
version = "4.3.0"
|
||||
@@ -8584,6 +8588,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "44d501f1a72f71d3c063a6bbc8f7271fa73aa09fe5d6283b6571e2ed176a2537"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"rand 0.8.5",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -9120,6 +9126,7 @@ dependencies = [
|
||||
"moka",
|
||||
"once_cell",
|
||||
"operator",
|
||||
"ordered-float 4.3.0",
|
||||
"paste",
|
||||
"prometheus",
|
||||
"query",
|
||||
@@ -11368,6 +11375,7 @@ dependencies = [
|
||||
"tracing",
|
||||
"urlencoding",
|
||||
"uuid",
|
||||
"vrl",
|
||||
"zstd 0.13.2",
|
||||
]
|
||||
|
||||
@@ -13030,9 +13038,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.44.2"
|
||||
version = "1.45.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48"
|
||||
checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"bytes",
|
||||
@@ -13988,9 +13996,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "vrl"
|
||||
version = "0.24.0"
|
||||
version = "0.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9ceadaa40aef567a26079ff014ca7a567ba85344f1b81090b5ec7d7bb16a219"
|
||||
checksum = "4f49394b948406ea1564aa00152e011d87a38ad35d277ebddda257a9ee39c419"
|
||||
dependencies = [
|
||||
"aes",
|
||||
"aes-siv",
|
||||
|
||||
@@ -167,6 +167,7 @@ opentelemetry-proto = { version = "0.27", features = [
|
||||
"with-serde",
|
||||
"logs",
|
||||
] }
|
||||
ordered-float = { version = "4.3", features = ["serde"] }
|
||||
parking_lot = "0.12"
|
||||
parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
|
||||
paste = "1.0"
|
||||
@@ -228,6 +229,7 @@ tracing-appender = "0.2"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
|
||||
typetag = "0.2"
|
||||
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
|
||||
vrl = "0.25"
|
||||
zstd = "0.13"
|
||||
# DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ greptime-proto.workspace = true
|
||||
jsonb.workspace = true
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
ordered-float = { version = "3.0", features = ["serde"] }
|
||||
ordered-float.workspace = true
|
||||
paste.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -47,6 +47,7 @@ lazy_static.workspace = true
|
||||
moka = { workspace = true, features = ["sync"] }
|
||||
once_cell.workspace = true
|
||||
operator.workspace = true
|
||||
ordered-float.workspace = true
|
||||
paste.workspace = true
|
||||
prometheus.workspace = true
|
||||
query.workspace = true
|
||||
@@ -59,7 +60,7 @@ sql.workspace = true
|
||||
table.workspace = true
|
||||
tokio.workspace = true
|
||||
urlencoding = "2.1"
|
||||
vrl = "0.24"
|
||||
vrl.workspace = true
|
||||
yaml-rust = "0.4"
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
@@ -16,23 +16,21 @@ use std::sync::Arc;
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use pipeline::error::Result;
|
||||
use pipeline::{
|
||||
json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext, SchemaInfo,
|
||||
};
|
||||
use serde_json::{Deserializer, Value};
|
||||
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext, SchemaInfo};
|
||||
use serde_json::Deserializer;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
fn processor_mut(
|
||||
pipeline: Arc<Pipeline>,
|
||||
pipeline_ctx: &PipelineContext<'_>,
|
||||
schema_info: &mut SchemaInfo,
|
||||
input_values: Vec<Value>,
|
||||
input_values: Vec<VrlValue>,
|
||||
) -> Result<Vec<greptime_proto::v1::Row>> {
|
||||
let mut result = Vec::with_capacity(input_values.len());
|
||||
|
||||
for v in input_values {
|
||||
let payload = json_to_map(v).unwrap();
|
||||
let r = pipeline
|
||||
.exec_mut(payload, pipeline_ctx, schema_info)?
|
||||
.exec_mut(v, pipeline_ctx, schema_info)?
|
||||
.into_transformed()
|
||||
.expect("expect transformed result ");
|
||||
result.push(r.0);
|
||||
@@ -237,7 +235,7 @@ transform:
|
||||
fn criterion_benchmark(c: &mut Criterion) {
|
||||
let input_value_str = include_str!("./data.log");
|
||||
let input_value = Deserializer::from_str(input_value_str)
|
||||
.into_iter::<serde_json::Value>()
|
||||
.into_iter::<VrlValue>()
|
||||
.collect::<std::result::Result<Vec<_>, _>>()
|
||||
.unwrap();
|
||||
let pipeline = prepare_pipeline();
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use common_telemetry::debug;
|
||||
use snafu::OptionExt;
|
||||
use vrl::value::Value as VrlValue;
|
||||
use yaml_rust::Yaml;
|
||||
|
||||
use crate::error::{
|
||||
@@ -21,7 +22,7 @@ use crate::error::{
|
||||
ValueRequiredForDispatcherRuleSnafu,
|
||||
};
|
||||
use crate::etl::ctx_req::TABLE_SUFFIX_KEY;
|
||||
use crate::Value;
|
||||
use crate::etl::value::yaml_to_vrl_value;
|
||||
|
||||
const FIELD: &str = "field";
|
||||
const PIPELINE: &str = "pipeline";
|
||||
@@ -62,7 +63,7 @@ pub(crate) struct Dispatcher {
|
||||
/// name
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(crate) struct Rule {
|
||||
pub value: Value,
|
||||
pub value: VrlValue,
|
||||
pub table_suffix: String,
|
||||
pub pipeline: Option<String>,
|
||||
}
|
||||
@@ -90,7 +91,8 @@ impl TryFrom<&Yaml> for Dispatcher {
|
||||
if rule[VALUE].is_badvalue() {
|
||||
ValueRequiredForDispatcherRuleSnafu.fail()?;
|
||||
}
|
||||
let value = Value::try_from(&rule[VALUE])?;
|
||||
|
||||
let value = yaml_to_vrl_value(&rule[VALUE])?;
|
||||
|
||||
Ok(Rule {
|
||||
value,
|
||||
@@ -109,8 +111,9 @@ impl TryFrom<&Yaml> for Dispatcher {
|
||||
|
||||
impl Dispatcher {
|
||||
/// execute dispatcher and returns matched rule if any
|
||||
pub(crate) fn exec(&self, data: &Value) -> Option<&Rule> {
|
||||
if let Some(value) = data.get(&self.field) {
|
||||
pub(crate) fn exec(&self, data: &VrlValue) -> Option<&Rule> {
|
||||
let data = data.as_object()?;
|
||||
if let Some(value) = data.get(self.field.as_str()) {
|
||||
for rule in &self.rules {
|
||||
if rule.value == *value {
|
||||
return Some(rule);
|
||||
|
||||
@@ -62,7 +62,7 @@ pub enum Error {
|
||||
#[snafu(display("Processor {processor}: expect string value, but got {v:?}"))]
|
||||
ProcessorExpectString {
|
||||
processor: String,
|
||||
v: crate::Value,
|
||||
v: vrl::value::Value,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
@@ -229,12 +229,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to get timestamp"))]
|
||||
DateFailedToGetTimestamp {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid Pattern: '{s}'. {detail}"))]
|
||||
DissectInvalidPattern {
|
||||
s: String,
|
||||
@@ -372,13 +366,6 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Url decoding error"))]
|
||||
UrlEncodingDecode {
|
||||
#[snafu(source)]
|
||||
error: std::string::FromUtf8Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Invalid transform on_failure value: {value}"))]
|
||||
TransformOnFailureInvalidValue {
|
||||
value: String,
|
||||
@@ -433,17 +420,6 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Null type not supported"))]
|
||||
CoerceUnsupportedNullType {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Null type not supported when to coerce '{ty}' type"))]
|
||||
CoerceUnsupportedNullTypeTo {
|
||||
ty: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Type: {ty} value not supported for Epoch"))]
|
||||
CoerceUnsupportedEpochType {
|
||||
ty: String,
|
||||
@@ -556,12 +532,6 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Input value must be an object"))]
|
||||
InputValueMustBeObject {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Column options error"))]
|
||||
ColumnOptions {
|
||||
#[snafu(source)]
|
||||
@@ -575,12 +545,6 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Unsupported number type: {value:?}"))]
|
||||
UnsupportedNumberType {
|
||||
value: serde_json::Number,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Failed to parse json"))]
|
||||
JsonParse {
|
||||
#[snafu(source)]
|
||||
@@ -694,14 +658,6 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Float is not a number: {}", input_float))]
|
||||
FloatNaN {
|
||||
input_float: f64,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid timestamp value: {}", input))]
|
||||
InvalidTimestamp {
|
||||
input: String,
|
||||
@@ -709,14 +665,13 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to convert bytes to utf8"))]
|
||||
BytesToUtf8 {
|
||||
#[snafu(source)]
|
||||
error: std::string::FromUtf8Error,
|
||||
#[snafu(display("Invalid epoch value '{}' for resolution '{}'", value, resolution))]
|
||||
InvalidEpochForResolution {
|
||||
value: i64,
|
||||
resolution: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Please don't use regex in Vrl script"))]
|
||||
VrlRegexValue {
|
||||
#[snafu(implicit)]
|
||||
@@ -808,6 +763,21 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Float is NaN"))]
|
||||
FloatIsNan {
|
||||
#[snafu(source)]
|
||||
error: ordered_float::FloatIsNan,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Unsupported type in pipeline: {}", ty))]
|
||||
UnsupportedTypeInPipeline {
|
||||
ty: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -858,7 +828,6 @@ impl ErrorExt for Error {
|
||||
| DateParseTimezone { .. }
|
||||
| DateParse { .. }
|
||||
| DateFailedToGetLocalTimezone { .. }
|
||||
| DateFailedToGetTimestamp { .. }
|
||||
| DissectInvalidPattern { .. }
|
||||
| DissectEmptyPattern { .. }
|
||||
| DissectSplitExceedsInput { .. }
|
||||
@@ -881,7 +850,6 @@ impl ErrorExt for Error {
|
||||
| RegexNoValidPattern { .. }
|
||||
| UrlEncodingInvalidMethod { .. }
|
||||
| DigestPatternInvalid { .. }
|
||||
| UrlEncodingDecode { .. }
|
||||
| TransformOnFailureInvalidValue { .. }
|
||||
| TransformElementMustBeMap { .. }
|
||||
| TransformFieldMustBeSet { .. }
|
||||
@@ -891,8 +859,6 @@ impl ErrorExt for Error {
|
||||
| TransformTimestampIndexCount { .. }
|
||||
| AutoTransformOneTimestamp { .. }
|
||||
| InvalidVersionNumber { .. }
|
||||
| CoerceUnsupportedNullType { .. }
|
||||
| CoerceUnsupportedNullTypeTo { .. }
|
||||
| CoerceUnsupportedEpochType { .. }
|
||||
| CoerceStringToType { .. }
|
||||
| CoerceJsonTypeTo { .. }
|
||||
@@ -908,10 +874,8 @@ impl ErrorExt for Error {
|
||||
| ValueYamlKeyMustBeString { .. }
|
||||
| YamlLoad { .. }
|
||||
| YamlParse { .. }
|
||||
| InputValueMustBeObject { .. }
|
||||
| ColumnOptions { .. }
|
||||
| UnsupportedIndexType { .. }
|
||||
| UnsupportedNumberType { .. }
|
||||
| IdentifyPipelineColumnTypeMismatch { .. }
|
||||
| JsonParse { .. }
|
||||
| JsonPathParse { .. }
|
||||
@@ -924,12 +888,14 @@ impl ErrorExt for Error {
|
||||
| InvalidTableSuffixTemplate { .. }
|
||||
| CompileVrl { .. }
|
||||
| ExecuteVrl { .. }
|
||||
| FloatNaN { .. }
|
||||
| BytesToUtf8 { .. }
|
||||
| InvalidTimestamp { .. }
|
||||
| VrlRegexValue { .. }
|
||||
| VrlReturnValue { .. }
|
||||
| PipelineMissing { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
FloatIsNan { .. }
|
||||
| InvalidEpochForResolution { .. }
|
||||
| UnsupportedTypeInPipeline { .. } => StatusCode::InvalidArguments,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,21 +19,19 @@ pub mod processor;
|
||||
pub mod transform;
|
||||
pub mod value;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use api::v1::Row;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use itertools::Itertools;
|
||||
use processor::{Processor, Processors};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use transform::Transforms;
|
||||
use value::Value;
|
||||
use vrl::core::Value as VrlValue;
|
||||
use yaml_rust::{Yaml, YamlLoader};
|
||||
|
||||
use crate::dispatcher::{Dispatcher, Rule};
|
||||
use crate::error::{
|
||||
AutoTransformOneTimestampSnafu, Error, InputValueMustBeObjectSnafu, IntermediateKeyIndexSnafu,
|
||||
InvalidVersionNumberSnafu, Result, YamlLoadSnafu, YamlParseSnafu,
|
||||
AutoTransformOneTimestampSnafu, Error, IntermediateKeyIndexSnafu, InvalidVersionNumberSnafu,
|
||||
Result, YamlLoadSnafu, YamlParseSnafu,
|
||||
};
|
||||
use crate::etl::processor::ProcessorKind;
|
||||
use crate::etl::transform::transformer::greptime::values_to_row;
|
||||
@@ -228,7 +226,7 @@ impl DispatchedTo {
|
||||
#[derive(Debug)]
|
||||
pub enum PipelineExecOutput {
|
||||
Transformed(TransformedOutput),
|
||||
DispatchedTo(DispatchedTo, Value),
|
||||
DispatchedTo(DispatchedTo, VrlValue),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -261,40 +259,6 @@ impl PipelineExecOutput {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn json_to_map(val: serde_json::Value) -> Result<Value> {
|
||||
match val {
|
||||
serde_json::Value::Object(map) => {
|
||||
let mut intermediate_state = BTreeMap::new();
|
||||
for (k, v) in map {
|
||||
intermediate_state.insert(k, Value::try_from(v)?);
|
||||
}
|
||||
Ok(Value::Map(intermediate_state.into()))
|
||||
}
|
||||
_ => InputValueMustBeObjectSnafu.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn json_array_to_map(val: Vec<serde_json::Value>) -> Result<Vec<Value>> {
|
||||
val.into_iter().map(json_to_map).collect()
|
||||
}
|
||||
|
||||
pub fn simd_json_to_map(val: simd_json::OwnedValue) -> Result<Value> {
|
||||
match val {
|
||||
simd_json::OwnedValue::Object(map) => {
|
||||
let mut intermediate_state = BTreeMap::new();
|
||||
for (k, v) in map.into_iter() {
|
||||
intermediate_state.insert(k, Value::try_from(v)?);
|
||||
}
|
||||
Ok(Value::Map(intermediate_state.into()))
|
||||
}
|
||||
_ => InputValueMustBeObjectSnafu.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn simd_json_array_to_map(val: Vec<simd_json::OwnedValue>) -> Result<Vec<Value>> {
|
||||
val.into_iter().map(simd_json_to_map).collect()
|
||||
}
|
||||
|
||||
impl Pipeline {
|
||||
fn is_v1(&self) -> bool {
|
||||
self.doc_version == PipelineDocVersion::V1
|
||||
@@ -302,7 +266,7 @@ impl Pipeline {
|
||||
|
||||
pub fn exec_mut(
|
||||
&self,
|
||||
mut val: Value,
|
||||
mut val: VrlValue,
|
||||
pipeline_ctx: &PipelineContext<'_>,
|
||||
schema_info: &mut SchemaInfo,
|
||||
) -> Result<PipelineExecOutput> {
|
||||
@@ -409,11 +373,14 @@ macro_rules! setup_pipeline {
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::Rows;
|
||||
use greptime_proto::v1::value::ValueData;
|
||||
use greptime_proto::v1::{self, ColumnDataType, SemanticType};
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::KeyString;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -454,7 +421,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let payload = json_to_map(input_value).unwrap();
|
||||
let payload = input_value.into();
|
||||
let result = pipeline
|
||||
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -515,9 +482,10 @@ transform:
|
||||
&pipeline_param,
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
let mut payload = BTreeMap::new();
|
||||
payload.insert("message".to_string(), Value::String(message));
|
||||
let payload = Value::Map(payload.into());
|
||||
let payload = VrlValue::Object(BTreeMap::from([(
|
||||
KeyString::from("message"),
|
||||
VrlValue::Bytes(Bytes::from(message)),
|
||||
)]));
|
||||
|
||||
let result = pipeline
|
||||
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
|
||||
@@ -613,7 +581,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let payload = json_to_map(input_value).unwrap();
|
||||
let payload = input_value.into();
|
||||
let result = pipeline
|
||||
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -666,7 +634,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
let schema = pipeline.schemas().unwrap().clone();
|
||||
let result = json_to_map(input_value).unwrap();
|
||||
let result = input_value.into();
|
||||
|
||||
let row = pipeline
|
||||
.exec_mut(result, &pipeline_ctx, &mut schema_info)
|
||||
@@ -732,7 +700,7 @@ transform:
|
||||
assert_eq!(
|
||||
dispatcher.rules[0],
|
||||
crate::dispatcher::Rule {
|
||||
value: Value::String("http".to_string()),
|
||||
value: VrlValue::Bytes(Bytes::from("http")),
|
||||
table_suffix: "http_events".to_string(),
|
||||
pipeline: None
|
||||
}
|
||||
@@ -741,7 +709,7 @@ transform:
|
||||
assert_eq!(
|
||||
dispatcher.rules[1],
|
||||
crate::dispatcher::Rule {
|
||||
value: Value::String("database".to_string()),
|
||||
value: VrlValue::Bytes(Bytes::from("database")),
|
||||
table_suffix: "db_events".to_string(),
|
||||
pipeline: Some("database_pipeline".to_string()),
|
||||
}
|
||||
|
||||
@@ -19,10 +19,10 @@ use ahash::{HashMap, HashMapExt};
|
||||
use api::v1::{RowInsertRequest, RowInsertRequests, Rows};
|
||||
use session::context::{QueryContext, QueryContextRef};
|
||||
use snafu::OptionExt;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::error::{Result, ValueMustBeMapSnafu};
|
||||
use crate::tablesuffix::TableSuffixTemplate;
|
||||
use crate::Value;
|
||||
|
||||
const GREPTIME_AUTO_CREATE_TABLE: &str = "greptime_auto_create_table";
|
||||
const GREPTIME_TTL: &str = "greptime_ttl";
|
||||
@@ -86,32 +86,34 @@ impl ContextOpt {
|
||||
}
|
||||
|
||||
impl ContextOpt {
|
||||
pub fn from_pipeline_map_to_opt(pipeline_map: &mut Value) -> Result<Self> {
|
||||
let pipeline_map = pipeline_map.as_map_mut().context(ValueMustBeMapSnafu)?;
|
||||
pub fn from_pipeline_map_to_opt(value: &mut VrlValue) -> Result<Self> {
|
||||
let map = value.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
let mut opt = Self::default();
|
||||
for k in PIPELINE_HINT_KEYS {
|
||||
if let Some(v) = pipeline_map.remove(k) {
|
||||
if let Some(v) = map.remove(k) {
|
||||
let v = v.to_string_lossy().to_string();
|
||||
match k {
|
||||
GREPTIME_AUTO_CREATE_TABLE => {
|
||||
opt.auto_create_table = Some(v.to_str_value());
|
||||
opt.auto_create_table = Some(v);
|
||||
}
|
||||
GREPTIME_TTL => {
|
||||
opt.ttl = Some(v.to_str_value());
|
||||
opt.ttl = Some(v);
|
||||
}
|
||||
GREPTIME_APPEND_MODE => {
|
||||
opt.append_mode = Some(v.to_str_value());
|
||||
opt.append_mode = Some(v);
|
||||
}
|
||||
GREPTIME_MERGE_MODE => {
|
||||
opt.merge_mode = Some(v.to_str_value());
|
||||
opt.merge_mode = Some(v);
|
||||
}
|
||||
GREPTIME_PHYSICAL_TABLE => {
|
||||
opt.physical_table = Some(v.to_str_value());
|
||||
opt.physical_table = Some(v);
|
||||
}
|
||||
GREPTIME_SKIP_WAL => {
|
||||
opt.skip_wal = Some(v.to_str_value());
|
||||
opt.skip_wal = Some(v);
|
||||
}
|
||||
GREPTIME_TABLE_SUFFIX => {
|
||||
opt.table_suffix = Some(v.to_str_value());
|
||||
opt.table_suffix = Some(v);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@@ -123,7 +125,7 @@ impl ContextOpt {
|
||||
pub(crate) fn resolve_table_suffix(
|
||||
&mut self,
|
||||
table_suffix: Option<&TableSuffixTemplate>,
|
||||
pipeline_map: &Value,
|
||||
pipeline_map: &VrlValue,
|
||||
) -> Option<String> {
|
||||
self.table_suffix
|
||||
.take()
|
||||
|
||||
@@ -28,7 +28,7 @@ pub mod regex;
|
||||
pub mod select;
|
||||
pub mod simple_extract;
|
||||
pub mod urlencoding;
|
||||
pub mod vrl;
|
||||
pub mod vrl_processor;
|
||||
|
||||
use std::str::FromStr;
|
||||
|
||||
@@ -47,6 +47,7 @@ use letter::LetterProcessor;
|
||||
use regex::RegexProcessor;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use urlencoding::UrlEncodingProcessor;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::error::{
|
||||
Error, FailedParseFieldFromStringSnafu, FieldMustBeTypeSnafu, InvalidFieldRenameSnafu,
|
||||
@@ -57,8 +58,7 @@ use crate::etl::field::{Field, Fields};
|
||||
use crate::etl::processor::json_parse::JsonParseProcessor;
|
||||
use crate::etl::processor::select::SelectProcessor;
|
||||
use crate::etl::processor::simple_extract::SimpleExtractProcessor;
|
||||
use crate::etl::processor::vrl::VrlProcessor;
|
||||
use crate::Value;
|
||||
use crate::etl::processor::vrl_processor::VrlProcessor;
|
||||
|
||||
const FIELD_NAME: &str = "field";
|
||||
const FIELDS_NAME: &str = "fields";
|
||||
@@ -123,7 +123,7 @@ pub trait Processor: std::fmt::Debug + Send + Sync + 'static {
|
||||
fn ignore_missing(&self) -> bool;
|
||||
|
||||
/// Execute the processor on a vector which has been preprocessed by the pipeline
|
||||
fn exec_mut(&self, val: Value) -> Result<Value>;
|
||||
fn exec_mut(&self, val: VrlValue) -> Result<VrlValue>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -224,7 +224,7 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorKind> {
|
||||
json_parse::PROCESSOR_JSON_PARSE => {
|
||||
ProcessorKind::JsonParse(JsonParseProcessor::try_from(value)?)
|
||||
}
|
||||
vrl::PROCESSOR_VRL => ProcessorKind::Vrl(VrlProcessor::try_from(value)?),
|
||||
vrl_processor::PROCESSOR_VRL => ProcessorKind::Vrl(VrlProcessor::try_from(value)?),
|
||||
select::PROCESSOR_SELECT => ProcessorKind::Select(SelectProcessor::try_from(value)?),
|
||||
_ => return UnsupportedProcessorSnafu { processor: str_key }.fail(),
|
||||
};
|
||||
|
||||
@@ -18,20 +18,22 @@
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use ordered_float::NotNan;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use urlencoding::decode;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
CmcdMissingKeySnafu, CmcdMissingValueSnafu, Error, FailedToParseFloatKeySnafu,
|
||||
FailedToParseIntKeySnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result,
|
||||
FailedToParseIntKeySnafu, FloatIsNanSnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_CMCD: &str = "cmcd";
|
||||
|
||||
@@ -76,42 +78,43 @@ const CMCD_KEYS: [&str; 18] = [
|
||||
];
|
||||
|
||||
/// function to resolve CMCD_KEY_BS | CMCD_KEY_SU
|
||||
fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result<Value> {
|
||||
Ok(Value::Boolean(true))
|
||||
fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result<VrlValue> {
|
||||
Ok(VrlValue::Boolean(true))
|
||||
}
|
||||
|
||||
/// function to resolve CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP | CMCD_KEY_RTP | CMCD_KEY_TB
|
||||
fn br_tb(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
|
||||
fn br_tb(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
|
||||
let v = v.context(CmcdMissingValueSnafu { k, s })?;
|
||||
let val: i64 = v
|
||||
.parse()
|
||||
.context(FailedToParseIntKeySnafu { key: k, value: v })?;
|
||||
Ok(Value::Int64(val))
|
||||
Ok(VrlValue::Integer(val))
|
||||
}
|
||||
|
||||
/// function to resolve CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID | CMCD_KEY_V
|
||||
fn cid_v(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
|
||||
fn cid_v(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
|
||||
let v = v.context(CmcdMissingValueSnafu { k, s })?;
|
||||
Ok(Value::String(v.to_string()))
|
||||
Ok(VrlValue::Bytes(Bytes::from(v.to_string())))
|
||||
}
|
||||
|
||||
/// function to resolve CMCD_KEY_NOR
|
||||
fn nor(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
|
||||
fn nor(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
|
||||
let v = v.context(CmcdMissingValueSnafu { k, s })?;
|
||||
let val = match decode(v) {
|
||||
Ok(val) => val.to_string(),
|
||||
Err(_) => v.to_string(),
|
||||
};
|
||||
Ok(Value::String(val))
|
||||
Ok(VrlValue::Bytes(Bytes::from(val)))
|
||||
}
|
||||
|
||||
/// function to resolve CMCD_KEY_PR
|
||||
fn pr(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
|
||||
fn pr(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
|
||||
let v = v.context(CmcdMissingValueSnafu { k, s })?;
|
||||
let val: f64 = v
|
||||
.parse()
|
||||
.context(FailedToParseFloatKeySnafu { key: k, value: v })?;
|
||||
Ok(Value::Float64(val))
|
||||
let val = NotNan::new(val).context(FloatIsNanSnafu)?;
|
||||
Ok(VrlValue::Float(val))
|
||||
}
|
||||
|
||||
/// Common Media Client Data Specification:
|
||||
@@ -156,11 +159,11 @@ pub struct CmcdProcessor {
|
||||
}
|
||||
|
||||
impl CmcdProcessor {
|
||||
fn generate_key(prefix: &str, key: &str) -> String {
|
||||
format!("{}_{}", prefix, key)
|
||||
fn generate_key(prefix: &str, key: &str) -> KeyString {
|
||||
KeyString::from(format!("{}_{}", prefix, key))
|
||||
}
|
||||
|
||||
fn parse(&self, name: &str, value: &str) -> Result<BTreeMap<String, Value>> {
|
||||
fn parse(&self, name: &str, value: &str) -> Result<BTreeMap<KeyString, VrlValue>> {
|
||||
let mut working_set = BTreeMap::new();
|
||||
|
||||
let parts = value.split(',');
|
||||
@@ -250,16 +253,18 @@ impl Processor for CmcdProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let name = field.input_field();
|
||||
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(name) {
|
||||
Some(Value::String(s)) => {
|
||||
let results = self.parse(field.target_or_input_field(), s)?;
|
||||
val.extend(results.into())?;
|
||||
Some(VrlValue::Bytes(s)) => {
|
||||
let s = String::from_utf8_lossy(s);
|
||||
let results = self.parse(field.target_or_input_field(), &s)?;
|
||||
|
||||
val.extend(results);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind().to_string(),
|
||||
@@ -288,7 +293,6 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::etl::field::{Field, Fields};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_cmcd() {
|
||||
@@ -297,23 +301,23 @@ mod tests {
|
||||
"sid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
|
||||
vec![(
|
||||
"prefix_sid",
|
||||
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
|
||||
)],
|
||||
),
|
||||
(
|
||||
"br%3D3200%2Cbs%2Cd%3D4004%2Cmtp%3D25400%2Cot%3Dv%2Crtp%3D15000%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22%2Ctb%3D6000",
|
||||
vec![
|
||||
("prefix_bs", Value::Boolean(true)),
|
||||
("prefix_ot", Value::String("v".into())),
|
||||
("prefix_rtp", Value::Int64(15000)),
|
||||
("prefix_br", Value::Int64(3200)),
|
||||
("prefix_tb", Value::Int64(6000)),
|
||||
("prefix_d", Value::Int64(4004)),
|
||||
("prefix_bs", VrlValue::Boolean(true)),
|
||||
("prefix_ot", VrlValue::Bytes(Bytes::from("v"))),
|
||||
("prefix_rtp", VrlValue::Integer(15000)),
|
||||
("prefix_br", VrlValue::Integer(3200)),
|
||||
("prefix_tb", VrlValue::Integer(6000)),
|
||||
("prefix_d", VrlValue::Integer(4004)),
|
||||
(
|
||||
"prefix_sid",
|
||||
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
|
||||
),
|
||||
("prefix_mtp", Value::Int64(25400)),
|
||||
("prefix_mtp", VrlValue::Integer(25400)),
|
||||
],
|
||||
),
|
||||
(
|
||||
@@ -322,16 +326,16 @@ mod tests {
|
||||
vec![
|
||||
(
|
||||
"prefix_sid",
|
||||
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
|
||||
),
|
||||
("prefix_rtp", Value::Int64(15000)),
|
||||
("prefix_rtp", VrlValue::Integer(15000)),
|
||||
],
|
||||
),
|
||||
(
|
||||
"bs%2Csu",
|
||||
vec![
|
||||
("prefix_su", Value::Boolean(true)),
|
||||
("prefix_bs", Value::Boolean(true)),
|
||||
("prefix_su", VrlValue::Boolean(true)),
|
||||
("prefix_bs", VrlValue::Boolean(true)),
|
||||
],
|
||||
),
|
||||
(
|
||||
@@ -346,7 +350,7 @@ mod tests {
|
||||
// "prefix_com.examplemyStringKey",
|
||||
// Value::String("\"myStringValue\"".into()),
|
||||
// ),
|
||||
("prefix_d", Value::Int64(4004)),
|
||||
("prefix_d", VrlValue::Integer(4004)),
|
||||
],
|
||||
),
|
||||
(
|
||||
@@ -354,11 +358,11 @@ mod tests {
|
||||
vec![
|
||||
(
|
||||
"prefix_sid",
|
||||
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
|
||||
),
|
||||
(
|
||||
"prefix_nor",
|
||||
Value::String("\"../300kbps/segment35.m4v\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"../300kbps/segment35.m4v\"")),
|
||||
|
||||
),
|
||||
],
|
||||
@@ -366,56 +370,56 @@ mod tests {
|
||||
(
|
||||
"nrr%3D%2212323-48763%22%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
|
||||
vec![
|
||||
("prefix_nrr", Value::String("\"12323-48763\"".into())),
|
||||
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
|
||||
(
|
||||
"prefix_sid",
|
||||
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
"nor%3D%22..%252F300kbps%252Ftrack.m4v%22%2Cnrr%3D%2212323-48763%22%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
|
||||
vec![
|
||||
("prefix_nrr", Value::String("\"12323-48763\"".into())),
|
||||
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
|
||||
(
|
||||
"prefix_sid",
|
||||
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
|
||||
),
|
||||
(
|
||||
"prefix_nor",
|
||||
Value::String("\"../300kbps/track.m4v\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"../300kbps/track.m4v\"")),
|
||||
),
|
||||
],
|
||||
),
|
||||
(
|
||||
"bl%3D21300%2Cbr%3D3200%2Cbs%2Ccid%3D%22faec5fc2-ac30-11eabb37-0242ac130002%22%2Cd%3D4004%2Cdl%3D18500%2Cmtp%3D48100%2Cnor%3D%22..%252F300kbps%252Ftrack.m4v%22%2Cnrr%3D%2212323-48763%22%2Cot%3Dv%2Cpr%3D1.08%2Crtp%3D12000%2Csf%3Dd%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22%2Cst%3Dv%2Csu%2Ctb%3D6000",
|
||||
vec![
|
||||
("prefix_bl", Value::Int64(21300)),
|
||||
("prefix_bs", Value::Boolean(true)),
|
||||
("prefix_st", Value::String("v".into())),
|
||||
("prefix_ot", Value::String("v".into())),
|
||||
("prefix_bl", VrlValue::Integer(21300)),
|
||||
("prefix_bs", VrlValue::Boolean(true)),
|
||||
("prefix_st", VrlValue::Bytes(Bytes::from("v"))),
|
||||
("prefix_ot", VrlValue::Bytes(Bytes::from("v"))),
|
||||
(
|
||||
"prefix_sid",
|
||||
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
|
||||
),
|
||||
("prefix_tb", Value::Int64(6000)),
|
||||
("prefix_d", Value::Int64(4004)),
|
||||
("prefix_tb", VrlValue::Integer(6000)),
|
||||
("prefix_d", VrlValue::Integer(4004)),
|
||||
(
|
||||
"prefix_cid",
|
||||
Value::String("\"faec5fc2-ac30-11eabb37-0242ac130002\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"faec5fc2-ac30-11eabb37-0242ac130002\"")),
|
||||
),
|
||||
("prefix_mtp", Value::Int64(48100)),
|
||||
("prefix_rtp", Value::Int64(12000)),
|
||||
("prefix_mtp", VrlValue::Integer(48100)),
|
||||
("prefix_rtp", VrlValue::Integer(12000)),
|
||||
(
|
||||
"prefix_nor",
|
||||
Value::String("\"../300kbps/track.m4v\"".into()),
|
||||
VrlValue::Bytes(Bytes::from("\"../300kbps/track.m4v\"")),
|
||||
),
|
||||
("prefix_sf", Value::String("d".into())),
|
||||
("prefix_br", Value::Int64(3200)),
|
||||
("prefix_nrr", Value::String("\"12323-48763\"".into())),
|
||||
("prefix_pr", Value::Float64(1.08)),
|
||||
("prefix_su", Value::Boolean(true)),
|
||||
("prefix_dl", Value::Int64(18500)),
|
||||
("prefix_sf", VrlValue::Bytes(Bytes::from("d"))),
|
||||
("prefix_br", VrlValue::Integer(3200)),
|
||||
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
|
||||
("prefix_pr", VrlValue::Float(NotNan::new(1.08).unwrap())),
|
||||
("prefix_su", VrlValue::Boolean(true)),
|
||||
("prefix_dl", VrlValue::Integer(18500)),
|
||||
],
|
||||
),
|
||||
];
|
||||
@@ -432,8 +436,8 @@ mod tests {
|
||||
|
||||
let expected = vec
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.collect::<BTreeMap<String, Value>>();
|
||||
.map(|(k, v)| (KeyString::from(k.to_string()), v))
|
||||
.collect::<BTreeMap<KeyString, VrlValue>>();
|
||||
|
||||
let actual = processor.parse("prefix", &decoded).unwrap();
|
||||
assert_eq!(actual, expected);
|
||||
|
||||
@@ -20,17 +20,19 @@ use csv::{ReaderBuilder, Trim};
|
||||
use itertools::EitherOrBoth::{Both, Left, Right};
|
||||
use itertools::Itertools;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
CsvNoRecordSnafu, CsvQuoteNameSnafu, CsvReadSnafu, CsvSeparatorNameSnafu, Error,
|
||||
KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
|
||||
ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_CSV: &str = "csv";
|
||||
|
||||
@@ -60,8 +62,8 @@ pub struct CsvProcessor {
|
||||
|
||||
impl CsvProcessor {
|
||||
// process the csv format string to a map with target_fields as keys
|
||||
fn process(&self, val: &str) -> Result<BTreeMap<String, Value>> {
|
||||
let mut reader = self.reader.from_reader(val.as_bytes());
|
||||
fn process(&self, val: &[u8]) -> Result<BTreeMap<KeyString, VrlValue>> {
|
||||
let mut reader = self.reader.from_reader(val);
|
||||
|
||||
if let Some(result) = reader.records().next() {
|
||||
let record: csv::StringRecord = result.context(CsvReadSnafu)?;
|
||||
@@ -71,17 +73,18 @@ impl CsvProcessor {
|
||||
.iter()
|
||||
.zip_longest(record.iter())
|
||||
.filter_map(|zipped| match zipped {
|
||||
Both(target_field, val) => {
|
||||
Some((target_field.clone(), Value::String(val.into())))
|
||||
}
|
||||
Both(target_field, val) => Some((
|
||||
KeyString::from(target_field.clone()),
|
||||
VrlValue::Bytes(Bytes::from(val.to_string())),
|
||||
)),
|
||||
// if target fields are more than extracted fields, fill the rest with empty value
|
||||
Left(target_field) => {
|
||||
let value = self
|
||||
.empty_value
|
||||
.as_ref()
|
||||
.map(|s| Value::String(s.clone()))
|
||||
.unwrap_or(Value::Null);
|
||||
Some((target_field.clone(), value))
|
||||
.map(|s| VrlValue::Bytes(Bytes::from(s.clone())))
|
||||
.unwrap_or(VrlValue::Null);
|
||||
Some((KeyString::from(target_field.clone()), value))
|
||||
}
|
||||
// if extracted fields are more than target fields, ignore the rest
|
||||
Right(_) => None,
|
||||
@@ -190,16 +193,18 @@ impl Processor for CsvProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let name = field.input_field();
|
||||
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
match val.get(name) {
|
||||
Some(Value::String(v)) => {
|
||||
Some(VrlValue::Bytes(v)) => {
|
||||
let results = self.process(v)?;
|
||||
val.extend(results.into())?;
|
||||
val.extend(results);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind().to_string(),
|
||||
@@ -238,11 +243,11 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor.process("1,2").unwrap();
|
||||
let result = processor.process(b"1,2").unwrap();
|
||||
|
||||
let values: BTreeMap<String, Value> = [
|
||||
("a".into(), Value::String("1".into())),
|
||||
("b".into(), Value::String("2".into())),
|
||||
let values: BTreeMap<KeyString, VrlValue> = [
|
||||
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
|
||||
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
@@ -264,12 +269,12 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor.process("1,2").unwrap();
|
||||
let result = processor.process(b"1,2").unwrap();
|
||||
|
||||
let values: BTreeMap<String, Value> = [
|
||||
("a".into(), Value::String("1".into())),
|
||||
("b".into(), Value::String("2".into())),
|
||||
("c".into(), Value::Null),
|
||||
let values: BTreeMap<KeyString, VrlValue> = [
|
||||
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
|
||||
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
|
||||
(KeyString::from("c"), VrlValue::Null),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
@@ -289,12 +294,15 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor.process("1,2").unwrap();
|
||||
let result = processor.process(b"1,2").unwrap();
|
||||
|
||||
let values: BTreeMap<String, Value> = [
|
||||
("a".into(), Value::String("1".into())),
|
||||
("b".into(), Value::String("2".into())),
|
||||
("c".into(), Value::String("default".into())),
|
||||
let values: BTreeMap<KeyString, VrlValue> = [
|
||||
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
|
||||
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
|
||||
(
|
||||
KeyString::from("c"),
|
||||
VrlValue::Bytes(Bytes::from("default")),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
@@ -315,11 +323,11 @@ mod tests {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor.process("1,2").unwrap();
|
||||
let result = processor.process(b"1,2").unwrap();
|
||||
|
||||
let values: BTreeMap<String, Value> = [
|
||||
("a".into(), Value::String("1".into())),
|
||||
("b".into(), Value::String("2".into())),
|
||||
let values: BTreeMap<KeyString, VrlValue> = [
|
||||
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
|
||||
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
@@ -14,22 +14,22 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use chrono::{DateTime, NaiveDateTime};
|
||||
use chrono::{DateTime, NaiveDateTime, Utc};
|
||||
use chrono_tz::Tz;
|
||||
use lazy_static::lazy_static;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
DateFailedToGetLocalTimezoneSnafu, DateFailedToGetTimestampSnafu, DateParseSnafu,
|
||||
DateParseTimezoneSnafu, Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorFailedToParseStringSnafu, ProcessorMissingFieldSnafu, Result,
|
||||
DateFailedToGetLocalTimezoneSnafu, DateParseSnafu, DateParseTimezoneSnafu, Error,
|
||||
KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorFailedToParseStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, FIELDS_NAME,
|
||||
FIELD_NAME, IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::{Timestamp, Value};
|
||||
|
||||
pub(crate) const PROCESSOR_DATE: &str = "date";
|
||||
|
||||
@@ -162,7 +162,7 @@ pub struct DateProcessor {
|
||||
}
|
||||
|
||||
impl DateProcessor {
|
||||
fn parse(&self, val: &str) -> Result<Timestamp> {
|
||||
fn parse(&self, val: &str) -> Result<DateTime<Utc>> {
|
||||
let mut tz = Tz::UTC;
|
||||
if let Some(timezone) = &self.timezone {
|
||||
tz = timezone.parse::<Tz>().context(DateParseTimezoneSnafu {
|
||||
@@ -171,8 +171,8 @@ impl DateProcessor {
|
||||
}
|
||||
|
||||
for fmt in self.formats.iter() {
|
||||
if let Ok(ns) = try_parse(val, fmt, tz) {
|
||||
return Ok(Timestamp::Nanosecond(ns));
|
||||
if let Ok(utc_ts) = try_parse(val, fmt, tz) {
|
||||
return Ok(utc_ts);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -193,16 +193,19 @@ impl Processor for DateProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
match val.get(index) {
|
||||
Some(Value::String(s)) => {
|
||||
let timestamp = self.parse(s)?;
|
||||
Some(VrlValue::Bytes(s)) => {
|
||||
let timestamp = self.parse(String::from_utf8_lossy(s).as_ref())?;
|
||||
let output_key = field.target_or_input_field();
|
||||
val.insert(output_key.to_string(), Value::Timestamp(timestamp))?;
|
||||
val.insert(KeyString::from(output_key), VrlValue::Timestamp(timestamp));
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind().to_string(),
|
||||
@@ -224,21 +227,19 @@ impl Processor for DateProcessor {
|
||||
}
|
||||
}
|
||||
|
||||
/// try to parse val with timezone first, if failed, parse without timezone
|
||||
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Result<i64> {
|
||||
// parse the datetime with timezone info
|
||||
// if failed, try to parse using naive date time and add tz info
|
||||
// finally convert the datetime to utc
|
||||
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Result<DateTime<Utc>> {
|
||||
if let Ok(dt) = DateTime::parse_from_str(val, fmt) {
|
||||
Ok(dt
|
||||
.timestamp_nanos_opt()
|
||||
.context(DateFailedToGetTimestampSnafu)?)
|
||||
Ok(dt.to_utc())
|
||||
} else {
|
||||
let dt = NaiveDateTime::parse_from_str(val, fmt)
|
||||
.context(DateParseSnafu { value: val })?
|
||||
.and_local_timezone(tz)
|
||||
.single()
|
||||
.context(DateFailedToGetLocalTimezoneSnafu)?;
|
||||
Ok(dt
|
||||
.timestamp_nanos_opt()
|
||||
.context(DateFailedToGetTimestampSnafu)?)
|
||||
Ok(dt.to_utc())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -21,15 +21,17 @@
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
|
||||
ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_DECOLORIZE: &str = "decolorize";
|
||||
|
||||
@@ -43,13 +45,15 @@ pub struct DecolorizeProcessor {
|
||||
}
|
||||
|
||||
impl DecolorizeProcessor {
|
||||
fn process_string(&self, val: &str) -> Result<Value> {
|
||||
Ok(Value::String(RE.replace_all(val, "").into_owned()))
|
||||
fn process_string(&self, val: &str) -> Result<VrlValue> {
|
||||
Ok(VrlValue::Bytes(Bytes::from(
|
||||
RE.replace_all(val, "").to_string(),
|
||||
)))
|
||||
}
|
||||
|
||||
fn process(&self, val: &Value) -> Result<Value> {
|
||||
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
match val {
|
||||
Value::String(val) => self.process_string(val),
|
||||
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
|
||||
_ => ProcessorExpectStringSnafu {
|
||||
processor: PROCESSOR_DECOLORIZE,
|
||||
v: val.clone(),
|
||||
@@ -101,11 +105,12 @@ impl crate::etl::processor::Processor for DecolorizeProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -117,7 +122,7 @@ impl crate::etl::processor::Processor for DecolorizeProcessor {
|
||||
Some(v) => {
|
||||
let result = self.process(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_index), result);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -136,16 +141,19 @@ mod tests {
|
||||
ignore_missing: false,
|
||||
};
|
||||
|
||||
let val = Value::String("\x1b[32mGreen\x1b[0m".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("\x1b[32mGreen\x1b[0m".to_string()));
|
||||
let result = processor.process(&val).unwrap();
|
||||
assert_eq!(result, Value::String("Green".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("Green".to_string())));
|
||||
|
||||
let val = Value::String("Plain text".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("Plain text".to_string()));
|
||||
let result = processor.process(&val).unwrap();
|
||||
assert_eq!(result, Value::String("Plain text".to_string()));
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("Plain text".to_string()))
|
||||
);
|
||||
|
||||
let val = Value::String("\x1b[46mfoo\x1b[0m bar".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("\x1b[46mfoo\x1b[0m bar".to_string()));
|
||||
let result = processor.process(&val).unwrap();
|
||||
assert_eq!(result, Value::String("foo bar".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("foo bar".to_string())));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,16 +23,17 @@ use std::borrow::Cow;
|
||||
|
||||
use regex::Regex;
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
DigestPatternInvalidSnafu, Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_DIGEST: &str = "digest";
|
||||
|
||||
@@ -100,7 +101,7 @@ impl DigestProcessor {
|
||||
re.replace_all(val, "").to_string()
|
||||
}
|
||||
|
||||
fn process_string(&self, val: &str) -> Result<Value> {
|
||||
fn process_string(&self, val: &str) -> Result<VrlValue> {
|
||||
let mut input = Cow::from(val);
|
||||
for pattern in &self.patterns {
|
||||
if let Cow::Owned(new_string) = pattern.replace_all(&input, "") {
|
||||
@@ -108,12 +109,12 @@ impl DigestProcessor {
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Value::String(input.into_owned()))
|
||||
Ok(VrlValue::Bytes(Bytes::from(input.to_string())))
|
||||
}
|
||||
|
||||
fn process(&self, val: &Value) -> Result<Value> {
|
||||
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
match val {
|
||||
Value::String(val) => self.process_string(val),
|
||||
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
|
||||
_ => ProcessorExpectStringSnafu {
|
||||
processor: PROCESSOR_DIGEST,
|
||||
v: val.clone(),
|
||||
@@ -200,11 +201,12 @@ impl crate::etl::processor::Processor for DigestProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -216,7 +218,7 @@ impl crate::etl::processor::Processor for DigestProcessor {
|
||||
Some(v) => {
|
||||
let result = self.process(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_index), result);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -237,24 +239,31 @@ mod tests {
|
||||
patterns: vec![PresetPattern::Ip.regex()],
|
||||
};
|
||||
|
||||
let input = Value::String("192.168.1.1".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("192.168.1.1".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
let input = Value::String("192.168.1.1:8080".to_string());
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
let input = VrlValue::Bytes(Bytes::from("192.168.1.1:8080".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"[2001:0db8:85a3:0000:0000:8a2e:0370:7334]".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("not an ip".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("not an ip".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("not an ip".to_string()));
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("not an ip".to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -265,29 +274,40 @@ mod tests {
|
||||
patterns: vec![PresetPattern::Uuid.regex()],
|
||||
};
|
||||
// UUID v4
|
||||
let input = Value::String("123e4567-e89b-12d3-a456-426614174000".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"123e4567-e89b-12d3-a456-426614174000".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// UUID v1
|
||||
let input = Value::String("6ba7b810-9dad-11d1-80b4-00c04fd430c8".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"6ba7b810-9dad-11d1-80b4-00c04fd430c8".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// UUID v5
|
||||
let input = Value::String("886313e1-3b8a-5372-9b90-0c9aee199e5d".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"886313e1-3b8a-5372-9b90-0c9aee199e5d".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// UUID with uppercase letters
|
||||
let input = Value::String("A987FBC9-4BED-3078-CF07-9141BA07C9F3".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from(
|
||||
"A987FBC9-4BED-3078-CF07-9141BA07C9F3".to_string(),
|
||||
));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// Negative case
|
||||
let input = Value::String("not a uuid".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("not a uuid".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("not a uuid".to_string()));
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("not a uuid".to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -299,45 +319,48 @@ mod tests {
|
||||
};
|
||||
|
||||
// Basic brackets
|
||||
let input = Value::String("[content]".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("[content]".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("(content)".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("(content)".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// Chinese brackets
|
||||
let input = Value::String("「content」".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("「content」".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("『content』".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("『content』".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("【content】".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("【content】".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// Unmatched/unclosed brackets should not match
|
||||
let input = Value::String("[content".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("[content".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("[content".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("[content".to_string())));
|
||||
|
||||
let input = Value::String("content]".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("content]".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("content]".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("content]".to_string())));
|
||||
|
||||
// Bad case
|
||||
let input = Value::String("[content}".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("[content}".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
// Negative case
|
||||
let input = Value::String("no brackets".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("no brackets".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("no brackets".to_string()));
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("no brackets".to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -348,16 +371,19 @@ mod tests {
|
||||
patterns: vec![PresetPattern::Quoted.regex()],
|
||||
};
|
||||
|
||||
let input = Value::String("\"quoted content\"".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("\"quoted content\"".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("no quotes".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("no quotes".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("no quotes".to_string()));
|
||||
let input = Value::String("".to_string());
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("no quotes".to_string()))
|
||||
);
|
||||
let input = VrlValue::Bytes(Bytes::from("".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -368,15 +394,18 @@ mod tests {
|
||||
patterns: vec![Regex::new(r"\d+").unwrap()],
|
||||
};
|
||||
|
||||
let input = Value::String("12345".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("12345".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
|
||||
let input = Value::String("no digits".to_string());
|
||||
let input = VrlValue::Bytes(Bytes::from("no digits".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("no digits".to_string()));
|
||||
let input = Value::String("".to_string());
|
||||
assert_eq!(
|
||||
result,
|
||||
VrlValue::Bytes(Bytes::from("no digits".to_string()))
|
||||
);
|
||||
let input = VrlValue::Bytes(Bytes::from("".to_string()));
|
||||
let result = processor.process(&input).unwrap();
|
||||
assert_eq!(result, Value::String("".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,8 @@ use std::ops::Deref;
|
||||
use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
|
||||
use itertools::Itertools;
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
DissectAppendOrderAlreadySetSnafu, DissectConsecutiveNamesSnafu, DissectEmptyPatternSnafu,
|
||||
@@ -24,13 +26,13 @@ use crate::error::{
|
||||
DissectNoMatchingPatternSnafu, DissectOrderOnlyAppendModifierSnafu,
|
||||
DissectOrderOnlyAppendSnafu, DissectSplitExceedsInputSnafu, DissectSplitNotMatchInputSnafu,
|
||||
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
|
||||
ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_parse_string, yaml_parse_strings, yaml_string,
|
||||
Processor, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERNS_NAME, PATTERN_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_DISSECT: &str = "dissect";
|
||||
|
||||
@@ -421,7 +423,7 @@ impl DissectProcessor {
|
||||
name: &'a Name,
|
||||
value: String,
|
||||
appends: &mut HashMap<&'a String, Vec<(String, u32)>>,
|
||||
map: &mut Vec<(&'a String, Value)>,
|
||||
map: &mut Vec<(&'a String, VrlValue)>,
|
||||
) {
|
||||
match name.start_modifier {
|
||||
Some(StartModifier::NamedSkip) => {
|
||||
@@ -438,12 +440,16 @@ impl DissectProcessor {
|
||||
// because transform can know the key name
|
||||
}
|
||||
None => {
|
||||
map.push((&name.name, Value::String(value)));
|
||||
map.push((&name.name, VrlValue::Bytes(Bytes::from(value))));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn process_pattern(&self, chs: &[char], pattern: &Pattern) -> Result<Vec<(String, Value)>> {
|
||||
fn process_pattern(
|
||||
&self,
|
||||
chs: &[char],
|
||||
pattern: &Pattern,
|
||||
) -> Result<Vec<(KeyString, VrlValue)>> {
|
||||
let mut map = Vec::new();
|
||||
let mut pos = 0;
|
||||
|
||||
@@ -523,14 +529,17 @@ impl DissectProcessor {
|
||||
for (name, mut values) in appends {
|
||||
values.sort_by(|a, b| a.1.cmp(&b.1));
|
||||
let value = values.into_iter().map(|(a, _)| a).join(sep);
|
||||
map.push((name, Value::String(value)));
|
||||
map.push((name, VrlValue::Bytes(Bytes::from(value))));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(map.into_iter().map(|(k, v)| (k.to_string(), v)).collect())
|
||||
Ok(map
|
||||
.into_iter()
|
||||
.map(|(k, v)| (KeyString::from(k.clone()), v))
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn process(&self, val: &str) -> Result<Vec<(String, Value)>> {
|
||||
fn process(&self, val: &str) -> Result<Vec<(KeyString, VrlValue)>> {
|
||||
let chs = val.chars().collect::<Vec<char>>();
|
||||
|
||||
for pattern in &self.patterns {
|
||||
@@ -600,17 +609,18 @@ impl Processor for DissectProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::String(val_str)) => {
|
||||
let r = self.process(val_str)?;
|
||||
Some(VrlValue::Bytes(val_str)) => {
|
||||
let r = self.process(String::from_utf8_lossy(val_str).as_ref())?;
|
||||
for (k, v) in r {
|
||||
val.insert(k, v)?;
|
||||
val.insert(k, v);
|
||||
}
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -639,17 +649,18 @@ fn is_valid_char(ch: char) -> bool {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ahash::HashMap;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use super::{DissectProcessor, EndModifier, Name, Part, StartModifier};
|
||||
use crate::etl::processor::dissect::Pattern;
|
||||
use crate::etl::value::Value;
|
||||
|
||||
fn assert(pattern_str: &str, input: &str, expected: HashMap<String, Value>) {
|
||||
fn assert(pattern_str: &str, input: &str, expected: HashMap<KeyString, VrlValue>) {
|
||||
let chs = input.chars().collect::<Vec<char>>();
|
||||
let patterns: Vec<Pattern> = vec![pattern_str.parse().unwrap()];
|
||||
|
||||
let processor = DissectProcessor::default();
|
||||
let result: HashMap<String, Value> = processor
|
||||
let result: HashMap<KeyString, VrlValue> = processor
|
||||
.process_pattern(&chs, &patterns[0])
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
@@ -991,8 +1002,13 @@ mod tests {
|
||||
("httpversion", "1.0"),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
|
||||
.collect::<HashMap<String, Value>>();
|
||||
.map(|(k, v)| {
|
||||
(
|
||||
KeyString::from(k.to_string()),
|
||||
VrlValue::Bytes(Bytes::from(v.to_string())),
|
||||
)
|
||||
})
|
||||
.collect::<HashMap<KeyString, VrlValue>>();
|
||||
|
||||
{
|
||||
// pattern start with Name
|
||||
@@ -1032,9 +1048,12 @@ mod tests {
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(pattern, input, expected)| {
|
||||
let map = expected
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
|
||||
let map = expected.into_iter().map(|(k, v)| {
|
||||
(
|
||||
KeyString::from(k.to_string()),
|
||||
VrlValue::Bytes(Bytes::from(v.to_string())),
|
||||
)
|
||||
});
|
||||
(pattern, input, map)
|
||||
});
|
||||
|
||||
@@ -1042,7 +1061,7 @@ mod tests {
|
||||
assert(
|
||||
pattern_str,
|
||||
input,
|
||||
expected.collect::<HashMap<String, Value>>(),
|
||||
expected.collect::<HashMap<KeyString, VrlValue>>(),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1063,9 +1082,12 @@ mod tests {
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(pattern, input, expected)| {
|
||||
let map = expected
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
|
||||
let map = expected.into_iter().map(|(k, v)| {
|
||||
(
|
||||
KeyString::from(k.to_string()),
|
||||
VrlValue::Bytes(Bytes::from(v.to_string())),
|
||||
)
|
||||
});
|
||||
(pattern, input, map)
|
||||
});
|
||||
|
||||
@@ -1073,7 +1095,7 @@ mod tests {
|
||||
assert(
|
||||
pattern_str,
|
||||
input,
|
||||
expected.collect::<HashMap<String, Value>>(),
|
||||
expected.collect::<HashMap<KeyString, VrlValue>>(),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1090,9 +1112,12 @@ mod tests {
|
||||
)]
|
||||
.into_iter()
|
||||
.map(|(pattern, input, expected)| {
|
||||
let map = expected
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
|
||||
let map = expected.into_iter().map(|(k, v)| {
|
||||
(
|
||||
KeyString::from(k.to_string()),
|
||||
VrlValue::Bytes(Bytes::from(v.to_string())),
|
||||
)
|
||||
});
|
||||
(pattern, input, map)
|
||||
});
|
||||
|
||||
@@ -1100,7 +1125,7 @@ mod tests {
|
||||
assert(
|
||||
pattern_str,
|
||||
input,
|
||||
expected.collect::<HashMap<String, Value>>(),
|
||||
expected.collect::<HashMap<KeyString, VrlValue>>(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,24 +12,26 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
EpochInvalidResolutionSnafu, Error, FailedToParseIntSnafu, KeyMustBeStringSnafu,
|
||||
ProcessorMissingFieldSnafu, ProcessorUnsupportedValueSnafu, Result,
|
||||
EpochInvalidResolutionSnafu, Error, FailedToParseIntSnafu, InvalidEpochForResolutionSnafu,
|
||||
KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, ProcessorUnsupportedValueSnafu, Result,
|
||||
ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::etl::value::time::{
|
||||
use crate::etl::value::{
|
||||
MICROSECOND_RESOLUTION, MICRO_RESOLUTION, MILLISECOND_RESOLUTION, MILLI_RESOLUTION,
|
||||
MS_RESOLUTION, NANOSECOND_RESOLUTION, NANO_RESOLUTION, NS_RESOLUTION, SECOND_RESOLUTION,
|
||||
SEC_RESOLUTION, S_RESOLUTION, US_RESOLUTION,
|
||||
};
|
||||
use crate::etl::value::{Timestamp, Value};
|
||||
|
||||
pub(crate) const PROCESSOR_EPOCH: &str = "epoch";
|
||||
const RESOLUTION_NAME: &str = "resolution";
|
||||
@@ -43,6 +45,18 @@ pub(crate) enum Resolution {
|
||||
Nano,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Resolution {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let text = match self {
|
||||
Resolution::Second => SECOND_RESOLUTION,
|
||||
Resolution::Milli => MILLISECOND_RESOLUTION,
|
||||
Resolution::Micro => MICROSECOND_RESOLUTION,
|
||||
Resolution::Nano => NANOSECOND_RESOLUTION,
|
||||
};
|
||||
write!(f, "{}", text)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for Resolution {
|
||||
type Error = Error;
|
||||
|
||||
@@ -84,43 +98,36 @@ pub struct EpochProcessor {
|
||||
}
|
||||
|
||||
impl EpochProcessor {
|
||||
fn parse(&self, val: &Value) -> Result<Timestamp> {
|
||||
let t: i64 = match val {
|
||||
Value::String(s) => s
|
||||
.parse::<i64>()
|
||||
.context(FailedToParseIntSnafu { value: s })?,
|
||||
Value::Int16(i) => *i as i64,
|
||||
Value::Int32(i) => *i as i64,
|
||||
Value::Int64(i) => *i,
|
||||
Value::Uint8(i) => *i as i64,
|
||||
Value::Uint16(i) => *i as i64,
|
||||
Value::Uint32(i) => *i as i64,
|
||||
Value::Uint64(i) => *i as i64,
|
||||
Value::Float32(f) => *f as i64,
|
||||
Value::Float64(f) => *f as i64,
|
||||
|
||||
Value::Timestamp(t) => match self.resolution {
|
||||
Resolution::Second => t.timestamp(),
|
||||
Resolution::Milli => t.timestamp_millis(),
|
||||
Resolution::Micro => t.timestamp_micros(),
|
||||
Resolution::Nano => t.timestamp_nanos(),
|
||||
},
|
||||
|
||||
_ => {
|
||||
return ProcessorUnsupportedValueSnafu {
|
||||
processor: PROCESSOR_EPOCH,
|
||||
val: val.to_string(),
|
||||
fn parse(&self, val: &VrlValue) -> Result<DateTime<Utc>> {
|
||||
let t: i64 =
|
||||
match val {
|
||||
VrlValue::Bytes(bytes) => String::from_utf8_lossy(bytes).parse::<i64>().context(
|
||||
FailedToParseIntSnafu {
|
||||
value: val.to_string_lossy(),
|
||||
},
|
||||
)?,
|
||||
VrlValue::Integer(ts) => *ts,
|
||||
VrlValue::Float(not_nan) => not_nan.into_inner() as i64,
|
||||
VrlValue::Timestamp(date_time) => return Ok(*date_time),
|
||||
_ => {
|
||||
return ProcessorUnsupportedValueSnafu {
|
||||
processor: PROCESSOR_EPOCH,
|
||||
val: val.to_string(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
match self.resolution {
|
||||
Resolution::Second => Ok(Timestamp::Second(t)),
|
||||
Resolution::Milli => Ok(Timestamp::Millisecond(t)),
|
||||
Resolution::Micro => Ok(Timestamp::Microsecond(t)),
|
||||
Resolution::Nano => Ok(Timestamp::Nanosecond(t)),
|
||||
Resolution::Second => DateTime::from_timestamp(t, 0),
|
||||
Resolution::Milli => DateTime::from_timestamp_millis(t),
|
||||
Resolution::Micro => DateTime::from_timestamp_micros(t),
|
||||
Resolution::Nano => Some(DateTime::from_timestamp_nanos(t)),
|
||||
}
|
||||
.context(InvalidEpochForResolutionSnafu {
|
||||
value: t,
|
||||
resolution: self.resolution.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,11 +181,12 @@ impl Processor for EpochProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -190,7 +198,10 @@ impl Processor for EpochProcessor {
|
||||
Some(v) => {
|
||||
let timestamp = self.parse(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), Value::Timestamp(timestamp))?;
|
||||
val.insert(
|
||||
KeyString::from(output_index.to_string()),
|
||||
VrlValue::Timestamp(timestamp),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -200,8 +211,12 @@ impl Processor for EpochProcessor {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use chrono::DateTime;
|
||||
use ordered_float::NotNan;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use super::EpochProcessor;
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_parse_epoch() {
|
||||
@@ -211,15 +226,15 @@ mod tests {
|
||||
};
|
||||
|
||||
let values = [
|
||||
Value::String("1573840000".into()),
|
||||
Value::Int32(1573840000),
|
||||
Value::Uint64(1573840000),
|
||||
Value::Float32(1573840000.0),
|
||||
VrlValue::Bytes(Bytes::from("1573840000")),
|
||||
VrlValue::Integer(1573840000),
|
||||
VrlValue::Integer(1573840000),
|
||||
VrlValue::Float(NotNan::new(1573840000.0).unwrap()),
|
||||
];
|
||||
|
||||
for value in values {
|
||||
let parsed = processor.parse(&value).unwrap();
|
||||
assert_eq!(parsed, super::Timestamp::Second(1573840000));
|
||||
assert_eq!(parsed, DateTime::from_timestamp(1573840000, 0).unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,17 +14,19 @@
|
||||
|
||||
use regex::Regex;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, GsubPatternRequiredSnafu, GsubReplacementRequiredSnafu, KeyMustBeStringSnafu,
|
||||
ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, RegexSnafu, Result,
|
||||
ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, PATTERN_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_GSUB: &str = "gsub";
|
||||
|
||||
@@ -40,16 +42,16 @@ pub struct GsubProcessor {
|
||||
}
|
||||
|
||||
impl GsubProcessor {
|
||||
fn process_string(&self, val: &str) -> Result<Value> {
|
||||
fn process_string(&self, val: &str) -> Result<VrlValue> {
|
||||
let new_val = self.pattern.replace_all(val, &self.replacement).to_string();
|
||||
let val = Value::String(new_val);
|
||||
let val = VrlValue::Bytes(Bytes::from(new_val));
|
||||
|
||||
Ok(val)
|
||||
}
|
||||
|
||||
fn process(&self, val: &Value) -> Result<Value> {
|
||||
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
match val {
|
||||
Value::String(val) => self.process_string(val),
|
||||
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
|
||||
_ => ProcessorExpectStringSnafu {
|
||||
processor: PROCESSOR_GSUB,
|
||||
v: val.clone(),
|
||||
@@ -117,11 +119,12 @@ impl crate::etl::processor::Processor for GsubProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -133,7 +136,7 @@ impl crate::etl::processor::Processor for GsubProcessor {
|
||||
Some(v) => {
|
||||
let result = self.process(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_index.to_string()), result);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -145,7 +148,6 @@ impl crate::etl::processor::Processor for GsubProcessor {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::etl::processor::gsub::GsubProcessor;
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_string_value() {
|
||||
@@ -156,9 +158,9 @@ mod tests {
|
||||
ignore_missing: false,
|
||||
};
|
||||
|
||||
let val = Value::String("123".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("123"));
|
||||
let result = processor.process(&val).unwrap();
|
||||
|
||||
assert_eq!(result, Value::String("xxx".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("xxx")));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,17 +13,18 @@
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, JoinSeparatorRequiredSnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, SEPARATOR_NAME,
|
||||
};
|
||||
use crate::etl::value::{Array, Value};
|
||||
|
||||
pub(crate) const PROCESSOR_JOIN: &str = "join";
|
||||
|
||||
@@ -36,14 +37,14 @@ pub struct JoinProcessor {
|
||||
}
|
||||
|
||||
impl JoinProcessor {
|
||||
fn process(&self, arr: &Array) -> Result<Value> {
|
||||
fn process(&self, arr: &[VrlValue]) -> Result<VrlValue> {
|
||||
let val = arr
|
||||
.iter()
|
||||
.map(|v| v.to_str_value())
|
||||
.collect::<Vec<String>>()
|
||||
.map(|v| v.to_string_lossy())
|
||||
.collect::<Vec<_>>()
|
||||
.join(&self.separator);
|
||||
|
||||
Ok(Value::String(val))
|
||||
Ok(VrlValue::Bytes(Bytes::from(val)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,16 +95,17 @@ impl Processor for JoinProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::Array(arr)) => {
|
||||
Some(VrlValue::Array(arr)) => {
|
||||
let result = self.process(arr)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_index.to_string()), result);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -129,8 +131,10 @@ impl Processor for JoinProcessor {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::etl::processor::join::JoinProcessor;
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_join_processor() {
|
||||
@@ -140,11 +144,10 @@ mod tests {
|
||||
};
|
||||
|
||||
let arr = vec![
|
||||
Value::String("a".to_string()),
|
||||
Value::String("b".to_string()),
|
||||
]
|
||||
.into();
|
||||
VrlValue::Bytes(Bytes::from("a")),
|
||||
VrlValue::Bytes(Bytes::from("b")),
|
||||
];
|
||||
let result = processor.process(&arr).unwrap();
|
||||
assert_eq!(result, Value::String("a-b".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("a-b")));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,16 +13,17 @@
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::{OptionExt as _, ResultExt};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, FieldMustBeTypeSnafu, JsonParseSnafu, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu,
|
||||
ProcessorUnsupportedValueSnafu, Result,
|
||||
ProcessorUnsupportedValueSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
|
||||
};
|
||||
use crate::{json_to_map, Processor, Value};
|
||||
use crate::Processor;
|
||||
|
||||
pub(crate) const PROCESSOR_JSON_PARSE: &str = "json_parse";
|
||||
|
||||
@@ -67,21 +68,21 @@ impl TryFrom<&yaml_rust::yaml::Hash> for JsonParseProcessor {
|
||||
}
|
||||
|
||||
impl JsonParseProcessor {
|
||||
fn process_field(&self, val: &Value) -> Result<Value> {
|
||||
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
let Some(json_str) = val.as_str() else {
|
||||
return FieldMustBeTypeSnafu {
|
||||
field: val.to_str_type(),
|
||||
field: val.to_string(),
|
||||
ty: "string",
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
let parsed: serde_json::Value = serde_json::from_str(json_str).context(JsonParseSnafu)?;
|
||||
let parsed: VrlValue = serde_json::from_str(&json_str).context(JsonParseSnafu)?;
|
||||
match parsed {
|
||||
serde_json::Value::Object(_) => Ok(json_to_map(parsed)?),
|
||||
serde_json::Value::Array(arr) => Ok(Value::Array(arr.try_into()?)),
|
||||
VrlValue::Object(_) => Ok(parsed),
|
||||
VrlValue::Array(_) => Ok(parsed),
|
||||
_ => ProcessorUnsupportedValueSnafu {
|
||||
processor: self.kind(),
|
||||
val: val.to_str_type(),
|
||||
val: val.to_string(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
@@ -97,14 +98,15 @@ impl Processor for JsonParseProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(v) => {
|
||||
let processed = self.process_field(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), processed)?;
|
||||
val.insert(KeyString::from(output_index.to_string()), processed);
|
||||
}
|
||||
None => {
|
||||
if !self.ignore_missing {
|
||||
@@ -123,24 +125,27 @@ impl Processor for JsonParseProcessor {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::etl::processor::json_parse::JsonParseProcessor;
|
||||
|
||||
#[test]
|
||||
fn test_json_parse() {
|
||||
use super::*;
|
||||
use crate::Value;
|
||||
|
||||
let processor = JsonParseProcessor {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor
|
||||
.process_field(&Value::String(r#"{"hello": "world"}"#.to_string()))
|
||||
.process_field(&VrlValue::Bytes(Bytes::from(r#"{"hello": "world"}"#)))
|
||||
.unwrap();
|
||||
|
||||
let expected = Value::Map(crate::Map::one(
|
||||
"hello".to_string(),
|
||||
Value::String("world".to_string()),
|
||||
));
|
||||
let expected = VrlValue::Object(BTreeMap::from([(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world")),
|
||||
)]));
|
||||
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
|
||||
@@ -14,17 +14,17 @@
|
||||
|
||||
use jsonpath_rust::JsonPath;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, JsonPathParseResultIndexSnafu, JsonPathParseSnafu, KeyMustBeStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result,
|
||||
Error, JsonParseSnafu, JsonPathParseResultIndexSnafu, JsonPathParseSnafu, KeyMustBeStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, JSON_PATH_NAME, JSON_PATH_RESULT_INDEX_NAME,
|
||||
};
|
||||
use crate::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_JSON_PATH: &str = "json_path";
|
||||
|
||||
@@ -84,7 +84,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for JsonPathProcessor {
|
||||
#[derive(Debug)]
|
||||
pub struct JsonPathProcessor {
|
||||
fields: Fields,
|
||||
json_path: JsonPath<Value>,
|
||||
json_path: JsonPath<serde_json::Value>,
|
||||
ignore_missing: bool,
|
||||
result_index: Option<usize>,
|
||||
}
|
||||
@@ -101,17 +101,22 @@ impl Default for JsonPathProcessor {
|
||||
}
|
||||
|
||||
impl JsonPathProcessor {
|
||||
fn process_field(&self, val: &Value) -> Result<Value> {
|
||||
let processed = self.json_path.find(val);
|
||||
match processed {
|
||||
Value::Array(arr) => {
|
||||
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
let v = serde_json::to_value(val).context(JsonParseSnafu)?;
|
||||
let p = self.json_path.find(&v);
|
||||
match p {
|
||||
serde_json::Value::Array(arr) => {
|
||||
if let Some(index) = self.result_index {
|
||||
Ok(arr.get(index).cloned().unwrap_or(Value::Null))
|
||||
Ok(arr
|
||||
.get(index)
|
||||
.cloned()
|
||||
.map(|v| v.into())
|
||||
.unwrap_or(VrlValue::Null))
|
||||
} else {
|
||||
Ok(Value::Array(arr))
|
||||
Ok(VrlValue::Array(arr.into_iter().map(|v| v.into()).collect()))
|
||||
}
|
||||
}
|
||||
v => Ok(v),
|
||||
v => Ok(v.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -125,14 +130,15 @@ impl Processor for JsonPathProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(v) => {
|
||||
let processed = self.process_field(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), processed)?;
|
||||
val.insert(KeyString::from(output_index), processed);
|
||||
}
|
||||
None => {
|
||||
if !self.ignore_missing {
|
||||
@@ -151,12 +157,13 @@ impl Processor for JsonPathProcessor {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::Map;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
|
||||
#[test]
|
||||
fn test_json_path() {
|
||||
use super::*;
|
||||
use crate::Value;
|
||||
|
||||
let json_path = JsonPath::try_from("$.hello").unwrap();
|
||||
let processor = JsonPathProcessor {
|
||||
@@ -166,11 +173,11 @@ mod test {
|
||||
};
|
||||
|
||||
let result = processor
|
||||
.process_field(&Value::Map(Map::one(
|
||||
"hello",
|
||||
Value::String("world".to_string()),
|
||||
)))
|
||||
.process_field(&VrlValue::Object(BTreeMap::from([(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world")),
|
||||
)])))
|
||||
.unwrap();
|
||||
assert_eq!(result, Value::String("world".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("world")));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,17 +13,18 @@
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, LetterInvalidMethodSnafu, ProcessorExpectStringSnafu,
|
||||
ProcessorMissingFieldSnafu, Result,
|
||||
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, METHOD_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_LETTER: &str = "letter";
|
||||
|
||||
@@ -67,15 +68,14 @@ pub struct LetterProcessor {
|
||||
}
|
||||
|
||||
impl LetterProcessor {
|
||||
fn process_field(&self, val: &str) -> Result<Value> {
|
||||
let processed = match self.method {
|
||||
Method::Upper => val.to_uppercase(),
|
||||
Method::Lower => val.to_lowercase(),
|
||||
Method::Capital => capitalize(val),
|
||||
};
|
||||
let val = Value::String(processed);
|
||||
|
||||
Ok(val)
|
||||
fn process_field(&self, val: &Bytes) -> VrlValue {
|
||||
match self.method {
|
||||
Method::Upper => VrlValue::Bytes(Bytes::from(val.to_ascii_uppercase())),
|
||||
Method::Lower => VrlValue::Bytes(Bytes::from(val.to_ascii_lowercase())),
|
||||
Method::Capital => VrlValue::Bytes(Bytes::from(capitalize(
|
||||
String::from_utf8_lossy(val).as_ref(),
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,16 +125,17 @@ impl Processor for LetterProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::String(s)) => {
|
||||
let result = self.process_field(s)?;
|
||||
Some(VrlValue::Bytes(s)) => {
|
||||
let result = self.process_field(s);
|
||||
let output_key = field.target_or_input_field();
|
||||
val.insert(output_key.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_key), result);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -167,8 +168,10 @@ fn capitalize(s: &str) -> String {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::etl::processor::letter::{LetterProcessor, Method};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_process() {
|
||||
@@ -177,8 +180,8 @@ mod tests {
|
||||
method: Method::Upper,
|
||||
..Default::default()
|
||||
};
|
||||
let processed = processor.process_field("pipeline").unwrap();
|
||||
assert_eq!(Value::String("PIPELINE".into()), processed)
|
||||
let processed = processor.process_field(&Bytes::from("pipeline"));
|
||||
assert_eq!(VrlValue::Bytes(Bytes::from("PIPELINE")), processed)
|
||||
}
|
||||
|
||||
{
|
||||
@@ -186,8 +189,8 @@ mod tests {
|
||||
method: Method::Lower,
|
||||
..Default::default()
|
||||
};
|
||||
let processed = processor.process_field("Pipeline").unwrap();
|
||||
assert_eq!(Value::String("pipeline".into()), processed)
|
||||
let processed = processor.process_field(&Bytes::from("Pipeline"));
|
||||
assert_eq!(VrlValue::Bytes(Bytes::from("pipeline")), processed)
|
||||
}
|
||||
|
||||
{
|
||||
@@ -195,8 +198,8 @@ mod tests {
|
||||
method: Method::Capital,
|
||||
..Default::default()
|
||||
};
|
||||
let processed = processor.process_field("pipeline").unwrap();
|
||||
assert_eq!(Value::String("Pipeline".into()), processed)
|
||||
let processed = processor.process_field(&Bytes::from("pipeline"));
|
||||
assert_eq!(VrlValue::Bytes(Bytes::from("Pipeline")), processed)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,18 +23,19 @@ use std::collections::BTreeMap;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu,
|
||||
RegexNamedGroupNotFoundSnafu, RegexNoValidFieldSnafu, RegexNoValidPatternSnafu, RegexSnafu,
|
||||
Result,
|
||||
Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, FIELDS_NAME,
|
||||
FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
lazy_static! {
|
||||
static ref GROUPS_NAME_REGEX: Regex = Regex::new(r"\(\?P?<([[:word:]]+)>.+?\)").unwrap();
|
||||
@@ -168,14 +169,17 @@ impl RegexProcessor {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn process(&self, prefix: &str, val: &str) -> Result<BTreeMap<String, Value>> {
|
||||
fn process(&self, prefix: &str, val: &str) -> Result<BTreeMap<KeyString, VrlValue>> {
|
||||
let mut result = BTreeMap::new();
|
||||
for gr in self.patterns.iter() {
|
||||
if let Some(captures) = gr.regex.captures(val) {
|
||||
for group in gr.groups.iter() {
|
||||
if let Some(capture) = captures.name(group) {
|
||||
let value = capture.as_str().to_string();
|
||||
result.insert(generate_key(prefix, group), Value::String(value));
|
||||
result.insert(
|
||||
KeyString::from(generate_key(prefix, group)),
|
||||
VrlValue::Bytes(Bytes::from(value)),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -193,16 +197,17 @@ impl Processor for RegexProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let prefix = field.target_or_input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::String(s)) => {
|
||||
let result = self.process(prefix, s)?;
|
||||
val.extend(result.into())?;
|
||||
Some(VrlValue::Bytes(s)) => {
|
||||
let result = self.process(prefix, String::from_utf8_lossy(s).as_ref())?;
|
||||
val.extend(result);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -226,12 +231,11 @@ impl Processor for RegexProcessor {
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ahash::{HashMap, HashMapExt};
|
||||
use itertools::Itertools;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use super::*;
|
||||
use crate::etl::processor::regex::RegexProcessor;
|
||||
use crate::etl::value::{Map, Value};
|
||||
|
||||
#[test]
|
||||
fn test_simple_parse() {
|
||||
@@ -250,15 +254,11 @@ ignore_missing: false"#;
|
||||
|
||||
let result = processor.process("a", "123").unwrap();
|
||||
|
||||
let map = Map { values: result };
|
||||
let v = vec![(KeyString::from("a_ar"), VrlValue::Bytes(Bytes::from("1")))]
|
||||
.into_iter()
|
||||
.collect::<BTreeMap<KeyString, VrlValue>>();
|
||||
|
||||
let v = Map {
|
||||
values: vec![("a_ar".to_string(), Value::String("1".to_string()))]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
};
|
||||
|
||||
assert_eq!(v, map);
|
||||
assert_eq!(v, result);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -270,15 +270,30 @@ ignore_missing: false"#;
|
||||
let cw = "[c=w,n=US_CA_SANJOSE,o=55155]";
|
||||
let breadcrumbs_str = [cc, cg, co, cp, cw].iter().join(",");
|
||||
|
||||
let temporary_map: BTreeMap<String, Value> = [
|
||||
("breadcrumbs_parent", Value::String(cc.to_string())),
|
||||
("breadcrumbs_edge", Value::String(cg.to_string())),
|
||||
("breadcrumbs_origin", Value::String(co.to_string())),
|
||||
("breadcrumbs_peer", Value::String(cp.to_string())),
|
||||
("breadcrumbs_wrapper", Value::String(cw.to_string())),
|
||||
let temporary_map: BTreeMap<KeyString, VrlValue> = [
|
||||
(
|
||||
"breadcrumbs_parent",
|
||||
VrlValue::Bytes(Bytes::from(cc.to_string())),
|
||||
),
|
||||
(
|
||||
"breadcrumbs_edge",
|
||||
VrlValue::Bytes(Bytes::from(cg.to_string())),
|
||||
),
|
||||
(
|
||||
"breadcrumbs_origin",
|
||||
VrlValue::Bytes(Bytes::from(co.to_string())),
|
||||
),
|
||||
(
|
||||
"breadcrumbs_peer",
|
||||
VrlValue::Bytes(Bytes::from(cp.to_string())),
|
||||
),
|
||||
(
|
||||
"breadcrumbs_wrapper",
|
||||
VrlValue::Bytes(Bytes::from(cw.to_string())),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.map(|(k, v)| (KeyString::from(k), v))
|
||||
.collect();
|
||||
|
||||
{
|
||||
@@ -331,35 +346,66 @@ ignore_missing: false"#;
|
||||
let processor_yaml_hash = processor_yaml.as_hash().unwrap();
|
||||
let processor = RegexProcessor::try_from(processor_yaml_hash).unwrap();
|
||||
|
||||
let mut result = HashMap::new();
|
||||
let mut result = BTreeMap::new();
|
||||
for field in processor.fields.iter() {
|
||||
let s = temporary_map
|
||||
.get(field.input_field())
|
||||
.unwrap()
|
||||
.to_str_value();
|
||||
let s = temporary_map.get(field.input_field()).unwrap();
|
||||
let s = s.to_string_lossy();
|
||||
let prefix = field.target_or_input_field();
|
||||
|
||||
let r = processor.process(prefix, &s).unwrap();
|
||||
let r = processor.process(prefix, s.as_ref()).unwrap();
|
||||
|
||||
result.extend(r);
|
||||
}
|
||||
|
||||
let new_values = vec![
|
||||
("edge_ip", Value::String("12.34.567.89".to_string())),
|
||||
("edge_request_id", Value::String("12345678".to_string())),
|
||||
("edge_geo", Value::String("US_CA_SANJOSE".to_string())),
|
||||
("edge_asn", Value::String("20940".to_string())),
|
||||
("origin_ip", Value::String("987.654.321.09".to_string())),
|
||||
("peer_asn", Value::String("55155".to_string())),
|
||||
("peer_geo", Value::String("US_CA_SANJOSE".to_string())),
|
||||
("parent_asn", Value::String("55155".to_string())),
|
||||
("parent_geo", Value::String("US_CA_SANJOSE".to_string())),
|
||||
("wrapper_asn", Value::String("55155".to_string())),
|
||||
("wrapper_geo", Value::String("US_CA_SANJOSE".to_string())),
|
||||
(
|
||||
"edge_ip",
|
||||
VrlValue::Bytes(Bytes::from("12.34.567.89".to_string())),
|
||||
),
|
||||
(
|
||||
"edge_request_id",
|
||||
VrlValue::Bytes(Bytes::from("12345678".to_string())),
|
||||
),
|
||||
(
|
||||
"edge_geo",
|
||||
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
|
||||
),
|
||||
(
|
||||
"edge_asn",
|
||||
VrlValue::Bytes(Bytes::from("20940".to_string())),
|
||||
),
|
||||
(
|
||||
"origin_ip",
|
||||
VrlValue::Bytes(Bytes::from("987.654.321.09".to_string())),
|
||||
),
|
||||
(
|
||||
"peer_asn",
|
||||
VrlValue::Bytes(Bytes::from("55155".to_string())),
|
||||
),
|
||||
(
|
||||
"peer_geo",
|
||||
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
|
||||
),
|
||||
(
|
||||
"parent_asn",
|
||||
VrlValue::Bytes(Bytes::from("55155".to_string())),
|
||||
),
|
||||
(
|
||||
"parent_geo",
|
||||
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
|
||||
),
|
||||
(
|
||||
"wrapper_asn",
|
||||
VrlValue::Bytes(Bytes::from("55155".to_string())),
|
||||
),
|
||||
(
|
||||
"wrapper_geo",
|
||||
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, v)| (k.to_string(), v))
|
||||
.collect();
|
||||
.map(|(k, v)| (KeyString::from(k), v))
|
||||
.collect::<BTreeMap<KeyString, VrlValue>>();
|
||||
|
||||
assert_eq!(result, new_values);
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
use ahash::{HashSet, HashSetExt};
|
||||
use snafu::OptionExt;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorUnsupportedValueSnafu, Result, ValueMustBeMapSnafu,
|
||||
@@ -22,7 +23,7 @@ use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME, TYPE_NAME,
|
||||
};
|
||||
use crate::{Processor, Value};
|
||||
use crate::Processor;
|
||||
|
||||
pub(crate) const PROCESSOR_SELECT: &str = "select";
|
||||
const INCLUDE_KEY: &str = "include";
|
||||
@@ -98,8 +99,8 @@ impl Processor for SelectProcessor {
|
||||
true
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
let v_map = val.as_map_mut().context(ValueMustBeMapSnafu)?;
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
let v_map = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
match self.select_type {
|
||||
SelectType::Include => {
|
||||
@@ -109,7 +110,7 @@ impl Processor for SelectProcessor {
|
||||
let field_name = field.input_field();
|
||||
if let Some(target_name) = field.target_field() {
|
||||
if let Some(v) = v_map.remove(field_name) {
|
||||
v_map.insert(target_name.to_string(), v);
|
||||
v_map.insert(KeyString::from(target_name), v);
|
||||
}
|
||||
include_key_set.insert(target_name);
|
||||
} else {
|
||||
@@ -133,9 +134,12 @@ impl Processor for SelectProcessor {
|
||||
mod test {
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::etl::field::{Field, Fields};
|
||||
use crate::etl::processor::select::{SelectProcessor, SelectType};
|
||||
use crate::{Map, Processor, Value};
|
||||
use crate::Processor;
|
||||
|
||||
#[test]
|
||||
fn test_select() {
|
||||
@@ -145,15 +149,24 @@ mod test {
|
||||
};
|
||||
|
||||
let mut p = BTreeMap::new();
|
||||
p.insert("hello".to_string(), Value::String("world".to_string()));
|
||||
p.insert("hello2".to_string(), Value::String("world2".to_string()));
|
||||
p.insert(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world".to_string())),
|
||||
);
|
||||
p.insert(
|
||||
KeyString::from("hello2"),
|
||||
VrlValue::Bytes(Bytes::from("world2".to_string())),
|
||||
);
|
||||
|
||||
let result = processor.exec_mut(Value::Map(Map { values: p }));
|
||||
let result = processor.exec_mut(VrlValue::Object(p));
|
||||
assert!(result.is_ok());
|
||||
let mut result = result.unwrap();
|
||||
let p = result.as_map_mut().unwrap();
|
||||
let p = result.as_object_mut().unwrap();
|
||||
assert_eq!(p.len(), 1);
|
||||
assert_eq!(p.get("hello"), Some(&Value::String("world".to_string())));
|
||||
assert_eq!(
|
||||
p.get(&KeyString::from("hello")),
|
||||
Some(&VrlValue::Bytes(Bytes::from("world".to_string())))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -164,15 +177,24 @@ mod test {
|
||||
};
|
||||
|
||||
let mut p = BTreeMap::new();
|
||||
p.insert("hello".to_string(), Value::String("world".to_string()));
|
||||
p.insert("hello2".to_string(), Value::String("world2".to_string()));
|
||||
p.insert(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world".to_string())),
|
||||
);
|
||||
p.insert(
|
||||
KeyString::from("hello2"),
|
||||
VrlValue::Bytes(Bytes::from("world2".to_string())),
|
||||
);
|
||||
|
||||
let result = processor.exec_mut(Value::Map(Map { values: p }));
|
||||
let result = processor.exec_mut(VrlValue::Object(p));
|
||||
assert!(result.is_ok());
|
||||
let mut result = result.unwrap();
|
||||
let p = result.as_map_mut().unwrap();
|
||||
let p = result.as_object_mut().unwrap();
|
||||
assert_eq!(p.len(), 1);
|
||||
assert_eq!(p.get("hello3"), Some(&Value::String("world".to_string())));
|
||||
assert_eq!(
|
||||
p.get(&KeyString::from("hello3")),
|
||||
Some(&VrlValue::Bytes(Bytes::from("world".to_string())))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -183,15 +205,24 @@ mod test {
|
||||
};
|
||||
|
||||
let mut p = BTreeMap::new();
|
||||
p.insert("hello".to_string(), Value::String("world".to_string()));
|
||||
p.insert("hello2".to_string(), Value::String("world2".to_string()));
|
||||
p.insert(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world".to_string())),
|
||||
);
|
||||
p.insert(
|
||||
KeyString::from("hello2"),
|
||||
VrlValue::Bytes(Bytes::from("world2".to_string())),
|
||||
);
|
||||
|
||||
let result = processor.exec_mut(Value::Map(Map { values: p }));
|
||||
let result = processor.exec_mut(VrlValue::Object(p));
|
||||
assert!(result.is_ok());
|
||||
let mut result = result.unwrap();
|
||||
let p = result.as_map_mut().unwrap();
|
||||
let p = result.as_object_mut().unwrap();
|
||||
assert_eq!(p.len(), 1);
|
||||
assert_eq!(p.get("hello"), None);
|
||||
assert_eq!(p.get("hello2"), Some(&Value::String("world2".to_string())));
|
||||
assert_eq!(p.get(&KeyString::from("hello")), None);
|
||||
assert_eq!(
|
||||
p.get(&KeyString::from("hello2")),
|
||||
Some(&VrlValue::Bytes(Bytes::from("world2".to_string())))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,14 +13,17 @@
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::OptionExt as _;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{Error, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, Result};
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, KEY_NAME,
|
||||
};
|
||||
use crate::{Processor, Value};
|
||||
use crate::Processor;
|
||||
|
||||
pub(crate) const PROCESSOR_SIMPLE_EXTRACT: &str = "simple_extract";
|
||||
|
||||
@@ -74,14 +77,14 @@ impl TryFrom<&yaml_rust::yaml::Hash> for SimpleExtractProcessor {
|
||||
}
|
||||
|
||||
impl SimpleExtractProcessor {
|
||||
fn process_field(&self, val: &Value) -> Result<Value> {
|
||||
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
|
||||
let mut current = val;
|
||||
for key in self.key.iter() {
|
||||
let Value::Map(map) = current else {
|
||||
return Ok(Value::Null);
|
||||
let VrlValue::Object(map) = current else {
|
||||
return Ok(VrlValue::Null);
|
||||
};
|
||||
let Some(v) = map.get(key) else {
|
||||
return Ok(Value::Null);
|
||||
let Some(v) = map.get(key.as_str()) else {
|
||||
return Ok(VrlValue::Null);
|
||||
};
|
||||
current = v;
|
||||
}
|
||||
@@ -98,14 +101,15 @@ impl Processor for SimpleExtractProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(v) => {
|
||||
let processed = self.process_field(v)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), processed)?;
|
||||
val.insert(KeyString::from(output_index), processed);
|
||||
}
|
||||
None => {
|
||||
if !self.ignore_missing {
|
||||
@@ -124,11 +128,13 @@ impl Processor for SimpleExtractProcessor {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
|
||||
#[test]
|
||||
fn test_simple_extract() {
|
||||
use super::*;
|
||||
use crate::{Map, Value};
|
||||
|
||||
let processor = SimpleExtractProcessor {
|
||||
key: vec!["hello".to_string()],
|
||||
@@ -136,12 +142,12 @@ mod test {
|
||||
};
|
||||
|
||||
let result = processor
|
||||
.process_field(&Value::Map(Map::one(
|
||||
"hello",
|
||||
Value::String("world".to_string()),
|
||||
)))
|
||||
.process_field(&VrlValue::Object(BTreeMap::from([(
|
||||
KeyString::from("hello"),
|
||||
VrlValue::Bytes(Bytes::from("world".to_string())),
|
||||
)])))
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result, Value::String("world".to_string()));
|
||||
assert_eq!(result, VrlValue::Bytes(Bytes::from("world".to_string())));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,19 +12,20 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use urlencoding::{decode, encode};
|
||||
use snafu::OptionExt;
|
||||
use urlencoding::{decode_binary, encode_binary};
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
|
||||
UrlEncodingDecodeSnafu, UrlEncodingInvalidMethodSnafu,
|
||||
UrlEncodingInvalidMethodSnafu, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
|
||||
IGNORE_MISSING_NAME, METHOD_NAME,
|
||||
};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_URL_ENCODING: &str = "urlencoding";
|
||||
|
||||
@@ -65,12 +66,12 @@ pub struct UrlEncodingProcessor {
|
||||
}
|
||||
|
||||
impl UrlEncodingProcessor {
|
||||
fn process_field(&self, val: &str) -> Result<Value> {
|
||||
fn process_field(&self, val: &Bytes) -> Result<VrlValue> {
|
||||
let processed = match self.method {
|
||||
Method::Encode => encode(val).to_string(),
|
||||
Method::Decode => decode(val).context(UrlEncodingDecodeSnafu)?.into_owned(),
|
||||
Method::Encode => Bytes::from_iter(encode_binary(val).bytes()),
|
||||
Method::Decode => Bytes::from(decode_binary(val).to_vec()),
|
||||
};
|
||||
Ok(Value::String(processed))
|
||||
Ok(VrlValue::Bytes(processed))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,16 +126,17 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
match val.get(index) {
|
||||
Some(Value::String(s)) => {
|
||||
Some(VrlValue::Bytes(s)) => {
|
||||
let result = self.process_field(s)?;
|
||||
let output_index = field.target_or_input_field();
|
||||
val.insert(output_index.to_string(), result)?;
|
||||
val.insert(KeyString::from(output_index), result);
|
||||
}
|
||||
Some(Value::Null) | None => {
|
||||
Some(VrlValue::Null) | None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
@@ -159,9 +161,11 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::urlencoding::UrlEncodingProcessor;
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_decode_url() {
|
||||
@@ -170,8 +174,8 @@ mod tests {
|
||||
|
||||
{
|
||||
let processor = UrlEncodingProcessor::default();
|
||||
let result = processor.process_field(encoded).unwrap();
|
||||
assert_eq!(Value::String(decoded.into()), result)
|
||||
let result = processor.process_field(&Bytes::from(encoded)).unwrap();
|
||||
assert_eq!(VrlValue::Bytes(Bytes::from(decoded)), result)
|
||||
}
|
||||
{
|
||||
let processor = UrlEncodingProcessor {
|
||||
@@ -179,8 +183,8 @@ mod tests {
|
||||
method: super::Method::Encode,
|
||||
ignore_missing: false,
|
||||
};
|
||||
let result = processor.process_field(decoded).unwrap();
|
||||
assert_eq!(Value::String(encoded.into()), result)
|
||||
let result = processor.process_field(&Bytes::from(decoded)).unwrap();
|
||||
assert_eq!(VrlValue::Bytes(Bytes::from(encoded)), result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,19 +15,18 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use chrono_tz::Tz;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use snafu::OptionExt;
|
||||
use vrl::compiler::runtime::Runtime;
|
||||
use vrl::compiler::{compile, Program, TargetValue};
|
||||
use vrl::diagnostic::Formatter;
|
||||
use vrl::prelude::{Bytes, NotNan, TimeZone};
|
||||
use vrl::value::{KeyString, Kind, Secrets, Value as VrlValue};
|
||||
use vrl::prelude::TimeZone;
|
||||
use vrl::value::{Kind, Secrets, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
BytesToUtf8Snafu, CompileVrlSnafu, Error, ExecuteVrlSnafu, FloatNaNSnafu,
|
||||
InvalidTimestampSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu, VrlReturnValueSnafu,
|
||||
CompileVrlSnafu, Error, ExecuteVrlSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu,
|
||||
VrlReturnValueSnafu,
|
||||
};
|
||||
use crate::etl::processor::yaml_string;
|
||||
use crate::Value as PipelineValue;
|
||||
|
||||
pub(crate) const PROCESSOR_VRL: &str = "vrl";
|
||||
const SOURCE: &str = "source";
|
||||
@@ -62,11 +61,9 @@ impl VrlProcessor {
|
||||
Ok(Self { source, program })
|
||||
}
|
||||
|
||||
pub fn resolve(&self, m: PipelineValue) -> Result<PipelineValue> {
|
||||
let pipeline_vrl = pipeline_value_to_vrl_value(m)?;
|
||||
|
||||
pub fn resolve(&self, value: VrlValue) -> Result<VrlValue> {
|
||||
let mut target = TargetValue {
|
||||
value: pipeline_vrl,
|
||||
value,
|
||||
metadata: VrlValue::Object(BTreeMap::new()),
|
||||
secrets: Secrets::default(),
|
||||
};
|
||||
@@ -82,7 +79,7 @@ impl VrlProcessor {
|
||||
.build()
|
||||
})?;
|
||||
|
||||
vrl_value_to_pipeline_value(re)
|
||||
Ok(re)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,91 +110,17 @@ impl crate::etl::processor::Processor for VrlProcessor {
|
||||
true
|
||||
}
|
||||
|
||||
fn exec_mut(&self, val: PipelineValue) -> Result<PipelineValue> {
|
||||
fn exec_mut(&self, val: VrlValue) -> Result<VrlValue> {
|
||||
let val = self.resolve(val)?;
|
||||
|
||||
if let PipelineValue::Map(m) = val {
|
||||
Ok(PipelineValue::Map(m.values.into()))
|
||||
if let VrlValue::Object(_) = val {
|
||||
Ok(val)
|
||||
} else {
|
||||
VrlRegexValueSnafu.fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn pipeline_value_to_vrl_value(v: PipelineValue) -> Result<VrlValue> {
|
||||
match v {
|
||||
PipelineValue::Null => Ok(VrlValue::Null),
|
||||
PipelineValue::Int8(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Int16(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Int32(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Int64(x) => Ok(VrlValue::Integer(x)),
|
||||
PipelineValue::Uint8(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Uint16(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Uint32(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Uint64(x) => Ok(VrlValue::Integer(x as i64)),
|
||||
PipelineValue::Float32(x) => NotNan::new(x as f64)
|
||||
.map_err(|_| FloatNaNSnafu { input_float: x }.build())
|
||||
.map(VrlValue::Float),
|
||||
PipelineValue::Float64(x) => NotNan::new(x)
|
||||
.map_err(|_| FloatNaNSnafu { input_float: x }.build())
|
||||
.map(VrlValue::Float),
|
||||
PipelineValue::Boolean(x) => Ok(VrlValue::Boolean(x)),
|
||||
PipelineValue::String(x) => Ok(VrlValue::Bytes(Bytes::copy_from_slice(x.as_bytes()))),
|
||||
PipelineValue::Timestamp(x) => x
|
||||
.to_datetime()
|
||||
.context(InvalidTimestampSnafu {
|
||||
input: x.to_string(),
|
||||
})
|
||||
.map(VrlValue::Timestamp),
|
||||
PipelineValue::Array(array) => Ok(VrlValue::Array(
|
||||
array
|
||||
.into_iter()
|
||||
.map(pipeline_value_to_vrl_value)
|
||||
.collect::<Result<Vec<_>>>()?,
|
||||
)),
|
||||
PipelineValue::Map(m) => {
|
||||
let values = m
|
||||
.values
|
||||
.into_iter()
|
||||
.map(|(k, v)| pipeline_value_to_vrl_value(v).map(|v| (KeyString::from(k), v)))
|
||||
.collect::<Result<BTreeMap<_, _>>>()?;
|
||||
Ok(VrlValue::Object(values))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn vrl_value_to_pipeline_value(v: VrlValue) -> Result<PipelineValue> {
|
||||
match v {
|
||||
VrlValue::Bytes(bytes) => String::from_utf8(bytes.to_vec())
|
||||
.context(BytesToUtf8Snafu)
|
||||
.map(PipelineValue::String),
|
||||
VrlValue::Regex(_) => VrlRegexValueSnafu.fail(),
|
||||
VrlValue::Integer(x) => Ok(PipelineValue::Int64(x)),
|
||||
VrlValue::Float(not_nan) => Ok(PipelineValue::Float64(not_nan.into_inner())),
|
||||
VrlValue::Boolean(b) => Ok(PipelineValue::Boolean(b)),
|
||||
VrlValue::Timestamp(date_time) => crate::etl::value::Timestamp::from_datetime(date_time)
|
||||
.context(InvalidTimestampSnafu {
|
||||
input: date_time.to_string(),
|
||||
})
|
||||
.map(PipelineValue::Timestamp),
|
||||
VrlValue::Object(bm) => {
|
||||
let b = bm
|
||||
.into_iter()
|
||||
.map(|(k, v)| vrl_value_to_pipeline_value(v).map(|v| (k.to_string(), v)))
|
||||
.collect::<Result<BTreeMap<String, PipelineValue>>>()?;
|
||||
Ok(PipelineValue::Map(b.into()))
|
||||
}
|
||||
VrlValue::Array(values) => {
|
||||
let a = values
|
||||
.into_iter()
|
||||
.map(vrl_value_to_pipeline_value)
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
Ok(PipelineValue::Array(a.into()))
|
||||
}
|
||||
VrlValue::Null => Ok(PipelineValue::Null),
|
||||
}
|
||||
}
|
||||
|
||||
fn check_regex_output(output_kind: &Kind) -> Result<()> {
|
||||
if output_kind.is_regex() {
|
||||
return VrlRegexValueSnafu.fail();
|
||||
@@ -223,9 +146,10 @@ fn check_regex_output(output_kind: &Kind) -> Result<()> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
use vrl::value::KeyString;
|
||||
|
||||
use super::*;
|
||||
use crate::etl::value::Timestamp;
|
||||
use crate::Map;
|
||||
|
||||
#[test]
|
||||
fn test_vrl() {
|
||||
@@ -243,31 +167,27 @@ del(.user_info)
|
||||
|
||||
let mut n = BTreeMap::new();
|
||||
n.insert(
|
||||
"name".to_string(),
|
||||
PipelineValue::String("certain_name".to_string()),
|
||||
KeyString::from("name"),
|
||||
VrlValue::Bytes(Bytes::from("certain_name")),
|
||||
);
|
||||
|
||||
let mut m = BTreeMap::new();
|
||||
m.insert(
|
||||
"user_info".to_string(),
|
||||
PipelineValue::Map(Map { values: n }),
|
||||
);
|
||||
m.insert(KeyString::from("user_info"), VrlValue::Object(n));
|
||||
|
||||
let re = v.resolve(PipelineValue::Map(Map { values: m }));
|
||||
let re = v.resolve(VrlValue::Object(m));
|
||||
assert!(re.is_ok());
|
||||
let re = re.unwrap();
|
||||
|
||||
assert!(matches!(re, PipelineValue::Map(_)));
|
||||
assert!(matches!(re, VrlValue::Object(_)));
|
||||
let re = re.as_object().unwrap();
|
||||
assert!(re.get("name").is_some());
|
||||
let name = re.get("name").unwrap();
|
||||
assert!(matches!(name.get("a").unwrap(), PipelineValue::String(x) if x == "certain_name"));
|
||||
assert!(matches!(name.get("b").unwrap(), PipelineValue::String(x) if x == "certain_name"));
|
||||
let name = name.as_object().unwrap();
|
||||
assert!(matches!(name.get("a").unwrap(), VrlValue::Bytes(x) if x == "certain_name"));
|
||||
assert!(matches!(name.get("b").unwrap(), VrlValue::Bytes(x) if x == "certain_name"));
|
||||
assert!(re.get("timestamp").is_some());
|
||||
let timestamp = re.get("timestamp").unwrap();
|
||||
assert!(matches!(
|
||||
timestamp,
|
||||
PipelineValue::Timestamp(Timestamp::Nanosecond(_))
|
||||
));
|
||||
assert!(matches!(timestamp, VrlValue::Timestamp(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -15,16 +15,20 @@
|
||||
pub mod index;
|
||||
pub mod transformer;
|
||||
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::ColumnDataType;
|
||||
use chrono::Utc;
|
||||
use snafu::{ensure, OptionExt};
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, Result, TransformElementMustBeMapSnafu,
|
||||
TransformFieldMustBeSetSnafu, TransformOnFailureInvalidValueSnafu, TransformTypeMustBeSetSnafu,
|
||||
UnsupportedTypeInPipelineSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{yaml_bool, yaml_new_field, yaml_new_fields, yaml_string};
|
||||
use crate::etl::transform::index::Index;
|
||||
use crate::etl::value::{Timestamp, Value};
|
||||
use crate::etl::value::{parse_str_type, parse_str_value};
|
||||
|
||||
const TRANSFORM_FIELD: &str = "field";
|
||||
const TRANSFORM_FIELDS: &str = "fields";
|
||||
@@ -124,39 +128,61 @@ impl TryFrom<&Vec<yaml_rust::Yaml>> for Transforms {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Transform {
|
||||
pub fields: Fields,
|
||||
pub type_: Value,
|
||||
pub default: Option<Value>,
|
||||
pub type_: ColumnDataType,
|
||||
pub default: Option<ValueData>,
|
||||
pub index: Option<Index>,
|
||||
pub tag: bool,
|
||||
pub on_failure: Option<OnFailure>,
|
||||
}
|
||||
|
||||
impl Default for Transform {
|
||||
fn default() -> Self {
|
||||
Transform {
|
||||
fields: Fields::default(),
|
||||
type_: Value::Null,
|
||||
default: None,
|
||||
index: None,
|
||||
tag: false,
|
||||
on_failure: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
// valid types
|
||||
// ColumnDataType::Int8
|
||||
// ColumnDataType::Int16
|
||||
// ColumnDataType::Int32
|
||||
// ColumnDataType::Int64
|
||||
// ColumnDataType::Uint8
|
||||
// ColumnDataType::Uint16
|
||||
// ColumnDataType::Uint32
|
||||
// ColumnDataType::Uint64
|
||||
// ColumnDataType::Float32
|
||||
// ColumnDataType::Float64
|
||||
// ColumnDataType::Boolean
|
||||
// ColumnDataType::String
|
||||
// ColumnDataType::TimestampNanosecond
|
||||
// ColumnDataType::TimestampMicrosecond
|
||||
// ColumnDataType::TimestampMillisecond
|
||||
// ColumnDataType::TimestampSecond
|
||||
// ColumnDataType::Binary
|
||||
|
||||
impl Transform {
|
||||
pub(crate) fn get_default(&self) -> Option<&Value> {
|
||||
pub(crate) fn get_default(&self) -> Option<&ValueData> {
|
||||
self.default.as_ref()
|
||||
}
|
||||
|
||||
pub(crate) fn get_type_matched_default_val(&self) -> &Value {
|
||||
&self.type_
|
||||
pub(crate) fn get_type_matched_default_val(&self) -> Result<ValueData> {
|
||||
get_default_for_type(&self.type_)
|
||||
}
|
||||
|
||||
pub(crate) fn get_default_value_when_data_is_none(&self) -> Option<Value> {
|
||||
if matches!(self.type_, Value::Timestamp(_)) && self.index.is_some_and(|i| i == Index::Time)
|
||||
{
|
||||
return Some(Value::Timestamp(Timestamp::default()));
|
||||
pub(crate) fn get_default_value_when_data_is_none(&self) -> Option<ValueData> {
|
||||
if is_timestamp_type(&self.type_) && self.index.is_some_and(|i| i == Index::Time) {
|
||||
let now = Utc::now();
|
||||
match self.type_ {
|
||||
ColumnDataType::TimestampSecond => {
|
||||
return Some(ValueData::TimestampSecondValue(now.timestamp()));
|
||||
}
|
||||
ColumnDataType::TimestampMillisecond => {
|
||||
return Some(ValueData::TimestampMillisecondValue(now.timestamp_millis()));
|
||||
}
|
||||
ColumnDataType::TimestampMicrosecond => {
|
||||
return Some(ValueData::TimestampMicrosecondValue(now.timestamp_micros()));
|
||||
}
|
||||
ColumnDataType::TimestampNanosecond => {
|
||||
return Some(ValueData::TimestampNanosecondValue(
|
||||
now.timestamp_nanos_opt()?,
|
||||
));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
@@ -166,17 +192,57 @@ impl Transform {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_timestamp_type(ty: &ColumnDataType) -> bool {
|
||||
matches!(
|
||||
ty,
|
||||
ColumnDataType::TimestampSecond
|
||||
| ColumnDataType::TimestampMillisecond
|
||||
| ColumnDataType::TimestampMicrosecond
|
||||
| ColumnDataType::TimestampNanosecond
|
||||
)
|
||||
}
|
||||
|
||||
fn get_default_for_type(ty: &ColumnDataType) -> Result<ValueData> {
|
||||
let v = match ty {
|
||||
ColumnDataType::Boolean => ValueData::BoolValue(false),
|
||||
ColumnDataType::Int8 => ValueData::I8Value(0),
|
||||
ColumnDataType::Int16 => ValueData::I16Value(0),
|
||||
ColumnDataType::Int32 => ValueData::I32Value(0),
|
||||
ColumnDataType::Int64 => ValueData::I64Value(0),
|
||||
ColumnDataType::Uint8 => ValueData::U8Value(0),
|
||||
ColumnDataType::Uint16 => ValueData::U16Value(0),
|
||||
ColumnDataType::Uint32 => ValueData::U32Value(0),
|
||||
ColumnDataType::Uint64 => ValueData::U64Value(0),
|
||||
ColumnDataType::Float32 => ValueData::F32Value(0.0),
|
||||
ColumnDataType::Float64 => ValueData::F64Value(0.0),
|
||||
ColumnDataType::Binary => ValueData::BinaryValue(jsonb::Value::Null.to_vec()),
|
||||
ColumnDataType::String => ValueData::StringValue(String::new()),
|
||||
|
||||
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(0),
|
||||
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(0),
|
||||
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(0),
|
||||
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(0),
|
||||
|
||||
_ => UnsupportedTypeInPipelineSnafu {
|
||||
ty: ty.as_str_name(),
|
||||
}
|
||||
.fail()?,
|
||||
};
|
||||
Ok(v)
|
||||
}
|
||||
|
||||
impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self> {
|
||||
let mut fields = Fields::default();
|
||||
let mut type_ = Value::Null;
|
||||
let mut default = None;
|
||||
let mut index = None;
|
||||
let mut tag = false;
|
||||
let mut on_failure = None;
|
||||
|
||||
let mut type_ = None;
|
||||
|
||||
for (k, v) in hash {
|
||||
let key = k
|
||||
.as_str()
|
||||
@@ -192,7 +258,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
|
||||
|
||||
TRANSFORM_TYPE => {
|
||||
let t = yaml_string(v, TRANSFORM_TYPE)?;
|
||||
type_ = Value::parse_str_type(&t)?;
|
||||
type_ = Some(parse_str_type(&t)?);
|
||||
}
|
||||
|
||||
TRANSFORM_INDEX => {
|
||||
@@ -205,7 +271,17 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
|
||||
}
|
||||
|
||||
TRANSFORM_DEFAULT => {
|
||||
default = Some(Value::try_from(v)?);
|
||||
default = match v {
|
||||
yaml_rust::Yaml::Real(r) => Some(r.clone()),
|
||||
yaml_rust::Yaml::Integer(i) => Some(i.to_string()),
|
||||
yaml_rust::Yaml::String(s) => Some(s.clone()),
|
||||
yaml_rust::Yaml::Boolean(b) => Some(b.to_string()),
|
||||
yaml_rust::Yaml::Array(_)
|
||||
| yaml_rust::Yaml::Hash(_)
|
||||
| yaml_rust::Yaml::Alias(_)
|
||||
| yaml_rust::Yaml::Null
|
||||
| yaml_rust::Yaml::BadValue => None,
|
||||
};
|
||||
}
|
||||
|
||||
TRANSFORM_ON_FAILURE => {
|
||||
@@ -219,23 +295,14 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
|
||||
|
||||
// ensure fields and type
|
||||
ensure!(!fields.is_empty(), TransformFieldMustBeSetSnafu);
|
||||
ensure!(
|
||||
type_ != Value::Null,
|
||||
TransformTypeMustBeSetSnafu {
|
||||
fields: format!("{:?}", fields)
|
||||
}
|
||||
);
|
||||
let type_ = type_.context(TransformTypeMustBeSetSnafu {
|
||||
fields: format!("{:?}", fields),
|
||||
})?;
|
||||
|
||||
let final_default = if let Some(default_value) = default {
|
||||
match default_value {
|
||||
// if default is not set, then it will be regarded as default null
|
||||
Value::Null => None,
|
||||
_ => {
|
||||
let target = type_.parse_str_value(default_value.to_str_value().as_str())?;
|
||||
on_failure = Some(OnFailure::Default);
|
||||
Some(target)
|
||||
}
|
||||
}
|
||||
let target = parse_str_value(&type_, &default_value)?;
|
||||
on_failure = Some(OnFailure::Default);
|
||||
Some(target)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
pub mod coerce;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -24,26 +25,27 @@ use api::v1::value::ValueData;
|
||||
use api::v1::{ColumnDataType, ColumnDataTypeExtension, JsonTypeExtension, SemanticType};
|
||||
use coerce::{coerce_columns, coerce_value};
|
||||
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use common_telemetry::warn;
|
||||
use greptime_proto::v1::{ColumnSchema, Row, Rows, Value as GreptimeValue};
|
||||
use itertools::Itertools;
|
||||
use jsonb::Number;
|
||||
use once_cell::sync::OnceCell;
|
||||
use serde_json::Number;
|
||||
use session::context::Channel;
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::VrlValueConvert;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
IdentifyPipelineColumnTypeMismatchSnafu, ReachedMaxNestedLevelsSnafu, Result,
|
||||
TimeIndexMustBeNonNullSnafu, TransformColumnNameMustBeUniqueSnafu,
|
||||
TransformMultipleTimestampIndexSnafu, TransformTimestampIndexCountSnafu,
|
||||
UnsupportedNumberTypeSnafu, ValueMustBeMapSnafu,
|
||||
IdentifyPipelineColumnTypeMismatchSnafu, InvalidTimestampSnafu, ReachedMaxNestedLevelsSnafu,
|
||||
Result, TimeIndexMustBeNonNullSnafu, TransformColumnNameMustBeUniqueSnafu,
|
||||
TransformMultipleTimestampIndexSnafu, TransformTimestampIndexCountSnafu, ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::ctx_req::ContextOpt;
|
||||
use crate::etl::field::{Field, Fields};
|
||||
use crate::etl::transform::index::Index;
|
||||
use crate::etl::transform::{Transform, Transforms};
|
||||
use crate::etl::value::{Timestamp, Value};
|
||||
use crate::etl::PipelineDocVersion;
|
||||
use crate::{unwrap_or_continue_if_err, Map, PipelineContext};
|
||||
use crate::{unwrap_or_continue_if_err, PipelineContext};
|
||||
|
||||
const DEFAULT_GREPTIME_TIMESTAMP_COLUMN: &str = "greptime_timestamp";
|
||||
const DEFAULT_MAX_NESTED_LEVELS_FOR_JSON_FLATTENING: usize = 10;
|
||||
@@ -133,7 +135,7 @@ impl GreptimePipelineParams {
|
||||
impl GreptimeTransformer {
|
||||
/// Add a default timestamp column to the transforms
|
||||
fn add_greptime_timestamp_column(transforms: &mut Transforms) {
|
||||
let type_ = Value::Timestamp(Timestamp::Nanosecond(0));
|
||||
let type_ = ColumnDataType::TimestampNanosecond;
|
||||
let default = None;
|
||||
|
||||
let transform = Transform {
|
||||
@@ -220,7 +222,7 @@ impl GreptimeTransformer {
|
||||
|
||||
pub fn transform_mut(
|
||||
&self,
|
||||
pipeline_map: &mut Value,
|
||||
pipeline_map: &mut VrlValue,
|
||||
is_v1: bool,
|
||||
) -> Result<Vec<GreptimeValue>> {
|
||||
let mut values = vec![GreptimeValue { value_data: None }; self.schema.len()];
|
||||
@@ -229,6 +231,7 @@ impl GreptimeTransformer {
|
||||
for field in transform.fields.iter() {
|
||||
let column_name = field.input_field();
|
||||
|
||||
let pipeline_map = pipeline_map.as_object_mut().context(ValueMustBeMapSnafu)?;
|
||||
// let keep us `get` here to be compatible with v1
|
||||
match pipeline_map.get(column_name) {
|
||||
Some(v) => {
|
||||
@@ -240,11 +243,8 @@ impl GreptimeTransformer {
|
||||
let value_data = match transform.on_failure {
|
||||
Some(crate::etl::transform::OnFailure::Default) => {
|
||||
match transform.get_default() {
|
||||
Some(default) => coerce_value(default, transform)?,
|
||||
None => match transform.get_default_value_when_data_is_none() {
|
||||
Some(default) => coerce_value(&default, transform)?,
|
||||
None => None,
|
||||
},
|
||||
Some(default) => Some(default.clone()),
|
||||
None => transform.get_default_value_when_data_is_none(),
|
||||
}
|
||||
}
|
||||
Some(crate::etl::transform::OnFailure::Ignore) => None,
|
||||
@@ -349,63 +349,22 @@ fn resolve_schema(
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_number_schema(
|
||||
n: Number,
|
||||
column_name: String,
|
||||
index: Option<usize>,
|
||||
row: &mut Vec<GreptimeValue>,
|
||||
schema_info: &mut SchemaInfo,
|
||||
) -> Result<()> {
|
||||
let (value, datatype, semantic_type) = if n.is_i64() {
|
||||
(
|
||||
ValueData::I64Value(n.as_i64().unwrap()),
|
||||
ColumnDataType::Int64 as i32,
|
||||
SemanticType::Field as i32,
|
||||
)
|
||||
} else if n.is_u64() {
|
||||
(
|
||||
ValueData::U64Value(n.as_u64().unwrap()),
|
||||
ColumnDataType::Uint64 as i32,
|
||||
SemanticType::Field as i32,
|
||||
)
|
||||
} else if n.is_f64() {
|
||||
(
|
||||
ValueData::F64Value(n.as_f64().unwrap()),
|
||||
ColumnDataType::Float64 as i32,
|
||||
SemanticType::Field as i32,
|
||||
)
|
||||
} else {
|
||||
return UnsupportedNumberTypeSnafu { value: n }.fail();
|
||||
};
|
||||
resolve_schema(
|
||||
index,
|
||||
value,
|
||||
ColumnSchema {
|
||||
column_name,
|
||||
datatype,
|
||||
semantic_type,
|
||||
datatype_extension: None,
|
||||
options: None,
|
||||
},
|
||||
row,
|
||||
schema_info,
|
||||
)
|
||||
}
|
||||
|
||||
fn calc_ts(p_ctx: &PipelineContext, values: &Value) -> Result<Option<ValueData>> {
|
||||
fn calc_ts(p_ctx: &PipelineContext, values: &VrlValue) -> Result<Option<ValueData>> {
|
||||
match p_ctx.channel {
|
||||
Channel::Prometheus => Ok(Some(ValueData::TimestampMillisecondValue(
|
||||
values
|
||||
.get(GREPTIME_TIMESTAMP)
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or_default(),
|
||||
))),
|
||||
Channel::Prometheus => {
|
||||
let ts = values
|
||||
.as_object()
|
||||
.and_then(|m| m.get(GREPTIME_TIMESTAMP))
|
||||
.and_then(|ts| ts.try_into_i64().ok())
|
||||
.unwrap_or_default();
|
||||
Ok(Some(ValueData::TimestampMillisecondValue(ts)))
|
||||
}
|
||||
_ => {
|
||||
let custom_ts = p_ctx.pipeline_definition.get_custom_ts();
|
||||
match custom_ts {
|
||||
Some(ts) => {
|
||||
let ts_field = values.get(ts.get_column_name());
|
||||
Some(ts.get_timestamp(ts_field)).transpose()
|
||||
let ts_field = values.as_object().and_then(|m| m.get(ts.get_column_name()));
|
||||
Some(ts.get_timestamp_value(ts_field)).transpose()
|
||||
}
|
||||
None => Ok(Some(ValueData::TimestampNanosecondValue(
|
||||
chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
|
||||
@@ -417,7 +376,7 @@ fn calc_ts(p_ctx: &PipelineContext, values: &Value) -> Result<Option<ValueData>>
|
||||
|
||||
pub(crate) fn values_to_row(
|
||||
schema_info: &mut SchemaInfo,
|
||||
values: Value,
|
||||
values: VrlValue,
|
||||
pipeline_ctx: &PipelineContext<'_>,
|
||||
row: Option<Vec<GreptimeValue>>,
|
||||
need_calc_ts: bool,
|
||||
@@ -439,14 +398,20 @@ pub(crate) fn values_to_row(
|
||||
.as_ref()
|
||||
.map_or(DEFAULT_GREPTIME_TIMESTAMP_COLUMN, |ts| ts.get_column_name());
|
||||
|
||||
let values = values.into_map().context(ValueMustBeMapSnafu)?;
|
||||
let values = values.into_object().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
for (column_name, value) in values {
|
||||
if column_name == ts_column_name {
|
||||
if column_name.as_str() == ts_column_name {
|
||||
continue;
|
||||
}
|
||||
|
||||
resolve_value(value, column_name, &mut row, schema_info, pipeline_ctx)?;
|
||||
resolve_value(
|
||||
value,
|
||||
column_name.into(),
|
||||
&mut row,
|
||||
schema_info,
|
||||
pipeline_ctx,
|
||||
)?;
|
||||
}
|
||||
Ok(Row { values: row })
|
||||
}
|
||||
@@ -460,7 +425,7 @@ fn decide_semantic(p_ctx: &PipelineContext, column_name: &str) -> i32 {
|
||||
}
|
||||
|
||||
fn resolve_value(
|
||||
value: Value,
|
||||
value: VrlValue,
|
||||
column_name: String,
|
||||
row: &mut Vec<GreptimeValue>,
|
||||
schema_info: &mut SchemaInfo,
|
||||
@@ -486,27 +451,23 @@ fn resolve_value(
|
||||
};
|
||||
|
||||
match value {
|
||||
Value::Null => {}
|
||||
VrlValue::Null => {}
|
||||
|
||||
Value::Int8(_) | Value::Int16(_) | Value::Int32(_) | Value::Int64(_) => {
|
||||
VrlValue::Integer(v) => {
|
||||
// safe unwrap after type matched
|
||||
let v = value.as_i64().unwrap();
|
||||
resolve_simple_type(ValueData::I64Value(v), column_name, ColumnDataType::Int64)?;
|
||||
}
|
||||
|
||||
Value::Uint8(_) | Value::Uint16(_) | Value::Uint32(_) | Value::Uint64(_) => {
|
||||
VrlValue::Float(v) => {
|
||||
// safe unwrap after type matched
|
||||
let v = value.as_u64().unwrap();
|
||||
resolve_simple_type(ValueData::U64Value(v), column_name, ColumnDataType::Uint64)?;
|
||||
resolve_simple_type(
|
||||
ValueData::F64Value(v.into()),
|
||||
column_name,
|
||||
ColumnDataType::Float64,
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Float32(_) | Value::Float64(_) => {
|
||||
// safe unwrap after type matched
|
||||
let v = value.as_f64().unwrap();
|
||||
resolve_simple_type(ValueData::F64Value(v), column_name, ColumnDataType::Float64)?;
|
||||
}
|
||||
|
||||
Value::Boolean(v) => {
|
||||
VrlValue::Boolean(v) => {
|
||||
resolve_simple_type(
|
||||
ValueData::BoolValue(v),
|
||||
column_name,
|
||||
@@ -514,15 +475,30 @@ fn resolve_value(
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::String(v) => {
|
||||
VrlValue::Bytes(v) => {
|
||||
resolve_simple_type(
|
||||
ValueData::StringValue(v),
|
||||
ValueData::StringValue(String::from_utf8_lossy_owned(v.to_vec())),
|
||||
column_name,
|
||||
ColumnDataType::String,
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Timestamp(Timestamp::Nanosecond(ns)) => {
|
||||
VrlValue::Regex(v) => {
|
||||
warn!(
|
||||
"Persisting regex value in the table, this should not happen, column_name: {}",
|
||||
column_name
|
||||
);
|
||||
resolve_simple_type(
|
||||
ValueData::StringValue(v.to_string()),
|
||||
column_name,
|
||||
ColumnDataType::String,
|
||||
)?;
|
||||
}
|
||||
|
||||
VrlValue::Timestamp(ts) => {
|
||||
let ns = ts.timestamp_nanos_opt().context(InvalidTimestampSnafu {
|
||||
input: ts.to_rfc3339(),
|
||||
})?;
|
||||
resolve_simple_type(
|
||||
ValueData::TimestampNanosecondValue(ns),
|
||||
column_name,
|
||||
@@ -530,32 +506,8 @@ fn resolve_value(
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Timestamp(Timestamp::Microsecond(us)) => {
|
||||
resolve_simple_type(
|
||||
ValueData::TimestampMicrosecondValue(us),
|
||||
column_name,
|
||||
ColumnDataType::TimestampMicrosecond,
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Timestamp(Timestamp::Millisecond(ms)) => {
|
||||
resolve_simple_type(
|
||||
ValueData::TimestampMillisecondValue(ms),
|
||||
column_name,
|
||||
ColumnDataType::TimestampMillisecond,
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Timestamp(Timestamp::Second(s)) => {
|
||||
resolve_simple_type(
|
||||
ValueData::TimestampSecondValue(s),
|
||||
column_name,
|
||||
ColumnDataType::TimestampSecond,
|
||||
)?;
|
||||
}
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
let data: jsonb::Value = value.into();
|
||||
VrlValue::Array(_) | VrlValue::Object(_) => {
|
||||
let data = vrl_value_to_jsonb_value(&value);
|
||||
resolve_schema(
|
||||
index,
|
||||
ValueData::BinaryValue(data.to_vec()),
|
||||
@@ -576,8 +528,32 @@ fn resolve_value(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn vrl_value_to_jsonb_value<'a>(value: &'a VrlValue) -> jsonb::Value<'a> {
|
||||
match value {
|
||||
VrlValue::Bytes(bytes) => jsonb::Value::String(String::from_utf8_lossy(bytes)),
|
||||
VrlValue::Regex(value_regex) => jsonb::Value::String(Cow::Borrowed(value_regex.as_str())),
|
||||
VrlValue::Integer(i) => jsonb::Value::Number(Number::Int64(*i)),
|
||||
VrlValue::Float(not_nan) => jsonb::Value::Number(Number::Float64(not_nan.into_inner())),
|
||||
VrlValue::Boolean(b) => jsonb::Value::Bool(*b),
|
||||
VrlValue::Timestamp(date_time) => jsonb::Value::String(Cow::Owned(date_time.to_rfc3339())),
|
||||
VrlValue::Object(btree_map) => jsonb::Value::Object(
|
||||
btree_map
|
||||
.iter()
|
||||
.map(|(key, value)| (key.to_string(), vrl_value_to_jsonb_value(value)))
|
||||
.collect(),
|
||||
),
|
||||
VrlValue::Array(values) => jsonb::Value::Array(
|
||||
values
|
||||
.iter()
|
||||
.map(|value| vrl_value_to_jsonb_value(value))
|
||||
.collect(),
|
||||
),
|
||||
VrlValue::Null => jsonb::Value::Null,
|
||||
}
|
||||
}
|
||||
|
||||
fn identity_pipeline_inner(
|
||||
pipeline_maps: Vec<Value>,
|
||||
pipeline_maps: Vec<VrlValue>,
|
||||
pipeline_ctx: &PipelineContext<'_>,
|
||||
) -> Result<(SchemaInfo, HashMap<ContextOpt, Vec<Row>>)> {
|
||||
let skip_error = pipeline_ctx.pipeline_param.skip_error();
|
||||
@@ -587,7 +563,7 @@ fn identity_pipeline_inner(
|
||||
// set time index column schema first
|
||||
schema_info.schema.push(ColumnSchema {
|
||||
column_name: custom_ts
|
||||
.map(|ts| ts.get_column_name().clone())
|
||||
.map(|ts| ts.get_column_name().to_string())
|
||||
.unwrap_or_else(|| DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string()),
|
||||
datatype: custom_ts.map(|c| c.get_datatype()).unwrap_or_else(|| {
|
||||
if pipeline_ctx.channel == Channel::Prometheus {
|
||||
@@ -642,7 +618,7 @@ fn identity_pipeline_inner(
|
||||
/// 4. The pipeline will return an error if the same column datatype is mismatched
|
||||
/// 5. The pipeline will analyze the schema of each json record and merge them to get the final schema.
|
||||
pub fn identity_pipeline(
|
||||
array: Vec<Value>,
|
||||
array: Vec<VrlValue>,
|
||||
table: Option<Arc<table::Table>>,
|
||||
pipeline_ctx: &PipelineContext<'_>,
|
||||
) -> Result<HashMap<ContextOpt, Rows>> {
|
||||
@@ -690,22 +666,22 @@ pub fn identity_pipeline(
|
||||
///
|
||||
/// The `max_nested_levels` parameter is used to limit the nested levels of the JSON object.
|
||||
/// The error will be returned if the nested levels is greater than the `max_nested_levels`.
|
||||
pub fn flatten_object(object: Value, max_nested_levels: usize) -> Result<Value> {
|
||||
pub fn flatten_object(object: VrlValue, max_nested_levels: usize) -> Result<VrlValue> {
|
||||
let mut flattened = BTreeMap::new();
|
||||
let object = object.into_map().context(ValueMustBeMapSnafu)?;
|
||||
let object = object.into_object().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
if !object.is_empty() {
|
||||
// it will use recursion to flatten the object.
|
||||
do_flatten_object(&mut flattened, None, object, 1, max_nested_levels)?;
|
||||
}
|
||||
|
||||
Ok(Value::Map(Map { values: flattened }))
|
||||
Ok(VrlValue::Object(flattened))
|
||||
}
|
||||
|
||||
fn do_flatten_object(
|
||||
dest: &mut BTreeMap<String, Value>,
|
||||
dest: &mut BTreeMap<KeyString, VrlValue>,
|
||||
base: Option<&str>,
|
||||
object: BTreeMap<String, Value>,
|
||||
object: BTreeMap<KeyString, VrlValue>,
|
||||
current_level: usize,
|
||||
max_nested_levels: usize,
|
||||
) -> Result<()> {
|
||||
@@ -715,14 +691,17 @@ fn do_flatten_object(
|
||||
}
|
||||
|
||||
for (key, value) in object {
|
||||
let new_key = base.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
|
||||
let new_key = base.map_or_else(
|
||||
|| key.clone(),
|
||||
|base_key| format!("{base_key}.{key}").into(),
|
||||
);
|
||||
|
||||
match value {
|
||||
Value::Map(object) => {
|
||||
VrlValue::Object(object) => {
|
||||
do_flatten_object(
|
||||
dest,
|
||||
Some(&new_key),
|
||||
object.values,
|
||||
object,
|
||||
current_level + 1,
|
||||
max_nested_levels,
|
||||
)?;
|
||||
@@ -742,7 +721,6 @@ mod tests {
|
||||
use api::v1::SemanticType;
|
||||
|
||||
use super::*;
|
||||
use crate::etl::{json_array_to_map, json_to_map};
|
||||
use crate::{identity_pipeline, PipelineDefinition};
|
||||
|
||||
#[test]
|
||||
@@ -754,7 +732,7 @@ mod tests {
|
||||
Channel::Unknown,
|
||||
);
|
||||
{
|
||||
let array = vec![
|
||||
let array = [
|
||||
serde_json::json!({
|
||||
"woshinull": null,
|
||||
"name": "Alice",
|
||||
@@ -774,7 +752,7 @@ mod tests {
|
||||
"gaga": "gaga"
|
||||
}),
|
||||
];
|
||||
let array = json_array_to_map(array).unwrap();
|
||||
let array = array.iter().map(|v| v.into()).collect();
|
||||
let rows = identity_pipeline(array, None, &pipeline_ctx);
|
||||
assert!(rows.is_err());
|
||||
assert_eq!(
|
||||
@@ -783,7 +761,7 @@ mod tests {
|
||||
);
|
||||
}
|
||||
{
|
||||
let array = vec![
|
||||
let array = [
|
||||
serde_json::json!({
|
||||
"woshinull": null,
|
||||
"name": "Alice",
|
||||
@@ -803,7 +781,8 @@ mod tests {
|
||||
"gaga": "gaga"
|
||||
}),
|
||||
];
|
||||
let rows = identity_pipeline(json_array_to_map(array).unwrap(), None, &pipeline_ctx);
|
||||
let array = array.iter().map(|v| v.into()).collect();
|
||||
let rows = identity_pipeline(array, None, &pipeline_ctx);
|
||||
assert!(rows.is_err());
|
||||
assert_eq!(
|
||||
rows.err().unwrap().to_string(),
|
||||
@@ -811,7 +790,7 @@ mod tests {
|
||||
);
|
||||
}
|
||||
{
|
||||
let array = vec![
|
||||
let array = [
|
||||
serde_json::json!({
|
||||
"woshinull": null,
|
||||
"name": "Alice",
|
||||
@@ -831,7 +810,8 @@ mod tests {
|
||||
"gaga": "gaga"
|
||||
}),
|
||||
];
|
||||
let rows = identity_pipeline(json_array_to_map(array).unwrap(), None, &pipeline_ctx);
|
||||
let array = array.iter().map(|v| v.into()).collect();
|
||||
let rows = identity_pipeline(array, None, &pipeline_ctx);
|
||||
assert!(rows.is_ok());
|
||||
let mut rows = rows.unwrap();
|
||||
assert!(rows.len() == 1);
|
||||
@@ -842,7 +822,7 @@ mod tests {
|
||||
assert_eq!(8, rows.rows[1].values.len());
|
||||
}
|
||||
{
|
||||
let array = vec![
|
||||
let array = [
|
||||
serde_json::json!({
|
||||
"woshinull": null,
|
||||
"name": "Alice",
|
||||
@@ -864,22 +844,23 @@ mod tests {
|
||||
];
|
||||
let tag_column_names = ["name".to_string(), "address".to_string()];
|
||||
|
||||
let rows = identity_pipeline_inner(json_array_to_map(array).unwrap(), &pipeline_ctx)
|
||||
.map(|(mut schema, mut rows)| {
|
||||
for name in tag_column_names {
|
||||
if let Some(index) = schema.index.get(&name) {
|
||||
schema.schema[*index].semantic_type = SemanticType::Tag as i32;
|
||||
let rows =
|
||||
identity_pipeline_inner(array.iter().map(|v| v.into()).collect(), &pipeline_ctx)
|
||||
.map(|(mut schema, mut rows)| {
|
||||
for name in tag_column_names {
|
||||
if let Some(index) = schema.index.get(&name) {
|
||||
schema.schema[*index].semantic_type = SemanticType::Tag as i32;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert!(rows.len() == 1);
|
||||
let rows = rows.remove(&ContextOpt::default()).unwrap();
|
||||
assert!(rows.len() == 1);
|
||||
let rows = rows.remove(&ContextOpt::default()).unwrap();
|
||||
|
||||
Rows {
|
||||
schema: schema.schema,
|
||||
rows,
|
||||
}
|
||||
});
|
||||
Rows {
|
||||
schema: schema.schema,
|
||||
rows,
|
||||
}
|
||||
});
|
||||
|
||||
assert!(rows.is_ok());
|
||||
let rows = rows.unwrap();
|
||||
@@ -976,8 +957,8 @@ mod tests {
|
||||
];
|
||||
|
||||
for (input, max_depth, expected) in test_cases {
|
||||
let input = json_to_map(input).unwrap();
|
||||
let expected = expected.map(|e| json_to_map(e).unwrap());
|
||||
let input = input.into();
|
||||
let expected = expected.map(|e| e.into());
|
||||
|
||||
let flattened_object = flatten_object(input, max_depth).ok();
|
||||
assert_eq!(flattened_object, expected);
|
||||
|
||||
@@ -18,58 +18,17 @@ use api::v1::{ColumnDataTypeExtension, ColumnOptions, JsonTypeExtension};
|
||||
use datatypes::schema::{FulltextOptions, SkippingIndexOptions};
|
||||
use greptime_proto::v1::value::ValueData;
|
||||
use greptime_proto::v1::{ColumnDataType, ColumnSchema, SemanticType};
|
||||
use snafu::ResultExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::error::{
|
||||
CoerceIncompatibleTypesSnafu, CoerceJsonTypeToSnafu, CoerceStringToTypeSnafu,
|
||||
CoerceTypeToJsonSnafu, CoerceUnsupportedEpochTypeSnafu, CoerceUnsupportedNullTypeSnafu,
|
||||
CoerceUnsupportedNullTypeToSnafu, ColumnOptionsSnafu, Error, Result,
|
||||
CoerceTypeToJsonSnafu, CoerceUnsupportedEpochTypeSnafu, ColumnOptionsSnafu,
|
||||
InvalidTimestampSnafu, Result, UnsupportedTypeInPipelineSnafu, VrlRegexValueSnafu,
|
||||
};
|
||||
use crate::etl::transform::index::Index;
|
||||
use crate::etl::transform::transformer::greptime::vrl_value_to_jsonb_value;
|
||||
use crate::etl::transform::{OnFailure, Transform};
|
||||
use crate::etl::value::{Timestamp, Value};
|
||||
|
||||
impl TryFrom<Value> for ValueData {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: Value) -> Result<Self> {
|
||||
match value {
|
||||
Value::Null => CoerceUnsupportedNullTypeSnafu.fail(),
|
||||
|
||||
Value::Int8(v) => Ok(ValueData::I32Value(v as i32)),
|
||||
Value::Int16(v) => Ok(ValueData::I32Value(v as i32)),
|
||||
Value::Int32(v) => Ok(ValueData::I32Value(v)),
|
||||
Value::Int64(v) => Ok(ValueData::I64Value(v)),
|
||||
|
||||
Value::Uint8(v) => Ok(ValueData::U32Value(v as u32)),
|
||||
Value::Uint16(v) => Ok(ValueData::U32Value(v as u32)),
|
||||
Value::Uint32(v) => Ok(ValueData::U32Value(v)),
|
||||
Value::Uint64(v) => Ok(ValueData::U64Value(v)),
|
||||
|
||||
Value::Float32(v) => Ok(ValueData::F32Value(v)),
|
||||
Value::Float64(v) => Ok(ValueData::F64Value(v)),
|
||||
|
||||
Value::Boolean(v) => Ok(ValueData::BoolValue(v)),
|
||||
Value::String(v) => Ok(ValueData::StringValue(v)),
|
||||
|
||||
Value::Timestamp(Timestamp::Nanosecond(ns)) => {
|
||||
Ok(ValueData::TimestampNanosecondValue(ns))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Microsecond(us)) => {
|
||||
Ok(ValueData::TimestampMicrosecondValue(us))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Millisecond(ms)) => {
|
||||
Ok(ValueData::TimestampMillisecondValue(ms))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Second(s)) => Ok(ValueData::TimestampSecondValue(s)),
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
let data: jsonb::Value = value.into();
|
||||
Ok(ValueData::BinaryValue(data.to_vec()))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn coerce_columns(transform: &Transform) -> Result<Vec<ColumnSchema>> {
|
||||
let mut columns = Vec::new();
|
||||
@@ -77,15 +36,21 @@ pub(crate) fn coerce_columns(transform: &Transform) -> Result<Vec<ColumnSchema>>
|
||||
for field in transform.fields.iter() {
|
||||
let column_name = field.target_or_input_field().to_string();
|
||||
|
||||
let (datatype, datatype_extension) = coerce_type(transform)?;
|
||||
let ext = if matches!(transform.type_, ColumnDataType::Binary) {
|
||||
Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let semantic_type = coerce_semantic_type(transform) as i32;
|
||||
|
||||
let column = ColumnSchema {
|
||||
column_name,
|
||||
datatype: datatype as i32,
|
||||
datatype: transform.type_ as i32,
|
||||
semantic_type,
|
||||
datatype_extension,
|
||||
datatype_extension: ext,
|
||||
options: coerce_options(transform)?,
|
||||
};
|
||||
columns.push(column);
|
||||
@@ -123,113 +88,60 @@ fn coerce_options(transform: &Transform) -> Result<Option<ColumnOptions>> {
|
||||
}
|
||||
}
|
||||
|
||||
fn coerce_type(transform: &Transform) -> Result<(ColumnDataType, Option<ColumnDataTypeExtension>)> {
|
||||
match transform.type_ {
|
||||
Value::Int8(_) => Ok((ColumnDataType::Int8, None)),
|
||||
Value::Int16(_) => Ok((ColumnDataType::Int16, None)),
|
||||
Value::Int32(_) => Ok((ColumnDataType::Int32, None)),
|
||||
Value::Int64(_) => Ok((ColumnDataType::Int64, None)),
|
||||
|
||||
Value::Uint8(_) => Ok((ColumnDataType::Uint8, None)),
|
||||
Value::Uint16(_) => Ok((ColumnDataType::Uint16, None)),
|
||||
Value::Uint32(_) => Ok((ColumnDataType::Uint32, None)),
|
||||
Value::Uint64(_) => Ok((ColumnDataType::Uint64, None)),
|
||||
|
||||
Value::Float32(_) => Ok((ColumnDataType::Float32, None)),
|
||||
Value::Float64(_) => Ok((ColumnDataType::Float64, None)),
|
||||
|
||||
Value::Boolean(_) => Ok((ColumnDataType::Boolean, None)),
|
||||
Value::String(_) => Ok((ColumnDataType::String, None)),
|
||||
|
||||
Value::Timestamp(Timestamp::Nanosecond(_)) => {
|
||||
Ok((ColumnDataType::TimestampNanosecond, None))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Microsecond(_)) => {
|
||||
Ok((ColumnDataType::TimestampMicrosecond, None))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Millisecond(_)) => {
|
||||
Ok((ColumnDataType::TimestampMillisecond, None))
|
||||
}
|
||||
Value::Timestamp(Timestamp::Second(_)) => Ok((ColumnDataType::TimestampSecond, None)),
|
||||
|
||||
Value::Array(_) | Value::Map(_) => Ok((
|
||||
ColumnDataType::Binary,
|
||||
Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
}),
|
||||
)),
|
||||
|
||||
Value::Null => CoerceUnsupportedNullTypeToSnafu {
|
||||
ty: transform.type_.to_str_type(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn coerce_value(val: &Value, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
pub(crate) fn coerce_value(val: &VrlValue, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
match val {
|
||||
Value::Null => Ok(None),
|
||||
|
||||
Value::Int8(n) => coerce_i64_value(*n as i64, transform),
|
||||
Value::Int16(n) => coerce_i64_value(*n as i64, transform),
|
||||
Value::Int32(n) => coerce_i64_value(*n as i64, transform),
|
||||
Value::Int64(n) => coerce_i64_value(*n, transform),
|
||||
|
||||
Value::Uint8(n) => coerce_u64_value(*n as u64, transform),
|
||||
Value::Uint16(n) => coerce_u64_value(*n as u64, transform),
|
||||
Value::Uint32(n) => coerce_u64_value(*n as u64, transform),
|
||||
Value::Uint64(n) => coerce_u64_value(*n, transform),
|
||||
|
||||
Value::Float32(n) => coerce_f64_value(*n as f64, transform),
|
||||
Value::Float64(n) => coerce_f64_value(*n, transform),
|
||||
|
||||
Value::Boolean(b) => coerce_bool_value(*b, transform),
|
||||
Value::String(s) => coerce_string_value(s, transform),
|
||||
|
||||
Value::Timestamp(input_timestamp) => match &transform.type_ {
|
||||
Value::Timestamp(target_timestamp) => match target_timestamp {
|
||||
Timestamp::Nanosecond(_) => Ok(Some(ValueData::TimestampNanosecondValue(
|
||||
input_timestamp.timestamp_nanos(),
|
||||
))),
|
||||
Timestamp::Microsecond(_) => Ok(Some(ValueData::TimestampMicrosecondValue(
|
||||
input_timestamp.timestamp_micros(),
|
||||
))),
|
||||
Timestamp::Millisecond(_) => Ok(Some(ValueData::TimestampMillisecondValue(
|
||||
input_timestamp.timestamp_millis(),
|
||||
))),
|
||||
Timestamp::Second(_) => Ok(Some(ValueData::TimestampSecondValue(
|
||||
input_timestamp.timestamp(),
|
||||
))),
|
||||
},
|
||||
VrlValue::Null => Ok(None),
|
||||
VrlValue::Integer(n) => coerce_i64_value(*n, transform),
|
||||
VrlValue::Float(n) => coerce_f64_value(n.into_inner(), transform),
|
||||
VrlValue::Boolean(b) => coerce_bool_value(*b, transform),
|
||||
VrlValue::Bytes(b) => coerce_string_value(String::from_utf8_lossy(b).as_ref(), transform),
|
||||
VrlValue::Timestamp(ts) => match transform.type_ {
|
||||
ColumnDataType::TimestampNanosecond => Ok(Some(ValueData::TimestampNanosecondValue(
|
||||
ts.timestamp_nanos_opt().context(InvalidTimestampSnafu {
|
||||
input: ts.to_rfc3339(),
|
||||
})?,
|
||||
))),
|
||||
ColumnDataType::TimestampMicrosecond => Ok(Some(ValueData::TimestampMicrosecondValue(
|
||||
ts.timestamp_micros(),
|
||||
))),
|
||||
ColumnDataType::TimestampMillisecond => Ok(Some(ValueData::TimestampMillisecondValue(
|
||||
ts.timestamp_millis(),
|
||||
))),
|
||||
ColumnDataType::TimestampSecond => {
|
||||
Ok(Some(ValueData::TimestampSecondValue(ts.timestamp())))
|
||||
}
|
||||
_ => CoerceIncompatibleTypesSnafu {
|
||||
msg: "Timestamp can only be coerced to another type",
|
||||
}
|
||||
.fail(),
|
||||
},
|
||||
|
||||
Value::Array(_) | Value::Map(_) => coerce_json_value(val, transform),
|
||||
VrlValue::Array(_) | VrlValue::Object(_) => coerce_json_value(val, transform),
|
||||
VrlValue::Regex(_) => VrlRegexValueSnafu.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
let val = match transform.type_ {
|
||||
Value::Int8(_) => ValueData::I8Value(b as i32),
|
||||
Value::Int16(_) => ValueData::I16Value(b as i32),
|
||||
Value::Int32(_) => ValueData::I32Value(b as i32),
|
||||
Value::Int64(_) => ValueData::I64Value(b as i64),
|
||||
ColumnDataType::Int8 => ValueData::I8Value(b as i32),
|
||||
ColumnDataType::Int16 => ValueData::I16Value(b as i32),
|
||||
ColumnDataType::Int32 => ValueData::I32Value(b as i32),
|
||||
ColumnDataType::Int64 => ValueData::I64Value(b as i64),
|
||||
|
||||
Value::Uint8(_) => ValueData::U8Value(b as u32),
|
||||
Value::Uint16(_) => ValueData::U16Value(b as u32),
|
||||
Value::Uint32(_) => ValueData::U32Value(b as u32),
|
||||
Value::Uint64(_) => ValueData::U64Value(b as u64),
|
||||
ColumnDataType::Uint8 => ValueData::U8Value(b as u32),
|
||||
ColumnDataType::Uint16 => ValueData::U16Value(b as u32),
|
||||
ColumnDataType::Uint32 => ValueData::U32Value(b as u32),
|
||||
ColumnDataType::Uint64 => ValueData::U64Value(b as u64),
|
||||
|
||||
Value::Float32(_) => ValueData::F32Value(if b { 1.0 } else { 0.0 }),
|
||||
Value::Float64(_) => ValueData::F64Value(if b { 1.0 } else { 0.0 }),
|
||||
ColumnDataType::Float32 => ValueData::F32Value(if b { 1.0 } else { 0.0 }),
|
||||
ColumnDataType::Float64 => ValueData::F64Value(if b { 1.0 } else { 0.0 }),
|
||||
|
||||
Value::Boolean(_) => ValueData::BoolValue(b),
|
||||
Value::String(_) => ValueData::StringValue(b.to_string()),
|
||||
ColumnDataType::Boolean => ValueData::BoolValue(b),
|
||||
ColumnDataType::String => ValueData::StringValue(b.to_string()),
|
||||
|
||||
Value::Timestamp(_) => match transform.on_failure {
|
||||
ColumnDataType::TimestampNanosecond
|
||||
| ColumnDataType::TimestampMicrosecond
|
||||
| ColumnDataType::TimestampMillisecond
|
||||
| ColumnDataType::TimestampSecond => match transform.on_failure {
|
||||
Some(OnFailure::Ignore) => return Ok(None),
|
||||
Some(OnFailure::Default) => {
|
||||
return CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail();
|
||||
@@ -239,14 +151,19 @@ fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>
|
||||
}
|
||||
},
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
ColumnDataType::Binary => {
|
||||
return CoerceJsonTypeToSnafu {
|
||||
ty: transform.type_.to_str_type(),
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
Value::Null => return Ok(None),
|
||||
_ => {
|
||||
return UnsupportedTypeInPipelineSnafu {
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Some(val))
|
||||
@@ -254,37 +171,35 @@ fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>
|
||||
|
||||
fn coerce_i64_value(n: i64, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
let val = match &transform.type_ {
|
||||
Value::Int8(_) => ValueData::I8Value(n as i32),
|
||||
Value::Int16(_) => ValueData::I16Value(n as i32),
|
||||
Value::Int32(_) => ValueData::I32Value(n as i32),
|
||||
Value::Int64(_) => ValueData::I64Value(n),
|
||||
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
|
||||
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
|
||||
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
|
||||
ColumnDataType::Int64 => ValueData::I64Value(n),
|
||||
|
||||
Value::Uint8(_) => ValueData::U8Value(n as u32),
|
||||
Value::Uint16(_) => ValueData::U16Value(n as u32),
|
||||
Value::Uint32(_) => ValueData::U32Value(n as u32),
|
||||
Value::Uint64(_) => ValueData::U64Value(n as u64),
|
||||
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
|
||||
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
|
||||
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
|
||||
ColumnDataType::Uint64 => ValueData::U64Value(n as u64),
|
||||
|
||||
Value::Float32(_) => ValueData::F32Value(n as f32),
|
||||
Value::Float64(_) => ValueData::F64Value(n as f64),
|
||||
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
|
||||
ColumnDataType::Float64 => ValueData::F64Value(n as f64),
|
||||
|
||||
Value::Boolean(_) => ValueData::BoolValue(n != 0),
|
||||
Value::String(_) => ValueData::StringValue(n.to_string()),
|
||||
ColumnDataType::Boolean => ValueData::BoolValue(n != 0),
|
||||
ColumnDataType::String => ValueData::StringValue(n.to_string()),
|
||||
|
||||
Value::Timestamp(unit) => match unit {
|
||||
Timestamp::Nanosecond(_) => ValueData::TimestampNanosecondValue(n),
|
||||
Timestamp::Microsecond(_) => ValueData::TimestampMicrosecondValue(n),
|
||||
Timestamp::Millisecond(_) => ValueData::TimestampMillisecondValue(n),
|
||||
Timestamp::Second(_) => ValueData::TimestampSecondValue(n),
|
||||
},
|
||||
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(n),
|
||||
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(n),
|
||||
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(n),
|
||||
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(n),
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
ColumnDataType::Binary => {
|
||||
return CoerceJsonTypeToSnafu {
|
||||
ty: transform.type_.to_str_type(),
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
Value::Null => return Ok(None),
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
Ok(Some(val))
|
||||
@@ -292,37 +207,35 @@ fn coerce_i64_value(n: i64, transform: &Transform) -> Result<Option<ValueData>>
|
||||
|
||||
fn coerce_u64_value(n: u64, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
let val = match &transform.type_ {
|
||||
Value::Int8(_) => ValueData::I8Value(n as i32),
|
||||
Value::Int16(_) => ValueData::I16Value(n as i32),
|
||||
Value::Int32(_) => ValueData::I32Value(n as i32),
|
||||
Value::Int64(_) => ValueData::I64Value(n as i64),
|
||||
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
|
||||
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
|
||||
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
|
||||
ColumnDataType::Int64 => ValueData::I64Value(n as i64),
|
||||
|
||||
Value::Uint8(_) => ValueData::U8Value(n as u32),
|
||||
Value::Uint16(_) => ValueData::U16Value(n as u32),
|
||||
Value::Uint32(_) => ValueData::U32Value(n as u32),
|
||||
Value::Uint64(_) => ValueData::U64Value(n),
|
||||
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
|
||||
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
|
||||
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
|
||||
ColumnDataType::Uint64 => ValueData::U64Value(n),
|
||||
|
||||
Value::Float32(_) => ValueData::F32Value(n as f32),
|
||||
Value::Float64(_) => ValueData::F64Value(n as f64),
|
||||
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
|
||||
ColumnDataType::Float64 => ValueData::F64Value(n as f64),
|
||||
|
||||
Value::Boolean(_) => ValueData::BoolValue(n != 0),
|
||||
Value::String(_) => ValueData::StringValue(n.to_string()),
|
||||
ColumnDataType::Boolean => ValueData::BoolValue(n != 0),
|
||||
ColumnDataType::String => ValueData::StringValue(n.to_string()),
|
||||
|
||||
Value::Timestamp(unit) => match unit {
|
||||
Timestamp::Nanosecond(_) => ValueData::TimestampNanosecondValue(n as i64),
|
||||
Timestamp::Microsecond(_) => ValueData::TimestampMicrosecondValue(n as i64),
|
||||
Timestamp::Millisecond(_) => ValueData::TimestampMillisecondValue(n as i64),
|
||||
Timestamp::Second(_) => ValueData::TimestampSecondValue(n as i64),
|
||||
},
|
||||
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(n as i64),
|
||||
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(n as i64),
|
||||
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(n as i64),
|
||||
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(n as i64),
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
ColumnDataType::Binary => {
|
||||
return CoerceJsonTypeToSnafu {
|
||||
ty: transform.type_.to_str_type(),
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
Value::Null => return Ok(None),
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
Ok(Some(val))
|
||||
@@ -330,23 +243,26 @@ fn coerce_u64_value(n: u64, transform: &Transform) -> Result<Option<ValueData>>
|
||||
|
||||
fn coerce_f64_value(n: f64, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
let val = match transform.type_ {
|
||||
Value::Int8(_) => ValueData::I8Value(n as i32),
|
||||
Value::Int16(_) => ValueData::I16Value(n as i32),
|
||||
Value::Int32(_) => ValueData::I32Value(n as i32),
|
||||
Value::Int64(_) => ValueData::I64Value(n as i64),
|
||||
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
|
||||
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
|
||||
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
|
||||
ColumnDataType::Int64 => ValueData::I64Value(n as i64),
|
||||
|
||||
Value::Uint8(_) => ValueData::U8Value(n as u32),
|
||||
Value::Uint16(_) => ValueData::U16Value(n as u32),
|
||||
Value::Uint32(_) => ValueData::U32Value(n as u32),
|
||||
Value::Uint64(_) => ValueData::U64Value(n as u64),
|
||||
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
|
||||
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
|
||||
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
|
||||
ColumnDataType::Uint64 => ValueData::U64Value(n as u64),
|
||||
|
||||
Value::Float32(_) => ValueData::F32Value(n as f32),
|
||||
Value::Float64(_) => ValueData::F64Value(n),
|
||||
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
|
||||
ColumnDataType::Float64 => ValueData::F64Value(n),
|
||||
|
||||
Value::Boolean(_) => ValueData::BoolValue(n != 0.0),
|
||||
Value::String(_) => ValueData::StringValue(n.to_string()),
|
||||
ColumnDataType::Boolean => ValueData::BoolValue(n != 0.0),
|
||||
ColumnDataType::String => ValueData::StringValue(n.to_string()),
|
||||
|
||||
Value::Timestamp(_) => match transform.on_failure {
|
||||
ColumnDataType::TimestampNanosecond
|
||||
| ColumnDataType::TimestampMicrosecond
|
||||
| ColumnDataType::TimestampMillisecond
|
||||
| ColumnDataType::TimestampSecond => match transform.on_failure {
|
||||
Some(OnFailure::Ignore) => return Ok(None),
|
||||
Some(OnFailure::Default) => {
|
||||
return CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail();
|
||||
@@ -356,14 +272,14 @@ fn coerce_f64_value(n: f64, transform: &Transform) -> Result<Option<ValueData>>
|
||||
}
|
||||
},
|
||||
|
||||
Value::Array(_) | Value::Map(_) => {
|
||||
ColumnDataType::Binary => {
|
||||
return CoerceJsonTypeToSnafu {
|
||||
ty: transform.type_.to_str_type(),
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
Value::Null => return Ok(None),
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
Ok(Some(val))
|
||||
@@ -376,12 +292,12 @@ macro_rules! coerce_string_value {
|
||||
Err(_) => match $transform.on_failure {
|
||||
Some(OnFailure::Ignore) => Ok(None),
|
||||
Some(OnFailure::Default) => match $transform.get_default() {
|
||||
Some(default) => coerce_value(default, $transform),
|
||||
None => coerce_value($transform.get_type_matched_default_val(), $transform),
|
||||
Some(default) => Ok(Some(default.clone())),
|
||||
None => $transform.get_type_matched_default_val().map(Some),
|
||||
},
|
||||
None => CoerceStringToTypeSnafu {
|
||||
s: $s,
|
||||
ty: $transform.type_.to_str_type(),
|
||||
ty: $transform.type_.as_str_name(),
|
||||
}
|
||||
.fail(),
|
||||
},
|
||||
@@ -389,92 +305,85 @@ macro_rules! coerce_string_value {
|
||||
};
|
||||
}
|
||||
|
||||
fn coerce_string_value(s: &String, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
fn coerce_string_value(s: &str, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
match transform.type_ {
|
||||
Value::Int8(_) => {
|
||||
ColumnDataType::Int8 => {
|
||||
coerce_string_value!(s, transform, i32, I8Value)
|
||||
}
|
||||
Value::Int16(_) => {
|
||||
ColumnDataType::Int16 => {
|
||||
coerce_string_value!(s, transform, i32, I16Value)
|
||||
}
|
||||
Value::Int32(_) => {
|
||||
ColumnDataType::Int32 => {
|
||||
coerce_string_value!(s, transform, i32, I32Value)
|
||||
}
|
||||
Value::Int64(_) => {
|
||||
ColumnDataType::Int64 => {
|
||||
coerce_string_value!(s, transform, i64, I64Value)
|
||||
}
|
||||
|
||||
Value::Uint8(_) => {
|
||||
ColumnDataType::Uint8 => {
|
||||
coerce_string_value!(s, transform, u32, U8Value)
|
||||
}
|
||||
Value::Uint16(_) => {
|
||||
ColumnDataType::Uint16 => {
|
||||
coerce_string_value!(s, transform, u32, U16Value)
|
||||
}
|
||||
Value::Uint32(_) => {
|
||||
ColumnDataType::Uint32 => {
|
||||
coerce_string_value!(s, transform, u32, U32Value)
|
||||
}
|
||||
Value::Uint64(_) => {
|
||||
ColumnDataType::Uint64 => {
|
||||
coerce_string_value!(s, transform, u64, U64Value)
|
||||
}
|
||||
|
||||
Value::Float32(_) => {
|
||||
ColumnDataType::Float32 => {
|
||||
coerce_string_value!(s, transform, f32, F32Value)
|
||||
}
|
||||
Value::Float64(_) => {
|
||||
ColumnDataType::Float64 => {
|
||||
coerce_string_value!(s, transform, f64, F64Value)
|
||||
}
|
||||
|
||||
Value::Boolean(_) => {
|
||||
ColumnDataType::Boolean => {
|
||||
coerce_string_value!(s, transform, bool, BoolValue)
|
||||
}
|
||||
|
||||
Value::String(_) => Ok(Some(ValueData::StringValue(s.to_string()))),
|
||||
ColumnDataType::String => Ok(Some(ValueData::StringValue(s.to_string()))),
|
||||
|
||||
Value::Timestamp(_) => match transform.on_failure {
|
||||
ColumnDataType::TimestampNanosecond
|
||||
| ColumnDataType::TimestampMicrosecond
|
||||
| ColumnDataType::TimestampMillisecond
|
||||
| ColumnDataType::TimestampSecond => match transform.on_failure {
|
||||
Some(OnFailure::Ignore) => Ok(None),
|
||||
Some(OnFailure::Default) => CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail(),
|
||||
None => CoerceUnsupportedEpochTypeSnafu { ty: "String" }.fail(),
|
||||
},
|
||||
|
||||
Value::Array(_) | Value::Map(_) => CoerceStringToTypeSnafu {
|
||||
ColumnDataType::Binary => CoerceStringToTypeSnafu {
|
||||
s,
|
||||
ty: transform.type_.to_str_type(),
|
||||
ty: transform.type_.as_str_name(),
|
||||
}
|
||||
.fail(),
|
||||
|
||||
Value::Null => Ok(None),
|
||||
_ => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
fn coerce_json_value(v: &Value, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
fn coerce_json_value(v: &VrlValue, transform: &Transform) -> Result<Option<ValueData>> {
|
||||
match &transform.type_ {
|
||||
Value::Array(_) | Value::Map(_) => (),
|
||||
ColumnDataType::Binary => (),
|
||||
t => {
|
||||
return CoerceTypeToJsonSnafu {
|
||||
ty: t.to_str_type(),
|
||||
ty: t.as_str_name(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
match v {
|
||||
Value::Map(_) => {
|
||||
let data: jsonb::Value = v.into();
|
||||
Ok(Some(ValueData::BinaryValue(data.to_vec())))
|
||||
}
|
||||
Value::Array(_) => {
|
||||
let data: jsonb::Value = v.into();
|
||||
Ok(Some(ValueData::BinaryValue(data.to_vec())))
|
||||
}
|
||||
_ => CoerceTypeToJsonSnafu {
|
||||
ty: v.to_str_type(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
let data: jsonb::Value = vrl_value_to_jsonb_value(v);
|
||||
Ok(Some(ValueData::BinaryValue(data.to_vec())))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use vrl::prelude::Bytes;
|
||||
|
||||
use super::*;
|
||||
use crate::etl::field::Fields;
|
||||
|
||||
@@ -482,7 +391,7 @@ mod tests {
|
||||
fn test_coerce_string_without_on_failure() {
|
||||
let transform = Transform {
|
||||
fields: Fields::default(),
|
||||
type_: Value::Int32(0),
|
||||
type_: ColumnDataType::Int32,
|
||||
default: None,
|
||||
index: None,
|
||||
on_failure: None,
|
||||
@@ -491,14 +400,14 @@ mod tests {
|
||||
|
||||
// valid string
|
||||
{
|
||||
let val = Value::String("123".to_string());
|
||||
let val = VrlValue::Integer(123);
|
||||
let result = coerce_value(&val, &transform).unwrap();
|
||||
assert_eq!(result, Some(ValueData::I32Value(123)));
|
||||
}
|
||||
|
||||
// invalid string
|
||||
{
|
||||
let val = Value::String("hello".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("hello"));
|
||||
let result = coerce_value(&val, &transform);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
@@ -508,14 +417,14 @@ mod tests {
|
||||
fn test_coerce_string_with_on_failure_ignore() {
|
||||
let transform = Transform {
|
||||
fields: Fields::default(),
|
||||
type_: Value::Int32(0),
|
||||
type_: ColumnDataType::Int32,
|
||||
default: None,
|
||||
index: None,
|
||||
on_failure: Some(OnFailure::Ignore),
|
||||
tag: false,
|
||||
};
|
||||
|
||||
let val = Value::String("hello".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("hello"));
|
||||
let result = coerce_value(&val, &transform).unwrap();
|
||||
assert_eq!(result, None);
|
||||
}
|
||||
@@ -524,7 +433,7 @@ mod tests {
|
||||
fn test_coerce_string_with_on_failure_default() {
|
||||
let mut transform = Transform {
|
||||
fields: Fields::default(),
|
||||
type_: Value::Int32(0),
|
||||
type_: ColumnDataType::Int32,
|
||||
default: None,
|
||||
index: None,
|
||||
on_failure: Some(OnFailure::Default),
|
||||
@@ -533,15 +442,15 @@ mod tests {
|
||||
|
||||
// with no explicit default value
|
||||
{
|
||||
let val = Value::String("hello".to_string());
|
||||
let val = VrlValue::Bytes(Bytes::from("hello"));
|
||||
let result = coerce_value(&val, &transform).unwrap();
|
||||
assert_eq!(result, Some(ValueData::I32Value(0)));
|
||||
}
|
||||
|
||||
// with explicit default value
|
||||
{
|
||||
transform.default = Some(Value::Int32(42));
|
||||
let val = Value::String("hello".to_string());
|
||||
transform.default = Some(ValueData::I32Value(42));
|
||||
let val = VrlValue::Bytes(Bytes::from("hello"));
|
||||
let result = coerce_value(&val, &transform).unwrap();
|
||||
assert_eq!(result, Some(ValueData::I32Value(42)));
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,81 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct Array {
|
||||
pub values: Vec<Value>,
|
||||
}
|
||||
|
||||
impl Array {
|
||||
pub fn new() -> Self {
|
||||
Array { values: vec![] }
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Array {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let values = self
|
||||
.values
|
||||
.iter()
|
||||
.map(|v| v.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ");
|
||||
write!(f, "[{}]", values)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for Array {
|
||||
type Target = Vec<Value>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.values
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::DerefMut for Array {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.values
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for Array {
|
||||
type Item = Value;
|
||||
|
||||
type IntoIter = std::vec::IntoIter<Value>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.values.into_iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Value>> for Array {
|
||||
fn from(values: Vec<Value>) -> Self {
|
||||
Array { values }
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Vec<serde_json::Value>> for Array {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: Vec<serde_json::Value>) -> Result<Self> {
|
||||
let values = value
|
||||
.into_iter()
|
||||
.map(|v| v.try_into())
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
Ok(Array { values })
|
||||
}
|
||||
}
|
||||
@@ -1,70 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::etl::value::Value;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub struct Map {
|
||||
pub values: BTreeMap<String, Value>,
|
||||
}
|
||||
|
||||
impl Map {
|
||||
pub fn one(key: impl Into<String>, value: Value) -> Map {
|
||||
let mut map = Map::default();
|
||||
map.insert(key, value);
|
||||
map
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, key: impl Into<String>, value: Value) {
|
||||
self.values.insert(key.into(), value);
|
||||
}
|
||||
|
||||
pub fn extend(&mut self, Map { values }: Map) {
|
||||
self.values.extend(values);
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BTreeMap<String, Value>> for Map {
|
||||
fn from(values: BTreeMap<String, Value>) -> Self {
|
||||
Self { values }
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::Deref for Map {
|
||||
type Target = BTreeMap<String, Value>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.values
|
||||
}
|
||||
}
|
||||
|
||||
impl std::ops::DerefMut for Map {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.values
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Map {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let values = self
|
||||
.values
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{}: {}", k, v))
|
||||
.collect::<Vec<String>>()
|
||||
.join(", ");
|
||||
write!(f, "{{{}}}", values)
|
||||
}
|
||||
}
|
||||
@@ -1,140 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Timestamp {
|
||||
Nanosecond(i64),
|
||||
Microsecond(i64),
|
||||
Millisecond(i64),
|
||||
Second(i64),
|
||||
}
|
||||
|
||||
pub(crate) const NANOSECOND_RESOLUTION: &str = "nanosecond";
|
||||
pub(crate) const NANO_RESOLUTION: &str = "nano";
|
||||
pub(crate) const NS_RESOLUTION: &str = "ns";
|
||||
pub(crate) const MICROSECOND_RESOLUTION: &str = "microsecond";
|
||||
pub(crate) const MICRO_RESOLUTION: &str = "micro";
|
||||
pub(crate) const US_RESOLUTION: &str = "us";
|
||||
pub(crate) const MILLISECOND_RESOLUTION: &str = "millisecond";
|
||||
pub(crate) const MILLI_RESOLUTION: &str = "milli";
|
||||
pub(crate) const MS_RESOLUTION: &str = "ms";
|
||||
pub(crate) const SECOND_RESOLUTION: &str = "second";
|
||||
pub(crate) const SEC_RESOLUTION: &str = "sec";
|
||||
pub(crate) const S_RESOLUTION: &str = "s";
|
||||
|
||||
pub(crate) const VALID_RESOLUTIONS: [&str; 12] = [
|
||||
NANOSECOND_RESOLUTION,
|
||||
NANO_RESOLUTION,
|
||||
NS_RESOLUTION,
|
||||
MICROSECOND_RESOLUTION,
|
||||
MICRO_RESOLUTION,
|
||||
US_RESOLUTION,
|
||||
MILLISECOND_RESOLUTION,
|
||||
MILLI_RESOLUTION,
|
||||
MS_RESOLUTION,
|
||||
SECOND_RESOLUTION,
|
||||
SEC_RESOLUTION,
|
||||
S_RESOLUTION,
|
||||
];
|
||||
|
||||
impl Timestamp {
|
||||
pub(crate) fn timestamp_nanos(&self) -> i64 {
|
||||
match self {
|
||||
Timestamp::Nanosecond(v) => *v,
|
||||
Timestamp::Microsecond(v) => *v * 1_000,
|
||||
Timestamp::Millisecond(v) => *v * 1_000_000,
|
||||
Timestamp::Second(v) => *v * 1_000_000_000,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn timestamp_micros(&self) -> i64 {
|
||||
match self {
|
||||
Timestamp::Nanosecond(v) => *v / 1_000,
|
||||
Timestamp::Microsecond(v) => *v,
|
||||
Timestamp::Millisecond(v) => *v * 1_000,
|
||||
Timestamp::Second(v) => *v * 1_000_000,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn timestamp_millis(&self) -> i64 {
|
||||
match self {
|
||||
Timestamp::Nanosecond(v) => *v / 1_000_000,
|
||||
Timestamp::Microsecond(v) => *v / 1_000,
|
||||
Timestamp::Millisecond(v) => *v,
|
||||
Timestamp::Second(v) => *v * 1_000,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn timestamp(&self) -> i64 {
|
||||
match self {
|
||||
Timestamp::Nanosecond(v) => *v / 1_000_000_000,
|
||||
Timestamp::Microsecond(v) => *v / 1_000_000,
|
||||
Timestamp::Millisecond(v) => *v / 1_000,
|
||||
Timestamp::Second(v) => *v,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn to_unit(&self, unit: &TimeUnit) -> i64 {
|
||||
match unit {
|
||||
TimeUnit::Second => self.timestamp(),
|
||||
TimeUnit::Millisecond => self.timestamp_millis(),
|
||||
TimeUnit::Microsecond => self.timestamp_micros(),
|
||||
TimeUnit::Nanosecond => self.timestamp_nanos(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_unit(&self) -> TimeUnit {
|
||||
match self {
|
||||
Timestamp::Nanosecond(_) => TimeUnit::Nanosecond,
|
||||
Timestamp::Microsecond(_) => TimeUnit::Microsecond,
|
||||
Timestamp::Millisecond(_) => TimeUnit::Millisecond,
|
||||
Timestamp::Second(_) => TimeUnit::Second,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_datetime(&self) -> Option<DateTime<Utc>> {
|
||||
match self {
|
||||
Timestamp::Nanosecond(v) => Some(DateTime::from_timestamp_nanos(*v)),
|
||||
Timestamp::Microsecond(v) => DateTime::from_timestamp_micros(*v),
|
||||
Timestamp::Millisecond(v) => DateTime::from_timestamp_millis(*v),
|
||||
Timestamp::Second(v) => DateTime::from_timestamp(*v, 0),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_datetime(dt: DateTime<Utc>) -> Option<Self> {
|
||||
dt.timestamp_nanos_opt().map(Timestamp::Nanosecond)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Timestamp {
|
||||
fn default() -> Self {
|
||||
Timestamp::Nanosecond(chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default())
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Timestamp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let (value, resolution) = match self {
|
||||
Timestamp::Nanosecond(v) => (v, NANOSECOND_RESOLUTION),
|
||||
Timestamp::Microsecond(v) => (v, MICROSECOND_RESOLUTION),
|
||||
Timestamp::Millisecond(v) => (v, MILLISECOND_RESOLUTION),
|
||||
Timestamp::Second(v) => (v, SECOND_RESOLUTION),
|
||||
};
|
||||
|
||||
write!(f, "{}, resolution: {}", value, resolution)
|
||||
}
|
||||
}
|
||||
@@ -12,6 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#![feature(string_from_utf8_lossy_owned)]
|
||||
|
||||
mod dispatcher;
|
||||
pub mod error;
|
||||
mod etl;
|
||||
@@ -24,10 +26,8 @@ pub use etl::processor::Processor;
|
||||
pub use etl::transform::transformer::greptime::{GreptimePipelineParams, SchemaInfo};
|
||||
pub use etl::transform::transformer::identity_pipeline;
|
||||
pub use etl::transform::GreptimeTransformer;
|
||||
pub use etl::value::{Array, Map, Timestamp, Value};
|
||||
pub use etl::{
|
||||
json_array_to_map, json_to_map, parse, simd_json_array_to_map, simd_json_to_map, Content,
|
||||
DispatchedTo, Pipeline, PipelineExecOutput, TransformedOutput, TransformerMode,
|
||||
parse, Content, DispatchedTo, Pipeline, PipelineExecOutput, TransformedOutput, TransformerMode,
|
||||
};
|
||||
pub use manager::{
|
||||
pipeline_operator, table, util, IdentityTimeIndex, PipelineContext, PipelineDefinition,
|
||||
|
||||
@@ -16,18 +16,22 @@ use std::sync::Arc;
|
||||
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::ColumnDataType;
|
||||
use chrono::{DateTime, Utc};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use datatypes::timestamp::TimestampNanosecond;
|
||||
use itertools::Itertools;
|
||||
use session::context::Channel;
|
||||
use snafu::ensure;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use util::to_pipeline_version;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::error::{CastTypeSnafu, InvalidCustomTimeIndexSnafu, PipelineMissingSnafu, Result};
|
||||
use crate::etl::value::time::{MS_RESOLUTION, NS_RESOLUTION, S_RESOLUTION, US_RESOLUTION};
|
||||
use crate::error::{
|
||||
CastTypeSnafu, InvalidCustomTimeIndexSnafu, InvalidTimestampSnafu, PipelineMissingSnafu, Result,
|
||||
};
|
||||
use crate::etl::value::{MS_RESOLUTION, NS_RESOLUTION, S_RESOLUTION, US_RESOLUTION};
|
||||
use crate::table::PipelineTable;
|
||||
use crate::{GreptimePipelineParams, Pipeline, Value};
|
||||
use crate::{GreptimePipelineParams, Pipeline};
|
||||
|
||||
mod pipeline_cache;
|
||||
pub mod pipeline_operator;
|
||||
@@ -232,7 +236,7 @@ impl IdentityTimeIndex {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_column_name(&self) -> &String {
|
||||
pub fn get_column_name(&self) -> &str {
|
||||
match self {
|
||||
IdentityTimeIndex::Epoch(field, _, _) => field,
|
||||
IdentityTimeIndex::DateStr(field, _, _) => field,
|
||||
@@ -258,25 +262,25 @@ impl IdentityTimeIndex {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_timestamp(&self, value: Option<&Value>) -> Result<ValueData> {
|
||||
pub fn get_timestamp_value(&self, value: Option<&VrlValue>) -> Result<ValueData> {
|
||||
match self {
|
||||
IdentityTimeIndex::Epoch(_, unit, ignore_errors) => {
|
||||
let v = match value {
|
||||
Some(Value::Int32(v)) => *v as i64,
|
||||
Some(Value::Int64(v)) => *v,
|
||||
Some(Value::Uint32(v)) => *v as i64,
|
||||
Some(Value::Uint64(v)) => *v as i64,
|
||||
Some(Value::String(s)) => match s.parse::<i64>() {
|
||||
Some(VrlValue::Integer(v)) => *v,
|
||||
Some(VrlValue::Bytes(s)) => match String::from_utf8_lossy(s).parse::<i64>() {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
return if_ignore_errors(
|
||||
*ignore_errors,
|
||||
*unit,
|
||||
format!("failed to convert {} to number", s),
|
||||
format!(
|
||||
"failed to convert {} to number",
|
||||
String::from_utf8_lossy(s)
|
||||
),
|
||||
)
|
||||
}
|
||||
},
|
||||
Some(Value::Timestamp(timestamp)) => timestamp.to_unit(unit),
|
||||
Some(VrlValue::Timestamp(timestamp)) => datetime_utc_to_unit(timestamp, unit)?,
|
||||
Some(v) => {
|
||||
return if_ignore_errors(
|
||||
*ignore_errors,
|
||||
@@ -292,7 +296,7 @@ impl IdentityTimeIndex {
|
||||
}
|
||||
IdentityTimeIndex::DateStr(_, format, ignore_errors) => {
|
||||
let v = match value {
|
||||
Some(Value::String(s)) => s,
|
||||
Some(VrlValue::Bytes(s)) => String::from_utf8_lossy(s),
|
||||
Some(v) => {
|
||||
return if_ignore_errors(
|
||||
*ignore_errors,
|
||||
@@ -309,7 +313,7 @@ impl IdentityTimeIndex {
|
||||
}
|
||||
};
|
||||
|
||||
let timestamp = match chrono::DateTime::parse_from_str(v, format) {
|
||||
let timestamp = match chrono::DateTime::parse_from_str(&v, format) {
|
||||
Ok(ts) => ts,
|
||||
Err(_) => {
|
||||
return if_ignore_errors(
|
||||
@@ -321,13 +325,31 @@ impl IdentityTimeIndex {
|
||||
};
|
||||
|
||||
Ok(ValueData::TimestampNanosecondValue(
|
||||
timestamp.timestamp_nanos_opt().unwrap_or_default(),
|
||||
timestamp
|
||||
.timestamp_nanos_opt()
|
||||
.context(InvalidTimestampSnafu {
|
||||
input: timestamp.to_rfc3339(),
|
||||
})?,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn datetime_utc_to_unit(timestamp: &DateTime<Utc>, unit: &TimeUnit) -> Result<i64> {
|
||||
let ts = match unit {
|
||||
TimeUnit::Nanosecond => timestamp
|
||||
.timestamp_nanos_opt()
|
||||
.context(InvalidTimestampSnafu {
|
||||
input: timestamp.to_rfc3339(),
|
||||
})?,
|
||||
TimeUnit::Microsecond => timestamp.timestamp_micros(),
|
||||
TimeUnit::Millisecond => timestamp.timestamp_millis(),
|
||||
TimeUnit::Second => timestamp.timestamp(),
|
||||
};
|
||||
Ok(ts)
|
||||
}
|
||||
|
||||
fn if_ignore_errors(ignore_errors: bool, unit: TimeUnit, msg: String) -> Result<ValueData> {
|
||||
if ignore_errors {
|
||||
Ok(time_unit_to_value_data(
|
||||
|
||||
@@ -15,12 +15,12 @@
|
||||
use dyn_fmt::AsStrFormatExt;
|
||||
use regex::Regex;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use vrl::value::Value as VrlValue;
|
||||
use yaml_rust::Yaml;
|
||||
|
||||
use crate::error::{
|
||||
Error, InvalidTableSuffixTemplateSnafu, RequiredTableSuffixTemplateSnafu, Result,
|
||||
};
|
||||
use crate::Value;
|
||||
|
||||
const REPLACE_KEY: &str = "{}";
|
||||
|
||||
@@ -47,22 +47,16 @@ pub(crate) struct TableSuffixTemplate {
|
||||
}
|
||||
|
||||
impl TableSuffixTemplate {
|
||||
pub fn apply(&self, val: &Value) -> Option<String> {
|
||||
pub fn apply(&self, val: &VrlValue) -> Option<String> {
|
||||
let val = val.as_object()?;
|
||||
let values = self
|
||||
.keys
|
||||
.iter()
|
||||
.filter_map(|key| {
|
||||
let v = val.get(key)?;
|
||||
let v = val.get(key.as_str())?;
|
||||
match v {
|
||||
Value::Int8(v) => Some(v.to_string()),
|
||||
Value::Int16(v) => Some(v.to_string()),
|
||||
Value::Int32(v) => Some(v.to_string()),
|
||||
Value::Int64(v) => Some(v.to_string()),
|
||||
Value::Uint8(v) => Some(v.to_string()),
|
||||
Value::Uint16(v) => Some(v.to_string()),
|
||||
Value::Uint32(v) => Some(v.to_string()),
|
||||
Value::Uint64(v) => Some(v.to_string()),
|
||||
Value::String(v) => Some(v.clone()),
|
||||
VrlValue::Integer(v) => Some(v.to_string()),
|
||||
VrlValue::Bytes(v) => Some(String::from_utf8_lossy_owned(v.to_vec())),
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
|
||||
@@ -13,11 +13,12 @@
|
||||
// limitations under the License.
|
||||
|
||||
use greptime_proto::v1::{ColumnDataType, ColumnSchema, Rows, SemanticType};
|
||||
use pipeline::{json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext};
|
||||
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext};
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
/// test util function to parse and execute pipeline
|
||||
pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
|
||||
let input_value = serde_json::from_str::<serde_json::Value>(input_str).unwrap();
|
||||
let input_value = serde_json::from_str::<VrlValue>(input_str).unwrap();
|
||||
|
||||
let yaml_content = Content::Yaml(pipeline_yaml);
|
||||
let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline");
|
||||
@@ -32,21 +33,19 @@ pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
|
||||
let mut rows = Vec::new();
|
||||
|
||||
match input_value {
|
||||
serde_json::Value::Array(array) => {
|
||||
VrlValue::Array(array) => {
|
||||
for value in array {
|
||||
let intermediate_status = json_to_map(value).unwrap();
|
||||
let row = pipeline
|
||||
.exec_mut(intermediate_status, &pipeline_ctx, &mut schema_info)
|
||||
.exec_mut(value, &pipeline_ctx, &mut schema_info)
|
||||
.expect("failed to exec pipeline")
|
||||
.into_transformed()
|
||||
.expect("expect transformed result ");
|
||||
rows.push(row.0);
|
||||
}
|
||||
}
|
||||
serde_json::Value::Object(_) => {
|
||||
let intermediate_status = json_to_map(input_value).unwrap();
|
||||
VrlValue::Object(_) => {
|
||||
let row = pipeline
|
||||
.exec_mut(intermediate_status, &pipeline_ctx, &mut schema_info)
|
||||
.exec_mut(input_value, &pipeline_ctx, &mut schema_info)
|
||||
.expect("failed to exec pipeline")
|
||||
.into_transformed()
|
||||
.expect("expect transformed result ");
|
||||
|
||||
@@ -16,7 +16,7 @@ mod common;
|
||||
|
||||
use greptime_proto::v1::value::ValueData::StringValue;
|
||||
use greptime_proto::v1::{ColumnDataType, SemanticType};
|
||||
use pipeline::{json_to_map, setup_pipeline, PipelineContext};
|
||||
use pipeline::{setup_pipeline, PipelineContext};
|
||||
|
||||
fn make_string_column_schema(name: String) -> greptime_proto::v1::ColumnSchema {
|
||||
common::make_column_schema(name, ColumnDataType::String, SemanticType::Field)
|
||||
@@ -282,7 +282,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let result = json_to_map(input_value).unwrap();
|
||||
let result = input_value.into();
|
||||
|
||||
let row = pipeline.exec_mut(result, &pipeline_ctx, &mut schema_info);
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ use greptime_proto::v1::value::ValueData::{
|
||||
U32Value, U64Value, U8Value,
|
||||
};
|
||||
use greptime_proto::v1::Value as GreptimeValue;
|
||||
use pipeline::{json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext};
|
||||
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext};
|
||||
|
||||
#[test]
|
||||
fn test_complex_data() {
|
||||
@@ -425,7 +425,7 @@ transform:
|
||||
&pipeline_param,
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
let stats = json_to_map(input_value).unwrap();
|
||||
let stats = input_value.into();
|
||||
|
||||
let row = pipeline
|
||||
.exec_mut(stats, &pipeline_ctx, &mut schema_info)
|
||||
@@ -500,7 +500,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -615,7 +615,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -687,7 +687,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -733,7 +733,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -798,7 +798,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -845,7 +845,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -913,7 +913,7 @@ transform:
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value1).unwrap();
|
||||
let status = input_value1.into();
|
||||
let dispatched_to = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -922,7 +922,7 @@ transform:
|
||||
assert_eq!(dispatched_to.table_suffix, "http");
|
||||
assert_eq!(dispatched_to.pipeline.unwrap(), "access_log_pipeline");
|
||||
|
||||
let status = json_to_map(input_value2).unwrap();
|
||||
let status = input_value2.into();
|
||||
let row = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap()
|
||||
@@ -983,7 +983,7 @@ table_suffix: _${logger}
|
||||
session::context::Channel::Unknown,
|
||||
);
|
||||
|
||||
let status = json_to_map(input_value).unwrap();
|
||||
let status = input_value.into();
|
||||
let exec_re = pipeline
|
||||
.exec_mut(status, &pipeline_ctx, &mut schema_info)
|
||||
.unwrap();
|
||||
|
||||
@@ -128,6 +128,7 @@ tower-http = { version = "0.6", features = ["full"] }
|
||||
tracing.workspace = true
|
||||
urlencoding = "2.1"
|
||||
uuid.workspace = true
|
||||
vrl.workspace = true
|
||||
zstd.workspace = true
|
||||
|
||||
[target.'cfg(not(windows))'.dependencies]
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
@@ -30,9 +31,10 @@ use pipeline::{
|
||||
use serde_json::{json, Deserializer, Value};
|
||||
use session::context::{Channel, QueryContext};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::error::{
|
||||
status_code_to_http_status, InvalidElasticsearchInputSnafu, ParseJsonSnafu, PipelineSnafu,
|
||||
status_code_to_http_status, InvalidElasticsearchInputSnafu, ParseJsonSnafu,
|
||||
Result as ServersResult,
|
||||
};
|
||||
use crate::http::event::{
|
||||
@@ -287,8 +289,8 @@ fn parse_bulk_request(
|
||||
msg_field: &Option<String>,
|
||||
) -> ServersResult<Vec<PipelineIngestRequest>> {
|
||||
// Read the ndjson payload and convert it to `Vec<Value>`. Return error if the input is not a valid JSON.
|
||||
let values: Vec<Value> = Deserializer::from_str(input)
|
||||
.into_iter::<Value>()
|
||||
let values: Vec<VrlValue> = Deserializer::from_str(input)
|
||||
.into_iter::<VrlValue>()
|
||||
.collect::<Result<_, _>>()
|
||||
.context(ParseJsonSnafu)?;
|
||||
|
||||
@@ -307,12 +309,13 @@ fn parse_bulk_request(
|
||||
// For Elasticsearch post `_bulk` API, each chunk contains two objects:
|
||||
// 1. The first object is the command, it should be `create` or `index`.
|
||||
// 2. The second object is the document data.
|
||||
while let Some(mut cmd) = values.next() {
|
||||
while let Some(cmd) = values.next() {
|
||||
// NOTE: Although the native Elasticsearch API supports upsert in `index` command, we don't support change any data in `index` command and it's same as `create` command.
|
||||
let index = if let Some(cmd) = cmd.get_mut("create") {
|
||||
get_index_from_cmd(cmd.take())?
|
||||
} else if let Some(cmd) = cmd.get_mut("index") {
|
||||
get_index_from_cmd(cmd.take())?
|
||||
let mut cmd = cmd.into_object();
|
||||
let index = if let Some(cmd) = cmd.as_mut().and_then(|c| c.remove("create")) {
|
||||
get_index_from_cmd(cmd)?
|
||||
} else if let Some(cmd) = cmd.as_mut().and_then(|c| c.remove("index")) {
|
||||
get_index_from_cmd(cmd)?
|
||||
} else {
|
||||
return InvalidElasticsearchInputSnafu {
|
||||
reason: format!(
|
||||
@@ -339,7 +342,6 @@ fn parse_bulk_request(
|
||||
}
|
||||
);
|
||||
|
||||
let log_value = pipeline::json_to_map(log_value).context(PipelineSnafu)?;
|
||||
requests.push(PipelineIngestRequest {
|
||||
table: index.unwrap_or_else(|| index_from_url.as_ref().unwrap().clone()),
|
||||
values: vec![log_value],
|
||||
@@ -357,39 +359,50 @@ fn parse_bulk_request(
|
||||
}
|
||||
|
||||
// Get the index from the command. We will take index as the table name in GreptimeDB.
|
||||
fn get_index_from_cmd(mut v: Value) -> ServersResult<Option<String>> {
|
||||
if let Some(index) = v.get_mut("_index") {
|
||||
if let Value::String(index) = index.take() {
|
||||
Ok(Some(index))
|
||||
} else {
|
||||
// If the `_index` exists, it should be a string.
|
||||
InvalidElasticsearchInputSnafu {
|
||||
reason: "index is not a string in bulk request".to_string(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
fn get_index_from_cmd(v: VrlValue) -> ServersResult<Option<String>> {
|
||||
let Some(index) = v.into_object().and_then(|mut m| m.remove("_index")) else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
if let VrlValue::Bytes(index) = index {
|
||||
Ok(Some(String::from_utf8_lossy(&index).to_string()))
|
||||
} else {
|
||||
Ok(None)
|
||||
// If the `_index` exists, it should be a string.
|
||||
InvalidElasticsearchInputSnafu {
|
||||
reason: "index is not a string in bulk request",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
// If the msg_field is provided, fetch the value of the field from the document data.
|
||||
// For example, if the `msg_field` is `message`, and the document data is `{"message":"hello"}`, the log value will be Value::String("hello").
|
||||
fn get_log_value_from_msg_field(mut v: Value, msg_field: &str) -> Value {
|
||||
if let Some(message) = v.get_mut(msg_field) {
|
||||
let message = message.take();
|
||||
fn get_log_value_from_msg_field(v: VrlValue, msg_field: &str) -> VrlValue {
|
||||
let VrlValue::Object(mut m) = v else {
|
||||
return v;
|
||||
};
|
||||
|
||||
if let Some(message) = m.remove(msg_field) {
|
||||
match message {
|
||||
Value::String(s) => match serde_json::from_str::<Value>(&s) {
|
||||
Ok(s) => s,
|
||||
// If the message is not a valid JSON, return a map with the original message key and value.
|
||||
Err(_) => json!({msg_field: s}),
|
||||
},
|
||||
VrlValue::Bytes(bytes) => {
|
||||
match serde_json::from_slice::<VrlValue>(&bytes) {
|
||||
Ok(v) => v,
|
||||
// If the message is not a valid JSON, return a map with the original message key and value.
|
||||
Err(_) => {
|
||||
let map = BTreeMap::from([(
|
||||
msg_field.to_string().into(),
|
||||
VrlValue::Bytes(bytes),
|
||||
)]);
|
||||
VrlValue::Object(map)
|
||||
}
|
||||
}
|
||||
}
|
||||
// If the message is not a string, just use the original message as the log value.
|
||||
_ => message,
|
||||
}
|
||||
} else {
|
||||
// If the msg_field is not found, just use the original message as the log value.
|
||||
v
|
||||
VrlValue::Object(m)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -414,12 +427,14 @@ mod tests {
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![
|
||||
pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap(),
|
||||
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
|
||||
],
|
||||
},
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
),
|
||||
@@ -436,11 +451,15 @@ mod tests {
|
||||
Ok(vec![
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
|
||||
],
|
||||
},
|
||||
PipelineIngestRequest {
|
||||
table: "logs".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
),
|
||||
@@ -457,11 +476,15 @@ mod tests {
|
||||
Ok(vec![
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
|
||||
],
|
||||
},
|
||||
PipelineIngestRequest {
|
||||
table: "logs".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
),
|
||||
@@ -477,7 +500,9 @@ mod tests {
|
||||
Ok(vec![
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
),
|
||||
@@ -494,11 +519,15 @@ mod tests {
|
||||
Ok(vec![
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
|
||||
],
|
||||
},
|
||||
PipelineIngestRequest {
|
||||
table: "test".to_string(),
|
||||
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
|
||||
values: vec![
|
||||
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
),
|
||||
@@ -516,13 +545,13 @@ mod tests {
|
||||
PipelineIngestRequest {
|
||||
table: "logs-generic-default".to_string(),
|
||||
values: vec![
|
||||
pipeline::json_to_map(json!({"message": "172.16.0.1 - - [25/May/2024:20:19:37 +0000] \"GET /contact HTTP/1.1\" 404 162 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1\""})).unwrap(),
|
||||
json!({"message": "172.16.0.1 - - [25/May/2024:20:19:37 +0000] \"GET /contact HTTP/1.1\" 404 162 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1\""}).into(),
|
||||
],
|
||||
},
|
||||
PipelineIngestRequest {
|
||||
table: "logs-generic-default".to_string(),
|
||||
values: vec![
|
||||
pipeline::json_to_map(json!({"message": "10.0.0.1 - - [25/May/2024:20:18:37 +0000] \"GET /images/logo.png HTTP/1.1\" 304 0 \"-\" \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0\""})).unwrap(),
|
||||
json!({"message": "10.0.0.1 - - [25/May/2024:20:18:37 +0000] \"GET /images/logo.png HTTP/1.1\" 304 0 \"-\" \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0\""}).into(),
|
||||
],
|
||||
},
|
||||
]),
|
||||
|
||||
@@ -35,14 +35,14 @@ use headers::ContentType;
|
||||
use lazy_static::lazy_static;
|
||||
use mime_guess::mime;
|
||||
use pipeline::util::to_pipeline_version;
|
||||
use pipeline::{
|
||||
ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition, Value as PipelineValue,
|
||||
};
|
||||
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Deserializer, Map, Value as JsonValue};
|
||||
use session::context::{Channel, QueryContext, QueryContextRef};
|
||||
use simd_json::Buffers;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use strum::{EnumIter, IntoEnumIterator};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
status_code_to_http_status, Error, InvalidParameterSnafu, ParseJsonSnafu, PipelineSnafu, Result,
|
||||
@@ -117,7 +117,7 @@ pub(crate) struct PipelineIngestRequest {
|
||||
/// The table where the log data will be written to.
|
||||
pub table: String,
|
||||
/// The log data to be ingested.
|
||||
pub values: Vec<PipelineValue>,
|
||||
pub values: Vec<VrlValue>,
|
||||
}
|
||||
|
||||
pub struct PipelineContent(String);
|
||||
@@ -295,18 +295,18 @@ pub async fn delete_pipeline(
|
||||
/// Transform NDJSON array into a single array
|
||||
/// always return an array
|
||||
fn transform_ndjson_array_factory(
|
||||
values: impl IntoIterator<Item = Result<JsonValue, serde_json::Error>>,
|
||||
values: impl IntoIterator<Item = Result<VrlValue, serde_json::Error>>,
|
||||
ignore_error: bool,
|
||||
) -> Result<Vec<JsonValue>> {
|
||||
) -> Result<Vec<VrlValue>> {
|
||||
values
|
||||
.into_iter()
|
||||
.try_fold(Vec::with_capacity(100), |mut acc_array, item| match item {
|
||||
Ok(item_value) => {
|
||||
match item_value {
|
||||
JsonValue::Array(item_array) => {
|
||||
VrlValue::Array(item_array) => {
|
||||
acc_array.extend(item_array);
|
||||
}
|
||||
JsonValue::Object(_) => {
|
||||
VrlValue::Object(_) => {
|
||||
acc_array.push(item_value);
|
||||
}
|
||||
_ => {
|
||||
@@ -331,7 +331,7 @@ fn transform_ndjson_array_factory(
|
||||
|
||||
/// Dryrun pipeline with given data
|
||||
async fn dryrun_pipeline_inner(
|
||||
value: Vec<PipelineValue>,
|
||||
value: Vec<VrlValue>,
|
||||
pipeline: Arc<pipeline::Pipeline>,
|
||||
pipeline_handler: PipelineHandlerRef,
|
||||
query_ctx: &QueryContextRef,
|
||||
@@ -494,7 +494,7 @@ fn add_step_info_for_pipeline_dryrun_error(step_msg: &str, e: Error) -> Response
|
||||
/// Parse the data with given content type
|
||||
/// If the content type is invalid, return error
|
||||
/// content type is one of application/json, text/plain, application/x-ndjson
|
||||
fn parse_dryrun_data(data_type: String, data: String) -> Result<Vec<PipelineValue>> {
|
||||
fn parse_dryrun_data(data_type: String, data: String) -> Result<Vec<VrlValue>> {
|
||||
if let Ok(content_type) = ContentType::from_str(&data_type) {
|
||||
extract_pipeline_value_by_content_type(content_type, Bytes::from(data), false)
|
||||
} else {
|
||||
@@ -741,17 +741,15 @@ impl<'a> TryFrom<&'a ContentType> for EventPayloadResolver<'a> {
|
||||
}
|
||||
|
||||
impl EventPayloadResolver<'_> {
|
||||
fn parse_payload(&self, payload: Bytes, ignore_errors: bool) -> Result<Vec<PipelineValue>> {
|
||||
fn parse_payload(&self, payload: Bytes, ignore_errors: bool) -> Result<Vec<VrlValue>> {
|
||||
match self.inner {
|
||||
EventPayloadResolverInner::Json => {
|
||||
pipeline::json_array_to_map(transform_ndjson_array_factory(
|
||||
Deserializer::from_slice(&payload).into_iter(),
|
||||
ignore_errors,
|
||||
)?)
|
||||
.context(PipelineSnafu)
|
||||
}
|
||||
EventPayloadResolverInner::Json => transform_ndjson_array_factory(
|
||||
Deserializer::from_slice(&payload).into_iter(),
|
||||
ignore_errors,
|
||||
),
|
||||
EventPayloadResolverInner::Ndjson => {
|
||||
let mut result = Vec::with_capacity(1000);
|
||||
let mut buffer = Buffers::new(1000);
|
||||
for (index, line) in payload.lines().enumerate() {
|
||||
let mut line = match line {
|
||||
Ok(line) if !line.is_empty() => line,
|
||||
@@ -768,8 +766,10 @@ impl EventPayloadResolver<'_> {
|
||||
|
||||
// simd_json, according to description, only de-escapes string at character level,
|
||||
// like any other json parser. So it should be safe here.
|
||||
if let Ok(v) = simd_json::to_owned_value(unsafe { line.as_bytes_mut() }) {
|
||||
let v = pipeline::simd_json_to_map(v).context(PipelineSnafu)?;
|
||||
if let Ok(v) = simd_json::serde::from_slice_with_buffers(
|
||||
unsafe { line.as_bytes_mut() },
|
||||
&mut buffer,
|
||||
) {
|
||||
result.push(v);
|
||||
} else if !ignore_errors {
|
||||
warn!("invalid JSON at index: {}, content: {:?}", index, line);
|
||||
@@ -787,8 +787,11 @@ impl EventPayloadResolver<'_> {
|
||||
.filter_map(|line| line.ok().filter(|line| !line.is_empty()))
|
||||
.map(|line| {
|
||||
let mut map = BTreeMap::new();
|
||||
map.insert("message".to_string(), PipelineValue::String(line));
|
||||
PipelineValue::Map(map.into())
|
||||
map.insert(
|
||||
KeyString::from("message"),
|
||||
VrlValue::Bytes(Bytes::from(line)),
|
||||
);
|
||||
VrlValue::Object(map)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(result)
|
||||
@@ -801,7 +804,7 @@ fn extract_pipeline_value_by_content_type(
|
||||
content_type: ContentType,
|
||||
payload: Bytes,
|
||||
ignore_errors: bool,
|
||||
) -> Result<Vec<PipelineValue>> {
|
||||
) -> Result<Vec<VrlValue>> {
|
||||
EventPayloadResolver::try_from(&content_type).and_then(|resolver| {
|
||||
resolver
|
||||
.parse_payload(payload, ignore_errors)
|
||||
@@ -899,36 +902,37 @@ pub struct LogState {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_transform_ndjson() {
|
||||
let s = "{\"a\": 1}\n{\"b\": 2}";
|
||||
let a = JsonValue::Array(
|
||||
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
let a = serde_json::to_string(
|
||||
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
)
|
||||
.to_string();
|
||||
.unwrap();
|
||||
assert_eq!(a, "[{\"a\":1},{\"b\":2}]");
|
||||
|
||||
let s = "{\"a\": 1}";
|
||||
let a = JsonValue::Array(
|
||||
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
let a = serde_json::to_string(
|
||||
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
)
|
||||
.to_string();
|
||||
.unwrap();
|
||||
assert_eq!(a, "[{\"a\":1}]");
|
||||
|
||||
let s = "[{\"a\": 1}]";
|
||||
let a = JsonValue::Array(
|
||||
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
let a = serde_json::to_string(
|
||||
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
)
|
||||
.to_string();
|
||||
.unwrap();
|
||||
assert_eq!(a, "[{\"a\":1}]");
|
||||
|
||||
let s = "[{\"a\": 1}, {\"b\": 2}]";
|
||||
let a = JsonValue::Array(
|
||||
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
let a = serde_json::to_string(
|
||||
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
|
||||
)
|
||||
.to_string();
|
||||
.unwrap();
|
||||
assert_eq!(a, "[{\"a\":1},{\"b\":2}]");
|
||||
}
|
||||
|
||||
@@ -945,21 +949,18 @@ mod tests {
|
||||
let fail_rest =
|
||||
extract_pipeline_value_by_content_type(ContentType::json(), payload.clone(), true);
|
||||
assert!(fail_rest.is_ok());
|
||||
assert_eq!(
|
||||
fail_rest.unwrap(),
|
||||
pipeline::json_array_to_map(vec![json!({"a": 1})]).unwrap()
|
||||
);
|
||||
assert_eq!(fail_rest.unwrap(), vec![json!({"a": 1}).into()]);
|
||||
|
||||
let fail_only_wrong =
|
||||
extract_pipeline_value_by_content_type(NDJSON_CONTENT_TYPE.clone(), payload, true);
|
||||
assert!(fail_only_wrong.is_ok());
|
||||
|
||||
let mut map1 = BTreeMap::new();
|
||||
map1.insert("a".to_string(), PipelineValue::Uint64(1));
|
||||
let map1 = PipelineValue::Map(map1.into());
|
||||
map1.insert(KeyString::from("a"), VrlValue::Integer(1));
|
||||
let map1 = VrlValue::Object(map1);
|
||||
let mut map2 = BTreeMap::new();
|
||||
map2.insert("c".to_string(), PipelineValue::Uint64(1));
|
||||
let map2 = PipelineValue::Map(map2.into());
|
||||
map2.insert(KeyString::from("c"), VrlValue::Integer(1));
|
||||
let map2 = VrlValue::Object(map2);
|
||||
assert_eq!(fail_only_wrong.unwrap(), vec![map1, map2]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ use axum::extract::State;
|
||||
use axum::Extension;
|
||||
use axum_extra::TypedHeader;
|
||||
use bytes::Bytes;
|
||||
use chrono::DateTime;
|
||||
use common_query::prelude::GREPTIME_TIMESTAMP;
|
||||
use common_query::{Output, OutputData};
|
||||
use common_telemetry::{error, warn};
|
||||
@@ -39,6 +40,7 @@ use prost::Message;
|
||||
use quoted_string::test_utils::TestSpec;
|
||||
use session::context::{Channel, QueryContext};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
DecodeOtlpRequestSnafu, InvalidLokiLabelsSnafu, InvalidLokiPayloadSnafu, ParseJsonSnafu,
|
||||
@@ -197,7 +199,7 @@ pub async fn loki_ingest(
|
||||
}
|
||||
|
||||
/// This is the holder of the loki lines parsed from json or protobuf.
|
||||
/// The generic here is either [serde_json::Value] or [Vec<LabelPairAdapter>].
|
||||
/// The generic here is either [VrlValue] or [Vec<LabelPairAdapter>].
|
||||
/// Depending on the target destination, this can be converted to [LokiRawItem] or [LokiPipeline].
|
||||
pub struct LokiMiddleItem<T> {
|
||||
pub ts: i64,
|
||||
@@ -218,7 +220,7 @@ pub struct LokiRawItem {
|
||||
|
||||
/// This is the line item prepared for the pipeline engine.
|
||||
pub struct LokiPipeline {
|
||||
pub map: pipeline::Value,
|
||||
pub map: VrlValue,
|
||||
}
|
||||
|
||||
/// This is the flow of the Loki ingestion.
|
||||
@@ -255,7 +257,7 @@ pub struct LokiPipeline {
|
||||
/// +------------------+ +---------------------+
|
||||
fn extract_item<T>(content_type: ContentType, bytes: Bytes) -> Result<Box<dyn Iterator<Item = T>>>
|
||||
where
|
||||
LokiMiddleItem<serde_json::Value>: Into<T>,
|
||||
LokiMiddleItem<VrlValue>: Into<T>,
|
||||
LokiMiddleItem<Vec<LabelPairAdapter>>: Into<T>,
|
||||
{
|
||||
match content_type {
|
||||
@@ -270,15 +272,14 @@ where
|
||||
}
|
||||
|
||||
struct LokiJsonParser {
|
||||
pub streams: VecDeque<serde_json::Value>,
|
||||
pub streams: VecDeque<VrlValue>,
|
||||
}
|
||||
|
||||
impl LokiJsonParser {
|
||||
pub fn from_bytes(bytes: Bytes) -> Result<Self> {
|
||||
let payload: serde_json::Value =
|
||||
serde_json::from_slice(bytes.as_ref()).context(ParseJsonSnafu)?;
|
||||
let payload: VrlValue = serde_json::from_slice(bytes.as_ref()).context(ParseJsonSnafu)?;
|
||||
|
||||
let serde_json::Value::Object(mut map) = payload else {
|
||||
let VrlValue::Object(mut map) = payload else {
|
||||
return InvalidLokiPayloadSnafu {
|
||||
msg: "payload is not an object",
|
||||
}
|
||||
@@ -289,7 +290,7 @@ impl LokiJsonParser {
|
||||
msg: "missing streams",
|
||||
})?;
|
||||
|
||||
let serde_json::Value::Array(streams) = streams else {
|
||||
let VrlValue::Array(streams) = streams else {
|
||||
return InvalidLokiPayloadSnafu {
|
||||
msg: "streams is not an array",
|
||||
}
|
||||
@@ -308,7 +309,7 @@ impl Iterator for LokiJsonParser {
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while let Some(stream) = self.streams.pop_front() {
|
||||
// get lines from the map
|
||||
let serde_json::Value::Object(mut map) = stream else {
|
||||
let VrlValue::Object(mut map) = stream else {
|
||||
warn!("stream is not an object, {:?}", stream);
|
||||
continue;
|
||||
};
|
||||
@@ -316,7 +317,7 @@ impl Iterator for LokiJsonParser {
|
||||
warn!("missing lines on stream, {:?}", map);
|
||||
continue;
|
||||
};
|
||||
let serde_json::Value::Array(lines) = lines else {
|
||||
let VrlValue::Array(lines) = lines else {
|
||||
warn!("lines is not an array, {:?}", lines);
|
||||
continue;
|
||||
};
|
||||
@@ -325,13 +326,15 @@ impl Iterator for LokiJsonParser {
|
||||
let labels = map
|
||||
.remove(LABEL_KEY)
|
||||
.and_then(|m| match m {
|
||||
serde_json::Value::Object(labels) => Some(labels),
|
||||
VrlValue::Object(labels) => Some(labels),
|
||||
_ => None,
|
||||
})
|
||||
.map(|m| {
|
||||
m.into_iter()
|
||||
.filter_map(|(k, v)| match v {
|
||||
serde_json::Value::String(v) => Some((k, v)),
|
||||
VrlValue::Bytes(v) => {
|
||||
Some((k.into(), String::from_utf8_lossy(&v).to_string()))
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect::<BTreeMap<String, String>>()
|
||||
@@ -347,16 +350,16 @@ impl Iterator for LokiJsonParser {
|
||||
}
|
||||
|
||||
struct JsonStreamItem {
|
||||
pub lines: VecDeque<serde_json::Value>,
|
||||
pub lines: VecDeque<VrlValue>,
|
||||
pub labels: Option<BTreeMap<String, String>>,
|
||||
}
|
||||
|
||||
impl Iterator for JsonStreamItem {
|
||||
type Item = LokiMiddleItem<serde_json::Value>;
|
||||
type Item = LokiMiddleItem<VrlValue>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while let Some(line) = self.lines.pop_front() {
|
||||
let serde_json::Value::Array(line) = line else {
|
||||
let VrlValue::Array(line) = line else {
|
||||
warn!("line is not an array, {:?}", line);
|
||||
continue;
|
||||
};
|
||||
@@ -364,11 +367,11 @@ impl Iterator for JsonStreamItem {
|
||||
warn!("line is too short, {:?}", line);
|
||||
continue;
|
||||
}
|
||||
let mut line: VecDeque<serde_json::Value> = line.into();
|
||||
let mut line: VecDeque<VrlValue> = line.into();
|
||||
|
||||
// get ts
|
||||
let ts = line.pop_front().and_then(|ts| match ts {
|
||||
serde_json::Value::String(ts) => ts.parse::<i64>().ok(),
|
||||
VrlValue::Bytes(ts) => String::from_utf8_lossy(&ts).parse::<i64>().ok(),
|
||||
_ => {
|
||||
warn!("missing or invalid timestamp, {:?}", ts);
|
||||
None
|
||||
@@ -379,7 +382,7 @@ impl Iterator for JsonStreamItem {
|
||||
};
|
||||
|
||||
let line_text = line.pop_front().and_then(|l| match l {
|
||||
serde_json::Value::String(l) => Some(l),
|
||||
VrlValue::Bytes(l) => Some(String::from_utf8_lossy(&l).to_string()),
|
||||
_ => {
|
||||
warn!("missing or invalid line, {:?}", l);
|
||||
None
|
||||
@@ -402,8 +405,8 @@ impl Iterator for JsonStreamItem {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LokiMiddleItem<serde_json::Value>> for LokiRawItem {
|
||||
fn from(val: LokiMiddleItem<serde_json::Value>) -> Self {
|
||||
impl From<LokiMiddleItem<VrlValue>> for LokiRawItem {
|
||||
fn from(val: LokiMiddleItem<VrlValue>) -> Self {
|
||||
let LokiMiddleItem {
|
||||
ts,
|
||||
line,
|
||||
@@ -413,13 +416,16 @@ impl From<LokiMiddleItem<serde_json::Value>> for LokiRawItem {
|
||||
|
||||
let structured_metadata = structured_metadata
|
||||
.and_then(|m| match m {
|
||||
serde_json::Value::Object(m) => Some(m),
|
||||
VrlValue::Object(m) => Some(m),
|
||||
_ => None,
|
||||
})
|
||||
.map(|m| {
|
||||
m.into_iter()
|
||||
.filter_map(|(k, v)| match v {
|
||||
serde_json::Value::String(v) => Some((k, Value::String(v.into()))),
|
||||
VrlValue::Bytes(bytes) => Some((
|
||||
k.into(),
|
||||
Value::String(String::from_utf8_lossy(&bytes).to_string().into()),
|
||||
)),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<BTreeMap<String, Value>>()
|
||||
@@ -436,8 +442,8 @@ impl From<LokiMiddleItem<serde_json::Value>> for LokiRawItem {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LokiMiddleItem<serde_json::Value>> for LokiPipeline {
|
||||
fn from(value: LokiMiddleItem<serde_json::Value>) -> Self {
|
||||
impl From<LokiMiddleItem<VrlValue>> for LokiPipeline {
|
||||
fn from(value: LokiMiddleItem<VrlValue>) -> Self {
|
||||
let LokiMiddleItem {
|
||||
ts,
|
||||
line,
|
||||
@@ -447,37 +453,33 @@ impl From<LokiMiddleItem<serde_json::Value>> for LokiPipeline {
|
||||
|
||||
let mut map = BTreeMap::new();
|
||||
map.insert(
|
||||
GREPTIME_TIMESTAMP.to_string(),
|
||||
pipeline::Value::Timestamp(pipeline::Timestamp::Nanosecond(ts)),
|
||||
KeyString::from(GREPTIME_TIMESTAMP),
|
||||
VrlValue::Timestamp(DateTime::from_timestamp_nanos(ts)),
|
||||
);
|
||||
map.insert(
|
||||
LOKI_LINE_COLUMN_NAME.to_string(),
|
||||
pipeline::Value::String(line),
|
||||
KeyString::from(LOKI_LINE_COLUMN_NAME),
|
||||
VrlValue::Bytes(line.into()),
|
||||
);
|
||||
|
||||
if let Some(serde_json::Value::Object(m)) = structured_metadata {
|
||||
if let Some(VrlValue::Object(m)) = structured_metadata {
|
||||
for (k, v) in m {
|
||||
match pipeline::Value::try_from(v) {
|
||||
Ok(v) => {
|
||||
map.insert(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, k), v);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("not a valid value, {:?}", e);
|
||||
}
|
||||
}
|
||||
map.insert(
|
||||
KeyString::from(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, k)),
|
||||
v,
|
||||
);
|
||||
}
|
||||
}
|
||||
if let Some(v) = labels {
|
||||
v.into_iter().for_each(|(k, v)| {
|
||||
map.insert(
|
||||
format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k),
|
||||
pipeline::Value::String(v),
|
||||
KeyString::from(format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k)),
|
||||
VrlValue::Bytes(v.into()),
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
LokiPipeline {
|
||||
map: pipeline::Value::Map(pipeline::Map::from(map)),
|
||||
map: VrlValue::Object(map),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -584,12 +586,12 @@ impl From<LokiMiddleItem<Vec<LabelPairAdapter>>> for LokiPipeline {
|
||||
|
||||
let mut map = BTreeMap::new();
|
||||
map.insert(
|
||||
GREPTIME_TIMESTAMP.to_string(),
|
||||
pipeline::Value::Timestamp(pipeline::Timestamp::Nanosecond(ts)),
|
||||
KeyString::from(GREPTIME_TIMESTAMP),
|
||||
VrlValue::Timestamp(DateTime::from_timestamp_nanos(ts)),
|
||||
);
|
||||
map.insert(
|
||||
LOKI_LINE_COLUMN_NAME.to_string(),
|
||||
pipeline::Value::String(line),
|
||||
KeyString::from(LOKI_LINE_COLUMN_NAME),
|
||||
VrlValue::Bytes(line.into()),
|
||||
);
|
||||
|
||||
structured_metadata
|
||||
@@ -597,22 +599,22 @@ impl From<LokiMiddleItem<Vec<LabelPairAdapter>>> for LokiPipeline {
|
||||
.into_iter()
|
||||
.for_each(|d| {
|
||||
map.insert(
|
||||
format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, d.name),
|
||||
pipeline::Value::String(d.value),
|
||||
KeyString::from(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, d.name)),
|
||||
VrlValue::Bytes(d.value.into()),
|
||||
);
|
||||
});
|
||||
|
||||
if let Some(v) = labels {
|
||||
v.into_iter().for_each(|(k, v)| {
|
||||
map.insert(
|
||||
format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k),
|
||||
pipeline::Value::String(v),
|
||||
KeyString::from(format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k)),
|
||||
VrlValue::Bytes(v.into()),
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
LokiPipeline {
|
||||
map: pipeline::Value::Map(pipeline::Map::from(map)),
|
||||
map: VrlValue::Object(map),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,10 +23,10 @@ use common_error::ext::ErrorExt;
|
||||
use common_query::Output;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use log_query::LogQuery;
|
||||
use pipeline::Value;
|
||||
use query::parser::PromQuery;
|
||||
use session::context::QueryContextRef;
|
||||
use sql::statements::statement::Statement;
|
||||
use vrl::value::Value;
|
||||
|
||||
/// SqlQueryInterceptor can track life cycle of a sql query and customize or
|
||||
/// abort its execution at given point.
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap as StdHashMap;
|
||||
use std::collections::{BTreeMap, HashMap as StdHashMap};
|
||||
|
||||
use api::v1::column_data_type_extension::TypeExt;
|
||||
use api::v1::value::ValueData;
|
||||
@@ -20,6 +20,7 @@ use api::v1::{
|
||||
ColumnDataType, ColumnDataTypeExtension, ColumnOptions, ColumnSchema, JsonTypeExtension, Row,
|
||||
RowInsertRequest, Rows, SemanticType, Value as GreptimeValue,
|
||||
};
|
||||
use bytes::Bytes;
|
||||
use jsonb::{Number as JsonbNumber, Value as JsonbValue};
|
||||
use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
|
||||
use opentelemetry_proto::tonic::common::v1::{any_value, AnyValue, InstrumentationScope, KeyValue};
|
||||
@@ -27,13 +28,13 @@ use opentelemetry_proto::tonic::logs::v1::{LogRecord, ResourceLogs, ScopeLogs};
|
||||
use pipeline::{
|
||||
ContextReq, GreptimePipelineParams, PipelineContext, PipelineWay, SchemaInfo, SelectInfo,
|
||||
};
|
||||
use serde_json::{Map, Value};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use snafu::ensure;
|
||||
use vrl::prelude::NotNan;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::{
|
||||
IncompatibleSchemaSnafu, NotSupportedSnafu, PipelineSnafu, Result,
|
||||
UnsupportedJsonDataTypeForTagSnafu,
|
||||
IncompatibleSchemaSnafu, NotSupportedSnafu, Result, UnsupportedJsonDataTypeForTagSnafu,
|
||||
};
|
||||
use crate::http::event::PipelineIngestRequest;
|
||||
use crate::otlp::trace::attributes::OtlpAnyValue;
|
||||
@@ -69,8 +70,7 @@ pub async fn to_grpc_insert_requests(
|
||||
Ok(ContextReq::default_opt_with_reqs(vec![insert_request]))
|
||||
}
|
||||
PipelineWay::Pipeline(pipeline_def) => {
|
||||
let data = parse_export_logs_service_request(request);
|
||||
let array = pipeline::json_array_to_map(data).context(PipelineSnafu)?;
|
||||
let array = parse_export_logs_service_request(request);
|
||||
|
||||
let pipeline_ctx =
|
||||
PipelineContext::new(&pipeline_def, &pipeline_params, query_ctx.channel());
|
||||
@@ -93,16 +93,16 @@ pub async fn to_grpc_insert_requests(
|
||||
}
|
||||
}
|
||||
|
||||
fn scope_to_pipeline_value(scope: Option<InstrumentationScope>) -> (Value, Value, Value) {
|
||||
fn scope_to_pipeline_value(scope: Option<InstrumentationScope>) -> (VrlValue, VrlValue, VrlValue) {
|
||||
scope
|
||||
.map(|x| {
|
||||
(
|
||||
Value::Object(key_value_to_map(x.attributes)),
|
||||
Value::String(x.version),
|
||||
Value::String(x.name),
|
||||
VrlValue::Object(key_value_to_map(x.attributes)),
|
||||
VrlValue::Bytes(x.version.into()),
|
||||
VrlValue::Bytes(x.name.into()),
|
||||
)
|
||||
})
|
||||
.unwrap_or((Value::Null, Value::Null, Value::Null))
|
||||
.unwrap_or((VrlValue::Null, VrlValue::Null, VrlValue::Null))
|
||||
}
|
||||
|
||||
fn scope_to_jsonb(
|
||||
@@ -121,53 +121,59 @@ fn scope_to_jsonb(
|
||||
|
||||
fn log_to_pipeline_value(
|
||||
log: LogRecord,
|
||||
resource_schema_url: Value,
|
||||
resource_attr: Value,
|
||||
scope_schema_url: Value,
|
||||
scope_name: Value,
|
||||
scope_version: Value,
|
||||
scope_attrs: Value,
|
||||
) -> Value {
|
||||
let log_attrs = Value::Object(key_value_to_map(log.attributes));
|
||||
let mut map = Map::new();
|
||||
map.insert("Timestamp".to_string(), Value::from(log.time_unix_nano));
|
||||
resource_schema_url: VrlValue,
|
||||
resource_attr: VrlValue,
|
||||
scope_schema_url: VrlValue,
|
||||
scope_name: VrlValue,
|
||||
scope_version: VrlValue,
|
||||
scope_attrs: VrlValue,
|
||||
) -> VrlValue {
|
||||
let log_attrs = VrlValue::Object(key_value_to_map(log.attributes));
|
||||
let mut map = BTreeMap::new();
|
||||
map.insert(
|
||||
"ObservedTimestamp".to_string(),
|
||||
Value::from(log.observed_time_unix_nano),
|
||||
"Timestamp".into(),
|
||||
VrlValue::Integer(log.time_unix_nano as i64),
|
||||
);
|
||||
map.insert(
|
||||
"ObservedTimestamp".into(),
|
||||
VrlValue::Integer(log.observed_time_unix_nano as i64),
|
||||
);
|
||||
|
||||
// need to be convert to string
|
||||
map.insert(
|
||||
"TraceId".to_string(),
|
||||
Value::String(bytes_to_hex_string(&log.trace_id)),
|
||||
"TraceId".into(),
|
||||
VrlValue::Bytes(bytes_to_hex_string(&log.trace_id).into()),
|
||||
);
|
||||
map.insert(
|
||||
"SpanId".to_string(),
|
||||
Value::String(bytes_to_hex_string(&log.span_id)),
|
||||
"SpanId".into(),
|
||||
VrlValue::Bytes(bytes_to_hex_string(&log.span_id).into()),
|
||||
);
|
||||
map.insert("TraceFlags".to_string(), Value::from(log.flags));
|
||||
map.insert("SeverityText".to_string(), Value::String(log.severity_text));
|
||||
map.insert("TraceFlags".into(), VrlValue::Integer(log.flags as i64));
|
||||
map.insert(
|
||||
"SeverityNumber".to_string(),
|
||||
Value::from(log.severity_number),
|
||||
"SeverityText".into(),
|
||||
VrlValue::Bytes(log.severity_text.into()),
|
||||
);
|
||||
map.insert(
|
||||
"SeverityNumber".into(),
|
||||
VrlValue::Integer(log.severity_number as i64),
|
||||
);
|
||||
// need to be convert to string
|
||||
map.insert(
|
||||
"Body".to_string(),
|
||||
"Body".into(),
|
||||
log.body
|
||||
.as_ref()
|
||||
.map(|x| Value::String(log_body_to_string(x)))
|
||||
.unwrap_or(Value::Null),
|
||||
.map(|x| VrlValue::Bytes(log_body_to_string(x).into()))
|
||||
.unwrap_or(VrlValue::Null),
|
||||
);
|
||||
map.insert("ResourceSchemaUrl".to_string(), resource_schema_url);
|
||||
map.insert("ResourceSchemaUrl".into(), resource_schema_url);
|
||||
|
||||
map.insert("ResourceAttributes".to_string(), resource_attr);
|
||||
map.insert("ScopeSchemaUrl".to_string(), scope_schema_url);
|
||||
map.insert("ScopeName".to_string(), scope_name);
|
||||
map.insert("ScopeVersion".to_string(), scope_version);
|
||||
map.insert("ScopeAttributes".to_string(), scope_attrs);
|
||||
map.insert("LogAttributes".to_string(), log_attrs);
|
||||
Value::Object(map)
|
||||
map.insert("ResourceAttributes".into(), resource_attr);
|
||||
map.insert("ScopeSchemaUrl".into(), scope_schema_url);
|
||||
map.insert("ScopeName".into(), scope_name);
|
||||
map.insert("ScopeVersion".into(), scope_version);
|
||||
map.insert("ScopeAttributes".into(), scope_attrs);
|
||||
map.insert("LogAttributes".into(), log_attrs);
|
||||
VrlValue::Object(map)
|
||||
}
|
||||
|
||||
fn build_otlp_logs_identity_schema() -> Vec<ColumnSchema> {
|
||||
@@ -622,18 +628,18 @@ fn merge_values(
|
||||
|
||||
/// transform otlp logs request to pipeline value
|
||||
/// https://opentelemetry.io/docs/concepts/signals/logs/
|
||||
fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<Value> {
|
||||
fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<VrlValue> {
|
||||
let mut result = Vec::new();
|
||||
for r in request.resource_logs {
|
||||
let resource_attr = r
|
||||
.resource
|
||||
.map(|x| Value::Object(key_value_to_map(x.attributes)))
|
||||
.unwrap_or(Value::Null);
|
||||
let resource_schema_url = Value::String(r.schema_url);
|
||||
.map(|x| VrlValue::Object(key_value_to_map(x.attributes)))
|
||||
.unwrap_or(VrlValue::Null);
|
||||
let resource_schema_url = VrlValue::Bytes(r.schema_url.into());
|
||||
for scope_logs in r.scope_logs {
|
||||
let (scope_attrs, scope_version, scope_name) =
|
||||
scope_to_pipeline_value(scope_logs.scope);
|
||||
let scope_schema_url = Value::String(scope_logs.schema_url);
|
||||
let scope_schema_url = VrlValue::Bytes(scope_logs.schema_url.into());
|
||||
for log in scope_logs.log_records {
|
||||
let value = log_to_pipeline_value(
|
||||
log,
|
||||
@@ -652,43 +658,39 @@ fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<V
|
||||
}
|
||||
|
||||
// convert AnyValue to pipeline value
|
||||
fn any_value_to_pipeline_value(value: any_value::Value) -> Value {
|
||||
fn any_value_to_vrl_value(value: any_value::Value) -> VrlValue {
|
||||
match value {
|
||||
any_value::Value::StringValue(s) => Value::String(s),
|
||||
any_value::Value::IntValue(i) => Value::from(i),
|
||||
any_value::Value::DoubleValue(d) => Value::from(d),
|
||||
any_value::Value::BoolValue(b) => Value::Bool(b),
|
||||
any_value::Value::ArrayValue(a) => {
|
||||
let values = a
|
||||
any_value::Value::StringValue(s) => VrlValue::Bytes(s.into()),
|
||||
any_value::Value::IntValue(i) => VrlValue::Integer(i),
|
||||
any_value::Value::DoubleValue(d) => VrlValue::Float(NotNan::new(d).unwrap()),
|
||||
any_value::Value::BoolValue(b) => VrlValue::Boolean(b),
|
||||
any_value::Value::ArrayValue(array_value) => {
|
||||
let values = array_value
|
||||
.values
|
||||
.into_iter()
|
||||
.map(|v| match v.value {
|
||||
Some(value) => any_value_to_pipeline_value(value),
|
||||
None => Value::Null,
|
||||
})
|
||||
.filter_map(|v| v.value.map(any_value_to_vrl_value))
|
||||
.collect();
|
||||
Value::Array(values)
|
||||
VrlValue::Array(values)
|
||||
}
|
||||
any_value::Value::KvlistValue(kv) => {
|
||||
let value = key_value_to_map(kv.values);
|
||||
Value::Object(value)
|
||||
any_value::Value::KvlistValue(key_value_list) => {
|
||||
VrlValue::Object(key_value_to_map(key_value_list.values))
|
||||
}
|
||||
any_value::Value::BytesValue(b) => Value::String(bytes_to_hex_string(&b)),
|
||||
any_value::Value::BytesValue(items) => VrlValue::Bytes(Bytes::from(items)),
|
||||
}
|
||||
}
|
||||
|
||||
// convert otlp keyValue vec to map
|
||||
fn key_value_to_map(key_values: Vec<KeyValue>) -> Map<String, Value> {
|
||||
let mut map = Map::new();
|
||||
fn key_value_to_map(key_values: Vec<KeyValue>) -> BTreeMap<KeyString, VrlValue> {
|
||||
let mut map = BTreeMap::new();
|
||||
for kv in key_values {
|
||||
let value = match kv.value {
|
||||
Some(value) => match value.value {
|
||||
Some(value) => any_value_to_pipeline_value(value),
|
||||
None => Value::Null,
|
||||
Some(value) => any_value_to_vrl_value(value),
|
||||
None => VrlValue::Null,
|
||||
},
|
||||
None => Value::Null,
|
||||
None => VrlValue::Null,
|
||||
};
|
||||
map.insert(kv.key.clone(), value);
|
||||
map.insert(kv.key.into(), value);
|
||||
}
|
||||
map
|
||||
}
|
||||
|
||||
@@ -20,12 +20,13 @@ use api::greptime_proto;
|
||||
use api::v1::{ColumnDataType, ColumnSchema, RowInsertRequest, Rows, SemanticType};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use pipeline::{
|
||||
unwrap_or_continue_if_err, ContextReq, DispatchedTo, Pipeline, PipelineContext,
|
||||
PipelineDefinition, PipelineExecOutput, SchemaInfo, TransformedOutput, TransformerMode, Value,
|
||||
GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
|
||||
identity_pipeline, unwrap_or_continue_if_err, ContextReq, DispatchedTo, Pipeline,
|
||||
PipelineContext, PipelineDefinition, PipelineExecOutput, SchemaInfo, TransformedOutput,
|
||||
TransformerMode, GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
|
||||
};
|
||||
use session::context::{Channel, QueryContextRef};
|
||||
use snafu::ResultExt;
|
||||
use vrl::value::Value as VrlValue;
|
||||
|
||||
use crate::error::{CatalogSnafu, PipelineSnafu, Result};
|
||||
use crate::http::event::PipelineIngestRequest;
|
||||
@@ -93,7 +94,7 @@ async fn run_identity_pipeline(
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
};
|
||||
pipeline::identity_pipeline(data_array, table, pipeline_ctx)
|
||||
identity_pipeline(data_array, table, pipeline_ctx)
|
||||
.map(|opt_map| ContextReq::from_opt_map(opt_map, table_name))
|
||||
.context(PipelineSnafu)
|
||||
}
|
||||
@@ -117,7 +118,7 @@ async fn run_custom_pipeline(
|
||||
} = pipeline_req;
|
||||
let arr_len = pipeline_maps.len();
|
||||
let mut transformed_map = HashMap::new();
|
||||
let mut dispatched: BTreeMap<DispatchedTo, Vec<Value>> = BTreeMap::new();
|
||||
let mut dispatched: BTreeMap<DispatchedTo, Vec<VrlValue>> = BTreeMap::new();
|
||||
|
||||
let mut schema_info = match pipeline.transformer() {
|
||||
TransformerMode::GreptimeTransformer(greptime_transformer) => {
|
||||
|
||||
@@ -20,12 +20,15 @@ use std::slice;
|
||||
use api::prom_store::remote::Sample;
|
||||
use bytes::{Buf, Bytes};
|
||||
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition, Value};
|
||||
use common_telemetry::warn;
|
||||
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition};
|
||||
use prost::encoding::message::merge;
|
||||
use prost::encoding::{decode_key, decode_varint, WireType};
|
||||
use prost::DecodeError;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::OptionExt;
|
||||
use vrl::prelude::NotNan;
|
||||
use vrl::value::{KeyString, Value as VrlValue};
|
||||
|
||||
use crate::error::InternalSnafu;
|
||||
use crate::http::event::PipelineIngestRequest;
|
||||
@@ -342,7 +345,7 @@ impl PromWriteRequest {
|
||||
/// let's keep it that way for now.
|
||||
pub struct PromSeriesProcessor {
|
||||
pub(crate) use_pipeline: bool,
|
||||
pub(crate) table_values: BTreeMap<String, Vec<Value>>,
|
||||
pub(crate) table_values: BTreeMap<String, Vec<VrlValue>>,
|
||||
|
||||
// optional fields for pipeline
|
||||
pub(crate) pipeline_handler: Option<PipelineHandlerRef>,
|
||||
@@ -379,29 +382,33 @@ impl PromSeriesProcessor {
|
||||
series: &mut PromTimeSeries,
|
||||
prom_validation_mode: PromValidationMode,
|
||||
) -> Result<(), DecodeError> {
|
||||
let mut vec_pipeline_map: Vec<Value> = Vec::new();
|
||||
let mut vec_pipeline_map = Vec::new();
|
||||
let mut pipeline_map = BTreeMap::new();
|
||||
for l in series.labels.iter() {
|
||||
let name = prom_validation_mode.decode_string(&l.name)?;
|
||||
let value = prom_validation_mode.decode_string(&l.value)?;
|
||||
pipeline_map.insert(name, Value::String(value));
|
||||
pipeline_map.insert(KeyString::from(name), VrlValue::Bytes(value.into()));
|
||||
}
|
||||
|
||||
let one_sample = series.samples.len() == 1;
|
||||
|
||||
for s in series.samples.iter() {
|
||||
// skip NaN value
|
||||
if s.value.is_nan() {
|
||||
let Ok(value) = NotNan::new(s.value) else {
|
||||
warn!("Invalid float value: {}", s.value);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let timestamp = s.timestamp;
|
||||
pipeline_map.insert(GREPTIME_TIMESTAMP.to_string(), Value::Int64(timestamp));
|
||||
pipeline_map.insert(GREPTIME_VALUE.to_string(), Value::Float64(s.value));
|
||||
pipeline_map.insert(
|
||||
KeyString::from(GREPTIME_TIMESTAMP),
|
||||
VrlValue::Integer(timestamp),
|
||||
);
|
||||
pipeline_map.insert(KeyString::from(GREPTIME_VALUE), VrlValue::Float(value));
|
||||
if one_sample {
|
||||
vec_pipeline_map.push(Value::Map(pipeline_map.into()));
|
||||
vec_pipeline_map.push(VrlValue::Object(pipeline_map));
|
||||
break;
|
||||
} else {
|
||||
vec_pipeline_map.push(Value::Map(pipeline_map.clone().into()));
|
||||
vec_pipeline_map.push(VrlValue::Object(pipeline_map.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user