refactor: replace pipeline::value with vrl::value (#6430)

* chore: pass compile

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* fix: default case

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* fix: test

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* chore: remove and move code

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* chore: remove serde_value to VrlValue conversion

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* refactor: optimize vrl value-related code

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* refactor: loki transform using vrl

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* fix: remove unused error

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* chore: fix CR issue

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* chore: use from_utf8_lossy_owned

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

* chore: CR issue

Signed-off-by: shuiyisong <xixing.sys@gmail.com>

---------

Signed-off-by: shuiyisong <xixing.sys@gmail.com>
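
Taken together, the commits above swap the crate-local `pipeline::Value` for `vrl::value::Value` across the pipeline engine. A hedged overview of the variant mapping the diffs below apply (the left-hand variants are reconstructed from usage, not the deleted enum's actual definition):

```rust
use std::collections::BTreeMap;

use ordered_float::NotNan;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

// Rough correspondence applied throughout this PR (illustrative):
//   pipeline::Value::String(String)  -> VrlValue::Bytes(Bytes)
//   pipeline::Value::Int64(i64)      -> VrlValue::Integer(i64)
//   pipeline::Value::Float64(f64)    -> VrlValue::Float(NotNan<f64>)
//   pipeline::Value::Boolean(bool)   -> VrlValue::Boolean(bool)
//   pipeline::Value::Timestamp(..)   -> VrlValue::Timestamp(DateTime<Utc>)
//   pipeline::Value::Map(..)         -> VrlValue::Object(BTreeMap<KeyString, VrlValue>)
fn sample_event() -> VrlValue {
    VrlValue::Object(BTreeMap::from([
        (KeyString::from("message"), VrlValue::Bytes(Bytes::from("hello"))),
        (KeyString::from("count"), VrlValue::Integer(1)),
        (KeyString::from("ratio"), VrlValue::Float(NotNan::new(0.5).unwrap())),
    ]))
}
```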
Authored by: shuiyisong
Date: 2025-07-11 01:08:31 +08:00
Committed by: GitHub
Parent: 351a77a2e5
Commit: 1594859957
48 changed files with 1660 additions and 2665 deletions

Cargo.lock (generated)
View File

@@ -2996,9 +2996,9 @@ dependencies = [
[[package]]
name = "crc"
version = "3.2.1"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636"
checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
dependencies = [
"crc-catalog",
]
@@ -3830,7 +3830,7 @@ dependencies = [
"jsonb",
"num",
"num-traits",
"ordered-float 3.9.2",
"ordered-float 4.3.0",
"paste",
"serde",
"serde_json",
@@ -4151,12 +4151,16 @@ dependencies = [
[[package]]
name = "domain"
version = "0.10.4"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c84070523f8ba0f9127ff156920f27eb27b302b425efe60bf5f41ec244d1c60"
checksum = "a11dd7f04a6a6d2aea0153c6e31f5ea7af8b2efdf52cdaeea7a9a592c7fefef9"
dependencies = [
"bumpalo",
"bytes",
"domain-macros",
"futures-util",
"hashbrown 0.14.5",
"log",
"moka",
"octseq",
"rand 0.8.5",
@@ -4167,6 +4171,17 @@ dependencies = [
"tracing",
]
[[package]]
name = "domain-macros"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e197fdfd2cdb5fdeb7f8ddcf3aed5d5d04ecde2890d448b14ffb716f7376b70"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.100",
]
[[package]]
name = "dotenv"
version = "0.15.0"
@@ -8566,17 +8581,6 @@ dependencies = [
"num-traits",
]
[[package]]
name = "ordered-float"
version = "3.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc"
dependencies = [
"num-traits",
"rand 0.8.5",
"serde",
]
[[package]]
name = "ordered-float"
version = "4.3.0"
@@ -8584,6 +8588,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d501f1a72f71d3c063a6bbc8f7271fa73aa09fe5d6283b6571e2ed176a2537"
dependencies = [
"num-traits",
"rand 0.8.5",
"serde",
]
[[package]]
@@ -9120,6 +9126,7 @@ dependencies = [
"moka",
"once_cell",
"operator",
"ordered-float 4.3.0",
"paste",
"prometheus",
"query",
@@ -11368,6 +11375,7 @@ dependencies = [
"tracing",
"urlencoding",
"uuid",
"vrl",
"zstd 0.13.2",
]
@@ -13030,9 +13038,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
version = "1.44.2"
version = "1.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48"
checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
dependencies = [
"backtrace",
"bytes",
@@ -13988,9 +13996,9 @@ dependencies = [
[[package]]
name = "vrl"
version = "0.24.0"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9ceadaa40aef567a26079ff014ca7a567ba85344f1b81090b5ec7d7bb16a219"
checksum = "4f49394b948406ea1564aa00152e011d87a38ad35d277ebddda257a9ee39c419"
dependencies = [
"aes",
"aes-siv",

View File

@@ -167,6 +167,7 @@ opentelemetry-proto = { version = "0.27", features = [
"with-serde",
"logs",
] }
ordered-float = { version = "4.3", features = ["serde"] }
parking_lot = "0.12"
parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
@@ -228,6 +229,7 @@ tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
typetag = "0.2"
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
vrl = "0.25"
zstd = "0.13"
# DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES

View File

@@ -28,7 +28,7 @@ greptime-proto.workspace = true
jsonb.workspace = true
num = "0.4"
num-traits = "0.2"
ordered-float = { version = "3.0", features = ["serde"] }
ordered-float.workspace = true
paste.workspace = true
serde.workspace = true
serde_json.workspace = true

View File

@@ -47,6 +47,7 @@ lazy_static.workspace = true
moka = { workspace = true, features = ["sync"] }
once_cell.workspace = true
operator.workspace = true
ordered-float.workspace = true
paste.workspace = true
prometheus.workspace = true
query.workspace = true
@@ -59,7 +60,7 @@ sql.workspace = true
table.workspace = true
tokio.workspace = true
urlencoding = "2.1"
vrl = "0.24"
vrl.workspace = true
yaml-rust = "0.4"
[dev-dependencies]

View File

@@ -16,23 +16,21 @@ use std::sync::Arc;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use pipeline::error::Result;
use pipeline::{
json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext, SchemaInfo,
};
use serde_json::{Deserializer, Value};
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext, SchemaInfo};
use serde_json::Deserializer;
use vrl::value::Value as VrlValue;
fn processor_mut(
pipeline: Arc<Pipeline>,
pipeline_ctx: &PipelineContext<'_>,
schema_info: &mut SchemaInfo,
input_values: Vec<Value>,
input_values: Vec<VrlValue>,
) -> Result<Vec<greptime_proto::v1::Row>> {
let mut result = Vec::with_capacity(input_values.len());
for v in input_values {
let payload = json_to_map(v).unwrap();
let r = pipeline
.exec_mut(payload, pipeline_ctx, schema_info)?
.exec_mut(v, pipeline_ctx, schema_info)?
.into_transformed()
.expect("expect transformed result ");
result.push(r.0);
@@ -237,7 +235,7 @@ transform:
fn criterion_benchmark(c: &mut Criterion) {
let input_value_str = include_str!("./data.log");
let input_value = Deserializer::from_str(input_value_str)
.into_iter::<serde_json::Value>()
.into_iter::<VrlValue>()
.collect::<std::result::Result<Vec<_>, _>>()
.unwrap();
let pipeline = prepare_pipeline();
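
With `json_to_map` gone, the benchmark parses its NDJSON fixture straight into VRL values: `vrl::value::Value` implements `serde::Deserialize`, which is all the streaming `Deserializer` needs. The step in isolation, as a minimal sketch:

```rust
use serde_json::Deserializer;
use vrl::value::Value as VrlValue;

// Each whitespace-separated JSON document in the stream deserializes
// directly into a VrlValue; the old serde_json::Value -> pipeline::Value
// conversion disappears.
fn parse_ndjson(input: &str) -> Result<Vec<VrlValue>, serde_json::Error> {
    Deserializer::from_str(input)
        .into_iter::<VrlValue>()
        .collect()
}

fn main() {
    let values = parse_ndjson(r#"{"a": 1} {"a": 2}"#).unwrap();
    assert_eq!(values.len(), 2);
}
```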

View File

@@ -14,6 +14,7 @@
use common_telemetry::debug;
use snafu::OptionExt;
use vrl::value::Value as VrlValue;
use yaml_rust::Yaml;
use crate::error::{
@@ -21,7 +22,7 @@ use crate::error::{
ValueRequiredForDispatcherRuleSnafu,
};
use crate::etl::ctx_req::TABLE_SUFFIX_KEY;
use crate::Value;
use crate::etl::value::yaml_to_vrl_value;
const FIELD: &str = "field";
const PIPELINE: &str = "pipeline";
@@ -62,7 +63,7 @@ pub(crate) struct Dispatcher {
/// name
#[derive(Debug, PartialEq)]
pub(crate) struct Rule {
pub value: Value,
pub value: VrlValue,
pub table_suffix: String,
pub pipeline: Option<String>,
}
@@ -90,7 +91,8 @@ impl TryFrom<&Yaml> for Dispatcher {
if rule[VALUE].is_badvalue() {
ValueRequiredForDispatcherRuleSnafu.fail()?;
}
let value = Value::try_from(&rule[VALUE])?;
let value = yaml_to_vrl_value(&rule[VALUE])?;
Ok(Rule {
value,
@@ -109,8 +111,9 @@ impl TryFrom<&Yaml> for Dispatcher {
impl Dispatcher {
/// execute dispatcher and returns matched rule if any
pub(crate) fn exec(&self, data: &Value) -> Option<&Rule> {
if let Some(value) = data.get(&self.field) {
pub(crate) fn exec(&self, data: &VrlValue) -> Option<&Rule> {
let data = data.as_object()?;
if let Some(value) = data.get(self.field.as_str()) {
for rule in &self.rules {
if rule.value == *value {
return Some(rule);
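
Worth noting in the new `exec`: object keys are `KeyString`, but `BTreeMap` lookups still take a plain `&str` (hence `data.get(self.field.as_str())`). A standalone sketch of the rule-matching step, with illustrative names:

```rust
use std::collections::BTreeMap;

use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

// Returns true when the dispatch field holds the rule's expected value.
fn matches_rule(data: &VrlValue, field: &str, expected: &VrlValue) -> bool {
    data.as_object()
        .and_then(|map| map.get(field))
        .is_some_and(|v| v == expected)
}

fn main() {
    let data = VrlValue::Object(BTreeMap::from([(
        KeyString::from("protocol"),
        VrlValue::Bytes(Bytes::from("http")),
    )]));
    assert!(matches_rule(&data, "protocol", &VrlValue::Bytes(Bytes::from("http"))));
}
```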

View File

@@ -62,7 +62,7 @@ pub enum Error {
#[snafu(display("Processor {processor}: expect string value, but got {v:?}"))]
ProcessorExpectString {
processor: String,
v: crate::Value,
v: vrl::value::Value,
#[snafu(implicit)]
location: Location,
},
@@ -229,12 +229,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to get timestamp"))]
DateFailedToGetTimestamp {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid Pattern: '{s}'. {detail}"))]
DissectInvalidPattern {
s: String,
@@ -372,13 +366,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Url decoding error"))]
UrlEncodingDecode {
#[snafu(source)]
error: std::string::FromUtf8Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid transform on_failure value: {value}"))]
TransformOnFailureInvalidValue {
value: String,
@@ -433,17 +420,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Null type not supported"))]
CoerceUnsupportedNullType {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Null type not supported when to coerce '{ty}' type"))]
CoerceUnsupportedNullTypeTo {
ty: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Type: {ty} value not supported for Epoch"))]
CoerceUnsupportedEpochType {
ty: String,
@@ -556,12 +532,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Input value must be an object"))]
InputValueMustBeObject {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Column options error"))]
ColumnOptions {
#[snafu(source)]
@@ -575,12 +545,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unsupported number type: {value:?}"))]
UnsupportedNumberType {
value: serde_json::Number,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to parse json"))]
JsonParse {
#[snafu(source)]
@@ -694,14 +658,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Float is not a number: {}", input_float))]
FloatNaN {
input_float: f64,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid timestamp value: {}", input))]
InvalidTimestamp {
input: String,
@@ -709,14 +665,13 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to convert bytes to utf8"))]
BytesToUtf8 {
#[snafu(source)]
error: std::string::FromUtf8Error,
#[snafu(display("Invalid epoch value '{}' for resolution '{}'", value, resolution))]
InvalidEpochForResolution {
value: i64,
resolution: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Please don't use regex in Vrl script"))]
VrlRegexValue {
#[snafu(implicit)]
@@ -808,6 +763,21 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Float is NaN"))]
FloatIsNan {
#[snafu(source)]
error: ordered_float::FloatIsNan,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unsupported type in pipeline: {}", ty))]
UnsupportedTypeInPipeline {
ty: String,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -858,7 +828,6 @@ impl ErrorExt for Error {
| DateParseTimezone { .. }
| DateParse { .. }
| DateFailedToGetLocalTimezone { .. }
| DateFailedToGetTimestamp { .. }
| DissectInvalidPattern { .. }
| DissectEmptyPattern { .. }
| DissectSplitExceedsInput { .. }
@@ -881,7 +850,6 @@ impl ErrorExt for Error {
| RegexNoValidPattern { .. }
| UrlEncodingInvalidMethod { .. }
| DigestPatternInvalid { .. }
| UrlEncodingDecode { .. }
| TransformOnFailureInvalidValue { .. }
| TransformElementMustBeMap { .. }
| TransformFieldMustBeSet { .. }
@@ -891,8 +859,6 @@ impl ErrorExt for Error {
| TransformTimestampIndexCount { .. }
| AutoTransformOneTimestamp { .. }
| InvalidVersionNumber { .. }
| CoerceUnsupportedNullType { .. }
| CoerceUnsupportedNullTypeTo { .. }
| CoerceUnsupportedEpochType { .. }
| CoerceStringToType { .. }
| CoerceJsonTypeTo { .. }
@@ -908,10 +874,8 @@ impl ErrorExt for Error {
| ValueYamlKeyMustBeString { .. }
| YamlLoad { .. }
| YamlParse { .. }
| InputValueMustBeObject { .. }
| ColumnOptions { .. }
| UnsupportedIndexType { .. }
| UnsupportedNumberType { .. }
| IdentifyPipelineColumnTypeMismatch { .. }
| JsonParse { .. }
| JsonPathParse { .. }
@@ -924,12 +888,14 @@ impl ErrorExt for Error {
| InvalidTableSuffixTemplate { .. }
| CompileVrl { .. }
| ExecuteVrl { .. }
| FloatNaN { .. }
| BytesToUtf8 { .. }
| InvalidTimestamp { .. }
| VrlRegexValue { .. }
| VrlReturnValue { .. }
| PipelineMissing { .. } => StatusCode::InvalidArguments,
FloatIsNan { .. }
| InvalidEpochForResolution { .. }
| UnsupportedTypeInPipeline { .. } => StatusCode::InvalidArguments,
}
}
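
The new `FloatIsNan` variant exists because `VrlValue::Float` wraps `ordered_float::NotNan<f64>`, so NaN has to be rejected at the conversion boundary. A self-contained sketch of that pattern, using a toy error enum in place of the crate's:

```rust
use ordered_float::NotNan;
use snafu::{ResultExt, Snafu};
use vrl::value::Value as VrlValue;

#[derive(Debug, Snafu)]
enum Error {
    // Mirrors the new pipeline variant: ordered_float's NaN error is
    // carried as the snafu source.
    #[snafu(display("Float is NaN"))]
    FloatIsNan { source: ordered_float::FloatIsNan },
}

fn to_vrl_float(f: f64) -> Result<VrlValue, Error> {
    // NotNan::new returns Err(FloatIsNan) for NaN inputs.
    let not_nan = NotNan::new(f).context(FloatIsNanSnafu)?;
    Ok(VrlValue::Float(not_nan))
}

fn main() {
    assert!(to_vrl_float(1.08).is_ok());
    assert!(to_vrl_float(f64::NAN).is_err());
}
```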

View File

@@ -19,21 +19,19 @@ pub mod processor;
pub mod transform;
pub mod value;
use std::collections::BTreeMap;
use api::v1::Row;
use common_time::timestamp::TimeUnit;
use itertools::Itertools;
use processor::{Processor, Processors};
use snafu::{ensure, OptionExt, ResultExt};
use transform::Transforms;
use value::Value;
use vrl::core::Value as VrlValue;
use yaml_rust::{Yaml, YamlLoader};
use crate::dispatcher::{Dispatcher, Rule};
use crate::error::{
AutoTransformOneTimestampSnafu, Error, InputValueMustBeObjectSnafu, IntermediateKeyIndexSnafu,
InvalidVersionNumberSnafu, Result, YamlLoadSnafu, YamlParseSnafu,
AutoTransformOneTimestampSnafu, Error, IntermediateKeyIndexSnafu, InvalidVersionNumberSnafu,
Result, YamlLoadSnafu, YamlParseSnafu,
};
use crate::etl::processor::ProcessorKind;
use crate::etl::transform::transformer::greptime::values_to_row;
@@ -228,7 +226,7 @@ impl DispatchedTo {
#[derive(Debug)]
pub enum PipelineExecOutput {
Transformed(TransformedOutput),
DispatchedTo(DispatchedTo, Value),
DispatchedTo(DispatchedTo, VrlValue),
}
#[derive(Debug)]
@@ -261,40 +259,6 @@ impl PipelineExecOutput {
}
}
pub fn json_to_map(val: serde_json::Value) -> Result<Value> {
match val {
serde_json::Value::Object(map) => {
let mut intermediate_state = BTreeMap::new();
for (k, v) in map {
intermediate_state.insert(k, Value::try_from(v)?);
}
Ok(Value::Map(intermediate_state.into()))
}
_ => InputValueMustBeObjectSnafu.fail(),
}
}
pub fn json_array_to_map(val: Vec<serde_json::Value>) -> Result<Vec<Value>> {
val.into_iter().map(json_to_map).collect()
}
pub fn simd_json_to_map(val: simd_json::OwnedValue) -> Result<Value> {
match val {
simd_json::OwnedValue::Object(map) => {
let mut intermediate_state = BTreeMap::new();
for (k, v) in map.into_iter() {
intermediate_state.insert(k, Value::try_from(v)?);
}
Ok(Value::Map(intermediate_state.into()))
}
_ => InputValueMustBeObjectSnafu.fail(),
}
}
pub fn simd_json_array_to_map(val: Vec<simd_json::OwnedValue>) -> Result<Vec<Value>> {
val.into_iter().map(simd_json_to_map).collect()
}
impl Pipeline {
fn is_v1(&self) -> bool {
self.doc_version == PipelineDocVersion::V1
@@ -302,7 +266,7 @@ impl Pipeline {
pub fn exec_mut(
&self,
mut val: Value,
mut val: VrlValue,
pipeline_ctx: &PipelineContext<'_>,
schema_info: &mut SchemaInfo,
) -> Result<PipelineExecOutput> {
@@ -409,11 +373,14 @@ macro_rules! setup_pipeline {
}
#[cfg(test)]
mod tests {
use std::collections::BTreeMap;
use std::sync::Arc;
use api::v1::Rows;
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{self, ColumnDataType, SemanticType};
use vrl::prelude::Bytes;
use vrl::value::KeyString;
use super::*;
@@ -454,7 +421,7 @@ transform:
session::context::Channel::Unknown,
);
let payload = json_to_map(input_value).unwrap();
let payload = input_value.into();
let result = pipeline
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -515,9 +482,10 @@ transform:
&pipeline_param,
session::context::Channel::Unknown,
);
let mut payload = BTreeMap::new();
payload.insert("message".to_string(), Value::String(message));
let payload = Value::Map(payload.into());
let payload = VrlValue::Object(BTreeMap::from([(
KeyString::from("message"),
VrlValue::Bytes(Bytes::from(message)),
)]));
let result = pipeline
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
@@ -613,7 +581,7 @@ transform:
session::context::Channel::Unknown,
);
let payload = json_to_map(input_value).unwrap();
let payload = input_value.into();
let result = pipeline
.exec_mut(payload, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -666,7 +634,7 @@ transform:
session::context::Channel::Unknown,
);
let schema = pipeline.schemas().unwrap().clone();
let result = json_to_map(input_value).unwrap();
let result = input_value.into();
let row = pipeline
.exec_mut(result, &pipeline_ctx, &mut schema_info)
@@ -732,7 +700,7 @@ transform:
assert_eq!(
dispatcher.rules[0],
crate::dispatcher::Rule {
value: Value::String("http".to_string()),
value: VrlValue::Bytes(Bytes::from("http")),
table_suffix: "http_events".to_string(),
pipeline: None
}
@@ -741,7 +709,7 @@ transform:
assert_eq!(
dispatcher.rules[1],
crate::dispatcher::Rule {
value: Value::String("database".to_string()),
value: VrlValue::Bytes(Bytes::from("database")),
table_suffix: "db_events".to_string(),
pipeline: Some("database_pipeline".to_string()),
}
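
The bare `.into()` calls in these tests lean on the `From<serde_json::Value>` impl the vrl crate provides, which is what makes `json_to_map` unnecessary. A small equivalence check between the two construction styles:

```rust
use std::collections::BTreeMap;

use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

fn main() {
    // From<serde_json::Value> is what the bare `.into()` calls rely on.
    let from_json: VrlValue = serde_json::json!({ "message": "hello" }).into();

    // Hand-built equivalent: JSON strings land in the Bytes variant.
    let by_hand = VrlValue::Object(BTreeMap::from([(
        KeyString::from("message"),
        VrlValue::Bytes(Bytes::from("hello")),
    )]));

    assert_eq!(from_json, by_hand);
}
```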

View File

@@ -19,10 +19,10 @@ use ahash::{HashMap, HashMapExt};
use api::v1::{RowInsertRequest, RowInsertRequests, Rows};
use session::context::{QueryContext, QueryContextRef};
use snafu::OptionExt;
use vrl::value::Value as VrlValue;
use crate::error::{Result, ValueMustBeMapSnafu};
use crate::tablesuffix::TableSuffixTemplate;
use crate::Value;
const GREPTIME_AUTO_CREATE_TABLE: &str = "greptime_auto_create_table";
const GREPTIME_TTL: &str = "greptime_ttl";
@@ -86,32 +86,34 @@ impl ContextOpt {
}
impl ContextOpt {
pub fn from_pipeline_map_to_opt(pipeline_map: &mut Value) -> Result<Self> {
let pipeline_map = pipeline_map.as_map_mut().context(ValueMustBeMapSnafu)?;
pub fn from_pipeline_map_to_opt(value: &mut VrlValue) -> Result<Self> {
let map = value.as_object_mut().context(ValueMustBeMapSnafu)?;
let mut opt = Self::default();
for k in PIPELINE_HINT_KEYS {
if let Some(v) = pipeline_map.remove(k) {
if let Some(v) = map.remove(k) {
let v = v.to_string_lossy().to_string();
match k {
GREPTIME_AUTO_CREATE_TABLE => {
opt.auto_create_table = Some(v.to_str_value());
opt.auto_create_table = Some(v);
}
GREPTIME_TTL => {
opt.ttl = Some(v.to_str_value());
opt.ttl = Some(v);
}
GREPTIME_APPEND_MODE => {
opt.append_mode = Some(v.to_str_value());
opt.append_mode = Some(v);
}
GREPTIME_MERGE_MODE => {
opt.merge_mode = Some(v.to_str_value());
opt.merge_mode = Some(v);
}
GREPTIME_PHYSICAL_TABLE => {
opt.physical_table = Some(v.to_str_value());
opt.physical_table = Some(v);
}
GREPTIME_SKIP_WAL => {
opt.skip_wal = Some(v.to_str_value());
opt.skip_wal = Some(v);
}
GREPTIME_TABLE_SUFFIX => {
opt.table_suffix = Some(v.to_str_value());
opt.table_suffix = Some(v);
}
_ => {}
}
@@ -123,7 +125,7 @@ impl ContextOpt {
pub(crate) fn resolve_table_suffix(
&mut self,
table_suffix: Option<&TableSuffixTemplate>,
pipeline_map: &Value,
pipeline_map: &VrlValue,
) -> Option<String> {
self.table_suffix
.take()
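
`from_pipeline_map_to_opt` now drains hint keys out of the event object in place and stringifies whatever value type arrived via `to_string_lossy`. The core move in isolation (`take_hint` is an illustrative name, not crate API):

```rust
use std::collections::BTreeMap;

use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

// Remove a hint key from the event object and render it as a string.
fn take_hint(value: &mut VrlValue, key: &str) -> Option<String> {
    let map = value.as_object_mut()?;
    map.remove(key).map(|v| v.to_string_lossy().to_string())
}

fn main() {
    let mut event = VrlValue::Object(BTreeMap::from([(
        KeyString::from("greptime_ttl"),
        VrlValue::Bytes(Bytes::from("7d")),
    )]));
    assert_eq!(take_hint(&mut event, "greptime_ttl"), Some("7d".to_string()));
    // The hint is consumed, so it never reaches the stored row.
    assert!(event.as_object().unwrap().is_empty());
}
```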

View File

@@ -28,7 +28,7 @@ pub mod regex;
pub mod select;
pub mod simple_extract;
pub mod urlencoding;
pub mod vrl;
pub mod vrl_processor;
use std::str::FromStr;
@@ -47,6 +47,7 @@ use letter::LetterProcessor;
use regex::RegexProcessor;
use snafu::{OptionExt, ResultExt};
use urlencoding::UrlEncodingProcessor;
use vrl::value::Value as VrlValue;
use crate::error::{
Error, FailedParseFieldFromStringSnafu, FieldMustBeTypeSnafu, InvalidFieldRenameSnafu,
@@ -57,8 +58,7 @@ use crate::etl::field::{Field, Fields};
use crate::etl::processor::json_parse::JsonParseProcessor;
use crate::etl::processor::select::SelectProcessor;
use crate::etl::processor::simple_extract::SimpleExtractProcessor;
use crate::etl::processor::vrl::VrlProcessor;
use crate::Value;
use crate::etl::processor::vrl_processor::VrlProcessor;
const FIELD_NAME: &str = "field";
const FIELDS_NAME: &str = "fields";
@@ -123,7 +123,7 @@ pub trait Processor: std::fmt::Debug + Send + Sync + 'static {
fn ignore_missing(&self) -> bool;
/// Execute the processor on a vector which be preprocessed by the pipeline
fn exec_mut(&self, val: Value) -> Result<Value>;
fn exec_mut(&self, val: VrlValue) -> Result<VrlValue>;
}
#[derive(Debug)]
@@ -224,7 +224,7 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorKind> {
json_parse::PROCESSOR_JSON_PARSE => {
ProcessorKind::JsonParse(JsonParseProcessor::try_from(value)?)
}
vrl::PROCESSOR_VRL => ProcessorKind::Vrl(VrlProcessor::try_from(value)?),
vrl_processor::PROCESSOR_VRL => ProcessorKind::Vrl(VrlProcessor::try_from(value)?),
select::PROCESSOR_SELECT => ProcessorKind::Select(SelectProcessor::try_from(value)?),
_ => return UnsupportedProcessorSnafu { processor: str_key }.fail(),
};
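
After the rename to `vrl_processor`, every processor's `exec_mut` consumes and returns a `VrlValue`. A toy processor in that shape (the trait below is a stand-in, not the crate's full `Processor` trait):

```rust
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;

trait Processor {
    fn exec_mut(&self, val: VrlValue) -> Result<VrlValue, String>;
}

struct Uppercase;

impl Processor for Uppercase {
    fn exec_mut(&self, val: VrlValue) -> Result<VrlValue, String> {
        match val {
            // Strings live in the Bytes variant after the migration.
            VrlValue::Bytes(b) => Ok(VrlValue::Bytes(Bytes::from(
                String::from_utf8_lossy(&b).to_uppercase(),
            ))),
            other => Ok(other),
        }
    }
}

fn main() {
    let out = Uppercase.exec_mut(VrlValue::Bytes(Bytes::from("ok"))).unwrap();
    assert_eq!(out, VrlValue::Bytes(Bytes::from("OK")));
}
```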

View File

@@ -18,20 +18,22 @@
use std::collections::BTreeMap;
use ordered_float::NotNan;
use snafu::{OptionExt, ResultExt};
use urlencoding::decode;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
CmcdMissingKeySnafu, CmcdMissingValueSnafu, Error, FailedToParseFloatKeySnafu,
FailedToParseIntKeySnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result,
FailedToParseIntKeySnafu, FloatIsNanSnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_CMCD: &str = "cmcd";
@@ -76,42 +78,43 @@ const CMCD_KEYS: [&str; 18] = [
];
/// function to resolve CMCD_KEY_BS | CMCD_KEY_SU
fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result<Value> {
Ok(Value::Boolean(true))
fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result<VrlValue> {
Ok(VrlValue::Boolean(true))
}
/// function to resolve CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP | CMCD_KEY_RTP | CMCD_KEY_TB
fn br_tb(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
fn br_tb(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
let val: i64 = v
.parse()
.context(FailedToParseIntKeySnafu { key: k, value: v })?;
Ok(Value::Int64(val))
Ok(VrlValue::Integer(val))
}
/// function to resolve CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID | CMCD_KEY_V
fn cid_v(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
fn cid_v(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
Ok(Value::String(v.to_string()))
Ok(VrlValue::Bytes(Bytes::from(v.to_string())))
}
/// function to resolve CMCD_KEY_NOR
fn nor(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
fn nor(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
let val = match decode(v) {
Ok(val) => val.to_string(),
Err(_) => v.to_string(),
};
Ok(Value::String(val))
Ok(VrlValue::Bytes(Bytes::from(val)))
}
/// function to resolve CMCD_KEY_PR
fn pr(s: &str, k: &str, v: Option<&str>) -> Result<Value> {
fn pr(s: &str, k: &str, v: Option<&str>) -> Result<VrlValue> {
let v = v.context(CmcdMissingValueSnafu { k, s })?;
let val: f64 = v
.parse()
.context(FailedToParseFloatKeySnafu { key: k, value: v })?;
Ok(Value::Float64(val))
let val = NotNan::new(val).context(FloatIsNanSnafu)?;
Ok(VrlValue::Float(val))
}
/// Common Media Client Data Specification:
@@ -156,11 +159,11 @@ pub struct CmcdProcessor {
}
impl CmcdProcessor {
fn generate_key(prefix: &str, key: &str) -> String {
format!("{}_{}", prefix, key)
fn generate_key(prefix: &str, key: &str) -> KeyString {
KeyString::from(format!("{}_{}", prefix, key))
}
fn parse(&self, name: &str, value: &str) -> Result<BTreeMap<String, Value>> {
fn parse(&self, name: &str, value: &str) -> Result<BTreeMap<KeyString, VrlValue>> {
let mut working_set = BTreeMap::new();
let parts = value.split(',');
@@ -250,16 +253,18 @@ impl Processor for CmcdProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let name = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(name) {
Some(Value::String(s)) => {
let results = self.parse(field.target_or_input_field(), s)?;
val.extend(results.into())?;
Some(VrlValue::Bytes(s)) => {
let s = String::from_utf8_lossy(s);
let results = self.parse(field.target_or_input_field(), &s)?;
val.extend(results);
}
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind().to_string(),
@@ -288,7 +293,6 @@ mod tests {
use super::*;
use crate::etl::field::{Field, Fields};
use crate::etl::value::Value;
#[test]
fn test_cmcd() {
@@ -297,23 +301,23 @@ mod tests {
"sid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
vec![(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
)],
),
(
"br%3D3200%2Cbs%2Cd%3D4004%2Cmtp%3D25400%2Cot%3Dv%2Crtp%3D15000%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22%2Ctb%3D6000",
vec![
("prefix_bs", Value::Boolean(true)),
("prefix_ot", Value::String("v".into())),
("prefix_rtp", Value::Int64(15000)),
("prefix_br", Value::Int64(3200)),
("prefix_tb", Value::Int64(6000)),
("prefix_d", Value::Int64(4004)),
("prefix_bs", VrlValue::Boolean(true)),
("prefix_ot", VrlValue::Bytes(Bytes::from("v"))),
("prefix_rtp", VrlValue::Integer(15000)),
("prefix_br", VrlValue::Integer(3200)),
("prefix_tb", VrlValue::Integer(6000)),
("prefix_d", VrlValue::Integer(4004)),
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
("prefix_mtp", Value::Int64(25400)),
("prefix_mtp", VrlValue::Integer(25400)),
],
),
(
@@ -322,16 +326,16 @@ mod tests {
vec![
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
("prefix_rtp", Value::Int64(15000)),
("prefix_rtp", VrlValue::Integer(15000)),
],
),
(
"bs%2Csu",
vec![
("prefix_su", Value::Boolean(true)),
("prefix_bs", Value::Boolean(true)),
("prefix_su", VrlValue::Boolean(true)),
("prefix_bs", VrlValue::Boolean(true)),
],
),
(
@@ -346,7 +350,7 @@ mod tests {
// "prefix_com.examplemyStringKey",
// Value::String("\"myStringValue\"".into()),
// ),
("prefix_d", Value::Int64(4004)),
("prefix_d", VrlValue::Integer(4004)),
],
),
(
@@ -354,11 +358,11 @@ mod tests {
vec![
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
(
"prefix_nor",
Value::String("\"../300kbps/segment35.m4v\"".into()),
VrlValue::Bytes(Bytes::from("\"../300kbps/segment35.m4v\"")),
),
],
@@ -366,56 +370,56 @@ mod tests {
(
"nrr%3D%2212323-48763%22%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
vec![
("prefix_nrr", Value::String("\"12323-48763\"".into())),
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
],
),
(
"nor%3D%22..%252F300kbps%252Ftrack.m4v%22%2Cnrr%3D%2212323-48763%22%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
vec![
("prefix_nrr", Value::String("\"12323-48763\"".into())),
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
(
"prefix_nor",
Value::String("\"../300kbps/track.m4v\"".into()),
VrlValue::Bytes(Bytes::from("\"../300kbps/track.m4v\"")),
),
],
),
(
"bl%3D21300%2Cbr%3D3200%2Cbs%2Ccid%3D%22faec5fc2-ac30-11eabb37-0242ac130002%22%2Cd%3D4004%2Cdl%3D18500%2Cmtp%3D48100%2Cnor%3D%22..%252F300kbps%252Ftrack.m4v%22%2Cnrr%3D%2212323-48763%22%2Cot%3Dv%2Cpr%3D1.08%2Crtp%3D12000%2Csf%3Dd%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22%2Cst%3Dv%2Csu%2Ctb%3D6000",
vec![
("prefix_bl", Value::Int64(21300)),
("prefix_bs", Value::Boolean(true)),
("prefix_st", Value::String("v".into())),
("prefix_ot", Value::String("v".into())),
("prefix_bl", VrlValue::Integer(21300)),
("prefix_bs", VrlValue::Boolean(true)),
("prefix_st", VrlValue::Bytes(Bytes::from("v"))),
("prefix_ot", VrlValue::Bytes(Bytes::from("v"))),
(
"prefix_sid",
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
VrlValue::Bytes(Bytes::from("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"")),
),
("prefix_tb", Value::Int64(6000)),
("prefix_d", Value::Int64(4004)),
("prefix_tb", VrlValue::Integer(6000)),
("prefix_d", VrlValue::Integer(4004)),
(
"prefix_cid",
Value::String("\"faec5fc2-ac30-11eabb37-0242ac130002\"".into()),
VrlValue::Bytes(Bytes::from("\"faec5fc2-ac30-11eabb37-0242ac130002\"")),
),
("prefix_mtp", Value::Int64(48100)),
("prefix_rtp", Value::Int64(12000)),
("prefix_mtp", VrlValue::Integer(48100)),
("prefix_rtp", VrlValue::Integer(12000)),
(
"prefix_nor",
Value::String("\"../300kbps/track.m4v\"".into()),
VrlValue::Bytes(Bytes::from("\"../300kbps/track.m4v\"")),
),
("prefix_sf", Value::String("d".into())),
("prefix_br", Value::Int64(3200)),
("prefix_nrr", Value::String("\"12323-48763\"".into())),
("prefix_pr", Value::Float64(1.08)),
("prefix_su", Value::Boolean(true)),
("prefix_dl", Value::Int64(18500)),
("prefix_sf", VrlValue::Bytes(Bytes::from("d"))),
("prefix_br", VrlValue::Integer(3200)),
("prefix_nrr", VrlValue::Bytes(Bytes::from("\"12323-48763\""))),
("prefix_pr", VrlValue::Float(NotNan::new(1.08).unwrap())),
("prefix_su", VrlValue::Boolean(true)),
("prefix_dl", VrlValue::Integer(18500)),
],
),
];
@@ -432,8 +436,8 @@ mod tests {
let expected = vec
.into_iter()
.map(|(k, v)| (k.to_string(), v))
.collect::<BTreeMap<String, Value>>();
.map(|(k, v)| (KeyString::from(k.to_string()), v))
.collect::<BTreeMap<KeyString, VrlValue>>();
let actual = processor.parse("prefix", &decoded).unwrap();
assert_eq!(actual, expected);
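
`exec_mut` merges the parsed CMCD keys into the event with plain `BTreeMap::extend`, replacing the old fallible `Value::extend`. The merge step in isolation:

```rust
use std::collections::BTreeMap;

use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

fn main() {
    let mut event = VrlValue::Object(BTreeMap::new());
    let parsed = BTreeMap::from([
        (KeyString::from("prefix_br"), VrlValue::Integer(3200)),
        (KeyString::from("prefix_ot"), VrlValue::Bytes(Bytes::from("v"))),
    ]);
    // as_object_mut exposes the underlying map; extend is infallible.
    event.as_object_mut().unwrap().extend(parsed);
    assert_eq!(event.as_object().unwrap().len(), 2);
}
```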

View File

@@ -20,17 +20,19 @@ use csv::{ReaderBuilder, Trim};
use itertools::EitherOrBoth::{Both, Left, Right};
use itertools::Itertools;
use snafu::{OptionExt, ResultExt};
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
CsvNoRecordSnafu, CsvQuoteNameSnafu, CsvReadSnafu, CsvSeparatorNameSnafu, Error,
KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_CSV: &str = "csv";
@@ -60,8 +62,8 @@ pub struct CsvProcessor {
impl CsvProcessor {
// process the csv format string to a map with target_fields as keys
fn process(&self, val: &str) -> Result<BTreeMap<String, Value>> {
let mut reader = self.reader.from_reader(val.as_bytes());
fn process(&self, val: &[u8]) -> Result<BTreeMap<KeyString, VrlValue>> {
let mut reader = self.reader.from_reader(val);
if let Some(result) = reader.records().next() {
let record: csv::StringRecord = result.context(CsvReadSnafu)?;
@@ -71,17 +73,18 @@ impl CsvProcessor {
.iter()
.zip_longest(record.iter())
.filter_map(|zipped| match zipped {
Both(target_field, val) => {
Some((target_field.clone(), Value::String(val.into())))
}
Both(target_field, val) => Some((
KeyString::from(target_field.clone()),
VrlValue::Bytes(Bytes::from(val.to_string())),
)),
// if target fields are more than extracted fields, fill the rest with empty value
Left(target_field) => {
let value = self
.empty_value
.as_ref()
.map(|s| Value::String(s.clone()))
.unwrap_or(Value::Null);
Some((target_field.clone(), value))
.map(|s| VrlValue::Bytes(Bytes::from(s.clone())))
.unwrap_or(VrlValue::Null);
Some((KeyString::from(target_field.clone()), value))
}
// if extracted fields are more than target fields, ignore the rest
Right(_) => None,
@@ -190,16 +193,18 @@ impl Processor for CsvProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let name = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(name) {
Some(Value::String(v)) => {
Some(VrlValue::Bytes(v)) => {
let results = self.process(v)?;
val.extend(results.into())?;
val.extend(results);
}
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind().to_string(),
@@ -238,11 +243,11 @@ mod tests {
..Default::default()
};
let result = processor.process("1,2").unwrap();
let result = processor.process(b"1,2").unwrap();
let values: BTreeMap<String, Value> = [
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
let values: BTreeMap<KeyString, VrlValue> = [
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
]
.into_iter()
.collect();
@@ -264,12 +269,12 @@ mod tests {
..Default::default()
};
let result = processor.process("1,2").unwrap();
let result = processor.process(b"1,2").unwrap();
let values: BTreeMap<String, Value> = [
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
("c".into(), Value::Null),
let values: BTreeMap<KeyString, VrlValue> = [
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
(KeyString::from("c"), VrlValue::Null),
]
.into_iter()
.collect();
@@ -289,12 +294,15 @@ mod tests {
..Default::default()
};
let result = processor.process("1,2").unwrap();
let result = processor.process(b"1,2").unwrap();
let values: BTreeMap<String, Value> = [
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
("c".into(), Value::String("default".into())),
let values: BTreeMap<KeyString, VrlValue> = [
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
(
KeyString::from("c"),
VrlValue::Bytes(Bytes::from("default")),
),
]
.into_iter()
.collect();
@@ -315,11 +323,11 @@ mod tests {
..Default::default()
};
let result = processor.process("1,2").unwrap();
let result = processor.process(b"1,2").unwrap();
let values: BTreeMap<String, Value> = [
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
let values: BTreeMap<KeyString, VrlValue> = [
(KeyString::from("a"), VrlValue::Bytes(Bytes::from("1"))),
(KeyString::from("b"), VrlValue::Bytes(Bytes::from("2"))),
]
.into_iter()
.collect();
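
The CSV processor now feeds the raw `&[u8]` from `VrlValue::Bytes` straight into the reader, skipping the old UTF-8 `String` round-trip. A simplified sketch of the record-to-map step (no empty-value filling for short records):

```rust
use std::collections::BTreeMap;

use csv::ReaderBuilder;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

// Parse the first CSV record and pair it with the configured target fields.
fn csv_to_map(input: &[u8], targets: &[&str]) -> Option<BTreeMap<KeyString, VrlValue>> {
    let mut reader = ReaderBuilder::new().has_headers(false).from_reader(input);
    let record = reader.records().next()?.ok()?;
    Some(
        targets
            .iter()
            .zip(record.iter())
            .map(|(k, v)| {
                (
                    KeyString::from(*k),
                    VrlValue::Bytes(Bytes::from(v.to_string())),
                )
            })
            .collect(),
    )
}

fn main() {
    let map = csv_to_map(b"1,2", &["a", "b"]).unwrap();
    assert_eq!(map.get("a"), Some(&VrlValue::Bytes(Bytes::from("1"))));
}
```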

View File

@@ -14,22 +14,22 @@
use std::sync::Arc;
use chrono::{DateTime, NaiveDateTime};
use chrono::{DateTime, NaiveDateTime, Utc};
use chrono_tz::Tz;
use lazy_static::lazy_static;
use snafu::{OptionExt, ResultExt};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
DateFailedToGetLocalTimezoneSnafu, DateFailedToGetTimestampSnafu, DateParseSnafu,
DateParseTimezoneSnafu, Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorFailedToParseStringSnafu, ProcessorMissingFieldSnafu, Result,
DateFailedToGetLocalTimezoneSnafu, DateParseSnafu, DateParseTimezoneSnafu, Error,
KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorFailedToParseStringSnafu,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, FIELDS_NAME,
FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::{Timestamp, Value};
pub(crate) const PROCESSOR_DATE: &str = "date";
@@ -162,7 +162,7 @@ pub struct DateProcessor {
}
impl DateProcessor {
fn parse(&self, val: &str) -> Result<Timestamp> {
fn parse(&self, val: &str) -> Result<DateTime<Utc>> {
let mut tz = Tz::UTC;
if let Some(timezone) = &self.timezone {
tz = timezone.parse::<Tz>().context(DateParseTimezoneSnafu {
@@ -171,8 +171,8 @@ impl DateProcessor {
}
for fmt in self.formats.iter() {
if let Ok(ns) = try_parse(val, fmt, tz) {
return Ok(Timestamp::Nanosecond(ns));
if let Ok(utc_ts) = try_parse(val, fmt, tz) {
return Ok(utc_ts);
}
}
@@ -193,16 +193,19 @@ impl Processor for DateProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(Value::String(s)) => {
let timestamp = self.parse(s)?;
Some(VrlValue::Bytes(s)) => {
let timestamp = self.parse(String::from_utf8_lossy(s).as_ref())?;
let output_key = field.target_or_input_field();
val.insert(output_key.to_string(), Value::Timestamp(timestamp))?;
val.insert(KeyString::from(output_key), VrlValue::Timestamp(timestamp));
}
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind().to_string(),
@@ -224,21 +227,19 @@ impl Processor for DateProcessor {
}
}
/// try to parse val with timezone first, if failed, parse without timezone
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Result<i64> {
// parse the datetime with timezone info
// if failed, try to parse using naive date time and add tz info
// finally convert the datetime to utc
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Result<DateTime<Utc>> {
if let Ok(dt) = DateTime::parse_from_str(val, fmt) {
Ok(dt
.timestamp_nanos_opt()
.context(DateFailedToGetTimestampSnafu)?)
Ok(dt.to_utc())
} else {
let dt = NaiveDateTime::parse_from_str(val, fmt)
.context(DateParseSnafu { value: val })?
.and_local_timezone(tz)
.single()
.context(DateFailedToGetLocalTimezoneSnafu)?;
Ok(dt
.timestamp_nanos_opt()
.context(DateFailedToGetTimestampSnafu)?)
Ok(dt.to_utc())
}
}
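
The date processor now returns `DateTime<Utc>` instead of pre-converted nanoseconds, which also retires `DateFailedToGetTimestamp`. The two-step parse as a standalone sketch:

```rust
use chrono::{DateTime, NaiveDateTime, Utc};
use chrono_tz::Tz;

// Prefer a timezone-aware parse; fall back to a naive parse interpreted
// in the configured zone; normalise the result to UTC either way.
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Option<DateTime<Utc>> {
    if let Ok(dt) = DateTime::parse_from_str(val, fmt) {
        Some(dt.to_utc())
    } else {
        NaiveDateTime::parse_from_str(val, fmt)
            .ok()?
            .and_local_timezone(tz)
            .single()
            .map(|dt| dt.to_utc())
    }
}

fn main() {
    let ts = try_parse("2025-07-11 01:08:31", "%Y-%m-%d %H:%M:%S", Tz::Asia__Shanghai).unwrap();
    assert_eq!(ts.to_rfc3339(), "2025-07-10T17:08:31+00:00");
}
```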

View File

@@ -21,15 +21,17 @@
use once_cell::sync::Lazy;
use regex::Regex;
use snafu::OptionExt;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_DECOLORIZE: &str = "decolorize";
@@ -43,13 +45,15 @@ pub struct DecolorizeProcessor {
}
impl DecolorizeProcessor {
fn process_string(&self, val: &str) -> Result<Value> {
Ok(Value::String(RE.replace_all(val, "").into_owned()))
fn process_string(&self, val: &str) -> Result<VrlValue> {
Ok(VrlValue::Bytes(Bytes::from(
RE.replace_all(val, "").to_string(),
)))
}
fn process(&self, val: &Value) -> Result<Value> {
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
match val {
Value::String(val) => self.process_string(val),
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
_ => ProcessorExpectStringSnafu {
processor: PROCESSOR_DECOLORIZE,
v: val.clone(),
@@ -101,11 +105,12 @@ impl crate::etl::processor::Processor for DecolorizeProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -117,7 +122,7 @@ impl crate::etl::processor::Processor for DecolorizeProcessor {
Some(v) => {
let result = self.process(v)?;
let output_index = field.target_or_input_field();
val.insert(output_index.to_string(), result)?;
val.insert(KeyString::from(output_index), result);
}
}
}
@@ -136,16 +141,19 @@ mod tests {
ignore_missing: false,
};
let val = Value::String("\x1b[32mGreen\x1b[0m".to_string());
let val = VrlValue::Bytes(Bytes::from("\x1b[32mGreen\x1b[0m".to_string()));
let result = processor.process(&val).unwrap();
assert_eq!(result, Value::String("Green".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("Green".to_string())));
let val = Value::String("Plain text".to_string());
let val = VrlValue::Bytes(Bytes::from("Plain text".to_string()));
let result = processor.process(&val).unwrap();
assert_eq!(result, Value::String("Plain text".to_string()));
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("Plain text".to_string()))
);
let val = Value::String("\x1b[46mfoo\x1b[0m bar".to_string());
let val = VrlValue::Bytes(Bytes::from("\x1b[46mfoo\x1b[0m bar".to_string()));
let result = processor.process(&val).unwrap();
assert_eq!(result, Value::String("foo bar".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("foo bar".to_string())));
}
}
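
The decolorize hunk doesn't show the crate's `RE`, but an equivalent SGR-stripping pattern reproduces the test behaviour (assumption: the real regex may cover more escape forms):

```rust
use regex::Regex;

fn main() {
    // Strip SGR colour sequences such as "\x1b[32m" / "\x1b[0m".
    let re = Regex::new(r"\x1b\[[0-9;]*m").unwrap();
    assert_eq!(re.replace_all("\x1b[32mGreen\x1b[0m", ""), "Green");
    assert_eq!(re.replace_all("Plain text", ""), "Plain text");
}
```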

View File

@@ -23,16 +23,17 @@ use std::borrow::Cow;
use regex::Regex;
use snafu::OptionExt;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
DigestPatternInvalidSnafu, Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_DIGEST: &str = "digest";
@@ -100,7 +101,7 @@ impl DigestProcessor {
re.replace_all(val, "").to_string()
}
fn process_string(&self, val: &str) -> Result<Value> {
fn process_string(&self, val: &str) -> Result<VrlValue> {
let mut input = Cow::from(val);
for pattern in &self.patterns {
if let Cow::Owned(new_string) = pattern.replace_all(&input, "") {
@@ -108,12 +109,12 @@ impl DigestProcessor {
}
}
Ok(Value::String(input.into_owned()))
Ok(VrlValue::Bytes(Bytes::from(input.to_string())))
}
fn process(&self, val: &Value) -> Result<Value> {
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
match val {
Value::String(val) => self.process_string(val),
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
_ => ProcessorExpectStringSnafu {
processor: PROCESSOR_DIGEST,
v: val.clone(),
@@ -200,11 +201,12 @@ impl crate::etl::processor::Processor for DigestProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -216,7 +218,7 @@ impl crate::etl::processor::Processor for DigestProcessor {
Some(v) => {
let result = self.process(v)?;
let output_index = field.target_or_input_field();
val.insert(output_index.to_string(), result)?;
val.insert(KeyString::from(output_index), result);
}
}
}
@@ -237,24 +239,31 @@ mod tests {
patterns: vec![PresetPattern::Ip.regex()],
};
let input = Value::String("192.168.1.1".to_string());
let input = VrlValue::Bytes(Bytes::from("192.168.1.1".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
let input = Value::String("192.168.1.1:8080".to_string());
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
let input = VrlValue::Bytes(Bytes::from("192.168.1.1:8080".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
let input = Value::String("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]".to_string());
let input = VrlValue::Bytes(Bytes::from(
"[2001:0db8:85a3:0000:0000:8a2e:0370:7334]".to_string(),
));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
let input = Value::String("[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080".to_string());
let input = VrlValue::Bytes(Bytes::from(
"[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080".to_string(),
));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
let input = Value::String("not an ip".to_string());
let input = VrlValue::Bytes(Bytes::from("not an ip".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("not an ip".to_string()));
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("not an ip".to_string()))
);
}
#[test]
@@ -265,29 +274,40 @@ mod tests {
patterns: vec![PresetPattern::Uuid.regex()],
};
// UUID v4
let input = Value::String("123e4567-e89b-12d3-a456-426614174000".to_string());
let input = VrlValue::Bytes(Bytes::from(
"123e4567-e89b-12d3-a456-426614174000".to_string(),
));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
// UUID v1
let input = Value::String("6ba7b810-9dad-11d1-80b4-00c04fd430c8".to_string());
let input = VrlValue::Bytes(Bytes::from(
"6ba7b810-9dad-11d1-80b4-00c04fd430c8".to_string(),
));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
// UUID v5
let input = Value::String("886313e1-3b8a-5372-9b90-0c9aee199e5d".to_string());
let input = VrlValue::Bytes(Bytes::from(
"886313e1-3b8a-5372-9b90-0c9aee199e5d".to_string(),
));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
// UUID with uppercase letters
let input = Value::String("A987FBC9-4BED-3078-CF07-9141BA07C9F3".to_string());
let input = VrlValue::Bytes(Bytes::from(
"A987FBC9-4BED-3078-CF07-9141BA07C9F3".to_string(),
));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
// Negative case
let input = Value::String("not a uuid".to_string());
let input = VrlValue::Bytes(Bytes::from("not a uuid".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("not a uuid".to_string()));
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("not a uuid".to_string()))
);
}
#[test]
@@ -299,45 +319,48 @@ mod tests {
};
// Basic brackets
let input = Value::String("[content]".to_string());
let input = VrlValue::Bytes(Bytes::from("[content]".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
let input = Value::String("(content)".to_string());
let input = VrlValue::Bytes(Bytes::from("(content)".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
// Chinese brackets
let input = Value::String("「content」".to_string());
let input = VrlValue::Bytes(Bytes::from("「content」".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
let input = Value::String("『content』".to_string());
let input = VrlValue::Bytes(Bytes::from("『content』".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
let input = Value::String("【content】".to_string());
let input = VrlValue::Bytes(Bytes::from("【content】".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
// Unmatched/unclosed brackets should not match
let input = Value::String("[content".to_string());
let input = VrlValue::Bytes(Bytes::from("[content".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("[content".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("[content".to_string())));
let input = Value::String("content]".to_string());
let input = VrlValue::Bytes(Bytes::from("content]".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("content]".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("content]".to_string())));
// Bad case
let input = Value::String("[content}".to_string());
let input = VrlValue::Bytes(Bytes::from("[content}".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
// Negative case
let input = Value::String("no brackets".to_string());
let input = VrlValue::Bytes(Bytes::from("no brackets".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("no brackets".to_string()));
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("no brackets".to_string()))
);
}
#[test]
@@ -348,16 +371,19 @@ mod tests {
patterns: vec![PresetPattern::Quoted.regex()],
};
let input = Value::String("\"quoted content\"".to_string());
let input = VrlValue::Bytes(Bytes::from("\"quoted content\"".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
let input = Value::String("no quotes".to_string());
let input = VrlValue::Bytes(Bytes::from("no quotes".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("no quotes".to_string()));
let input = Value::String("".to_string());
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("no quotes".to_string()))
);
let input = VrlValue::Bytes(Bytes::from("".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
}
#[test]
@@ -368,15 +394,18 @@ mod tests {
patterns: vec![Regex::new(r"\d+").unwrap()],
};
let input = Value::String("12345".to_string());
let input = VrlValue::Bytes(Bytes::from("12345".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
let input = Value::String("no digits".to_string());
let input = VrlValue::Bytes(Bytes::from("no digits".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("no digits".to_string()));
let input = Value::String("".to_string());
assert_eq!(
result,
VrlValue::Bytes(Bytes::from("no digits".to_string()))
);
let input = VrlValue::Bytes(Bytes::from("".to_string()));
let result = processor.process(&input).unwrap();
assert_eq!(result, Value::String("".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("".to_string())));
}
}
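
`process_string` folds every configured pattern over the input, reallocating only when a replacement actually occurred. The `Cow` trick in isolation:

```rust
use std::borrow::Cow;

use regex::Regex;

// Fold each pattern over the input; Cow only allocates when a pattern
// removed something — the same approach process_string takes.
fn digest(val: &str, patterns: &[Regex]) -> String {
    let mut input = Cow::from(val);
    for pattern in patterns {
        if let Cow::Owned(new_string) = pattern.replace_all(&input, "") {
            input = Cow::Owned(new_string);
        }
    }
    input.into_owned()
}

fn main() {
    let ip = Regex::new(r"\d+\.\d+\.\d+\.\d+").unwrap();
    assert_eq!(digest("host 192.168.1.1 up", &[ip]), "host  up");
}
```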

View File

@@ -17,6 +17,8 @@ use std::ops::Deref;
use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
use itertools::Itertools;
use snafu::OptionExt;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
DissectAppendOrderAlreadySetSnafu, DissectConsecutiveNamesSnafu, DissectEmptyPatternSnafu,
@@ -24,13 +26,13 @@ use crate::error::{
DissectNoMatchingPatternSnafu, DissectOrderOnlyAppendModifierSnafu,
DissectOrderOnlyAppendSnafu, DissectSplitExceedsInputSnafu, DissectSplitNotMatchInputSnafu,
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_parse_string, yaml_parse_strings, yaml_string,
Processor, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERNS_NAME, PATTERN_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_DISSECT: &str = "dissect";
@@ -421,7 +423,7 @@ impl DissectProcessor {
name: &'a Name,
value: String,
appends: &mut HashMap<&'a String, Vec<(String, u32)>>,
map: &mut Vec<(&'a String, Value)>,
map: &mut Vec<(&'a String, VrlValue)>,
) {
match name.start_modifier {
Some(StartModifier::NamedSkip) => {
@@ -438,12 +440,16 @@ impl DissectProcessor {
// because transform can know the key name
}
None => {
map.push((&name.name, Value::String(value)));
map.push((&name.name, VrlValue::Bytes(Bytes::from(value))));
}
}
}
fn process_pattern(&self, chs: &[char], pattern: &Pattern) -> Result<Vec<(String, Value)>> {
fn process_pattern(
&self,
chs: &[char],
pattern: &Pattern,
) -> Result<Vec<(KeyString, VrlValue)>> {
let mut map = Vec::new();
let mut pos = 0;
@@ -523,14 +529,17 @@ impl DissectProcessor {
for (name, mut values) in appends {
values.sort_by(|a, b| a.1.cmp(&b.1));
let value = values.into_iter().map(|(a, _)| a).join(sep);
map.push((name, Value::String(value)));
map.push((name, VrlValue::Bytes(Bytes::from(value))));
}
}
Ok(map.into_iter().map(|(k, v)| (k.to_string(), v)).collect())
Ok(map
.into_iter()
.map(|(k, v)| (KeyString::from(k.clone()), v))
.collect())
}
fn process(&self, val: &str) -> Result<Vec<(String, Value)>> {
fn process(&self, val: &str) -> Result<Vec<(KeyString, VrlValue)>> {
let chs = val.chars().collect::<Vec<char>>();
for pattern in &self.patterns {
@@ -600,17 +609,18 @@ impl Processor for DissectProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(Value::String(val_str)) => {
let r = self.process(val_str)?;
Some(VrlValue::Bytes(val_str)) => {
let r = self.process(String::from_utf8_lossy(val_str).as_ref())?;
for (k, v) in r {
val.insert(k, v)?;
val.insert(k, v);
}
}
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -639,17 +649,18 @@ fn is_valid_char(ch: char) -> bool {
#[cfg(test)]
mod tests {
use ahash::HashMap;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use super::{DissectProcessor, EndModifier, Name, Part, StartModifier};
use crate::etl::processor::dissect::Pattern;
use crate::etl::value::Value;
fn assert(pattern_str: &str, input: &str, expected: HashMap<String, Value>) {
fn assert(pattern_str: &str, input: &str, expected: HashMap<KeyString, VrlValue>) {
let chs = input.chars().collect::<Vec<char>>();
let patterns: Vec<Pattern> = vec![pattern_str.parse().unwrap()];
let processor = DissectProcessor::default();
let result: HashMap<String, Value> = processor
let result: HashMap<KeyString, VrlValue> = processor
.process_pattern(&chs, &patterns[0])
.unwrap()
.into_iter()
@@ -991,8 +1002,13 @@ mod tests {
("httpversion", "1.0"),
]
.into_iter()
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())))
.collect::<HashMap<String, Value>>();
.map(|(k, v)| {
(
KeyString::from(k.to_string()),
VrlValue::Bytes(Bytes::from(v.to_string())),
)
})
.collect::<HashMap<KeyString, VrlValue>>();
{
// pattern start with Name
@@ -1032,9 +1048,12 @@ mod tests {
]
.into_iter()
.map(|(pattern, input, expected)| {
let map = expected
.into_iter()
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
let map = expected.into_iter().map(|(k, v)| {
(
KeyString::from(k.to_string()),
VrlValue::Bytes(Bytes::from(v.to_string())),
)
});
(pattern, input, map)
});
@@ -1042,7 +1061,7 @@ mod tests {
assert(
pattern_str,
input,
expected.collect::<HashMap<String, Value>>(),
expected.collect::<HashMap<KeyString, VrlValue>>(),
);
}
}
@@ -1063,9 +1082,12 @@ mod tests {
]
.into_iter()
.map(|(pattern, input, expected)| {
let map = expected
.into_iter()
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
let map = expected.into_iter().map(|(k, v)| {
(
KeyString::from(k.to_string()),
VrlValue::Bytes(Bytes::from(v.to_string())),
)
});
(pattern, input, map)
});
@@ -1073,7 +1095,7 @@ mod tests {
assert(
pattern_str,
input,
expected.collect::<HashMap<String, Value>>(),
expected.collect::<HashMap<KeyString, VrlValue>>(),
);
}
}
@@ -1090,9 +1112,12 @@ mod tests {
)]
.into_iter()
.map(|(pattern, input, expected)| {
let map = expected
.into_iter()
.map(|(k, v)| (k.to_string(), Value::String(v.to_string())));
let map = expected.into_iter().map(|(k, v)| {
(
KeyString::from(k.to_string()),
VrlValue::Bytes(Bytes::from(v.to_string())),
)
});
(pattern, input, map)
});
@@ -1100,7 +1125,7 @@ mod tests {
assert(
pattern_str,
input,
expected.collect::<HashMap<String, Value>>(),
expected.collect::<HashMap<KeyString, VrlValue>>(),
);
}
}
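A note on the dissect change as a whole: pipeline strings become byte-backed VRL values, and map inserts no longer return a Result. A minimal std-only sketch of the new insertion pattern (using bytes::Bytes directly; the helper name is illustrative, not part of the commit):

use std::collections::BTreeMap;

use bytes::Bytes;

// Captured (name, value) pairs become Bytes-backed entries in the
// event object. VRL's map insert returns the previous value instead
// of a Result, which is why the trailing `?` operators disappear.
fn insert_captures(event: &mut BTreeMap<String, Bytes>, captures: Vec<(String, String)>) {
    for (name, value) in captures {
        event.insert(name, Bytes::from(value));
    }
}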


@@ -12,24 +12,26 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use chrono::{DateTime, Utc};
use common_time::timestamp::TimeUnit;
use snafu::{OptionExt, ResultExt};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
EpochInvalidResolutionSnafu, Error, FailedToParseIntSnafu, KeyMustBeStringSnafu,
ProcessorMissingFieldSnafu, ProcessorUnsupportedValueSnafu, Result,
EpochInvalidResolutionSnafu, Error, FailedToParseIntSnafu, InvalidEpochForResolutionSnafu,
KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, ProcessorUnsupportedValueSnafu, Result,
ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME,
};
use crate::etl::value::time::{
use crate::etl::value::{
MICROSECOND_RESOLUTION, MICRO_RESOLUTION, MILLISECOND_RESOLUTION, MILLI_RESOLUTION,
MS_RESOLUTION, NANOSECOND_RESOLUTION, NANO_RESOLUTION, NS_RESOLUTION, SECOND_RESOLUTION,
SEC_RESOLUTION, S_RESOLUTION, US_RESOLUTION,
};
use crate::etl::value::{Timestamp, Value};
pub(crate) const PROCESSOR_EPOCH: &str = "epoch";
const RESOLUTION_NAME: &str = "resolution";
@@ -43,6 +45,18 @@ pub(crate) enum Resolution {
Nano,
}
impl std::fmt::Display for Resolution {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let text = match self {
Resolution::Second => SECOND_RESOLUTION,
Resolution::Milli => MILLISECOND_RESOLUTION,
Resolution::Micro => MICROSECOND_RESOLUTION,
Resolution::Nano => NANOSECOND_RESOLUTION,
};
write!(f, "{}", text)
}
}
impl TryFrom<&str> for Resolution {
type Error = Error;
@@ -84,43 +98,36 @@ pub struct EpochProcessor {
}
impl EpochProcessor {
fn parse(&self, val: &Value) -> Result<Timestamp> {
let t: i64 = match val {
Value::String(s) => s
.parse::<i64>()
.context(FailedToParseIntSnafu { value: s })?,
Value::Int16(i) => *i as i64,
Value::Int32(i) => *i as i64,
Value::Int64(i) => *i,
Value::Uint8(i) => *i as i64,
Value::Uint16(i) => *i as i64,
Value::Uint32(i) => *i as i64,
Value::Uint64(i) => *i as i64,
Value::Float32(f) => *f as i64,
Value::Float64(f) => *f as i64,
Value::Timestamp(t) => match self.resolution {
Resolution::Second => t.timestamp(),
Resolution::Milli => t.timestamp_millis(),
Resolution::Micro => t.timestamp_micros(),
Resolution::Nano => t.timestamp_nanos(),
},
_ => {
return ProcessorUnsupportedValueSnafu {
processor: PROCESSOR_EPOCH,
val: val.to_string(),
fn parse(&self, val: &VrlValue) -> Result<DateTime<Utc>> {
let t: i64 =
match val {
VrlValue::Bytes(bytes) => String::from_utf8_lossy(bytes).parse::<i64>().context(
FailedToParseIntSnafu {
value: val.to_string_lossy(),
},
)?,
VrlValue::Integer(ts) => *ts,
VrlValue::Float(not_nan) => not_nan.into_inner() as i64,
VrlValue::Timestamp(date_time) => return Ok(*date_time),
_ => {
return ProcessorUnsupportedValueSnafu {
processor: PROCESSOR_EPOCH,
val: val.to_string(),
}
.fail();
}
.fail();
}
};
};
match self.resolution {
Resolution::Second => Ok(Timestamp::Second(t)),
Resolution::Milli => Ok(Timestamp::Millisecond(t)),
Resolution::Micro => Ok(Timestamp::Microsecond(t)),
Resolution::Nano => Ok(Timestamp::Nanosecond(t)),
Resolution::Second => DateTime::from_timestamp(t, 0),
Resolution::Milli => DateTime::from_timestamp_millis(t),
Resolution::Micro => DateTime::from_timestamp_micros(t),
Resolution::Nano => Some(DateTime::from_timestamp_nanos(t)),
}
.context(InvalidEpochForResolutionSnafu {
value: t,
resolution: self.resolution.to_string(),
})
}
}
@@ -174,11 +181,12 @@ impl Processor for EpochProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -190,7 +198,10 @@ impl Processor for EpochProcessor {
Some(v) => {
let timestamp = self.parse(v)?;
let output_index = field.target_or_input_field();
val.insert(output_index.to_string(), Value::Timestamp(timestamp))?;
val.insert(
KeyString::from(output_index.to_string()),
VrlValue::Timestamp(timestamp),
);
}
}
}
@@ -200,8 +211,12 @@ impl Processor for EpochProcessor {
#[cfg(test)]
mod tests {
use chrono::DateTime;
use ordered_float::NotNan;
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;
use super::EpochProcessor;
use crate::etl::value::Value;
#[test]
fn test_parse_epoch() {
@@ -211,15 +226,15 @@ mod tests {
};
let values = [
Value::String("1573840000".into()),
Value::Int32(1573840000),
Value::Uint64(1573840000),
Value::Float32(1573840000.0),
VrlValue::Bytes(Bytes::from("1573840000")),
VrlValue::Integer(1573840000),
VrlValue::Integer(1573840000),
VrlValue::Float(NotNan::new(1573840000.0).unwrap()),
];
for value in values {
let parsed = processor.parse(&value).unwrap();
assert_eq!(parsed, super::Timestamp::Second(1573840000));
assert_eq!(parsed, DateTime::from_timestamp(1573840000, 0).unwrap());
}
}
}
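The epoch parse above now normalizes every input to a chrono DateTime<Utc>. A hedged sketch of the resolution dispatch (function name and resolution tags are illustrative):

use chrono::{DateTime, Utc};

// Only the nanosecond constructor is infallible in chrono; the other
// resolutions can overflow, hence the Option and the new
// InvalidEpochForResolution error above.
fn epoch_to_datetime(t: i64, resolution: &str) -> Option<DateTime<Utc>> {
    match resolution {
        "s" => DateTime::from_timestamp(t, 0),
        "ms" => DateTime::from_timestamp_millis(t),
        "us" => DateTime::from_timestamp_micros(t),
        "ns" => Some(DateTime::from_timestamp_nanos(t)),
        _ => None,
    }
}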


@@ -14,17 +14,19 @@
use regex::Regex;
use snafu::{OptionExt, ResultExt};
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, GsubPatternRequiredSnafu, GsubReplacementRequiredSnafu, KeyMustBeStringSnafu,
ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, RegexSnafu, Result,
ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, PATTERN_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_GSUB: &str = "gsub";
@@ -40,16 +42,16 @@ pub struct GsubProcessor {
}
impl GsubProcessor {
fn process_string(&self, val: &str) -> Result<Value> {
fn process_string(&self, val: &str) -> Result<VrlValue> {
let new_val = self.pattern.replace_all(val, &self.replacement).to_string();
let val = Value::String(new_val);
let val = VrlValue::Bytes(Bytes::from(new_val));
Ok(val)
}
fn process(&self, val: &Value) -> Result<Value> {
fn process(&self, val: &VrlValue) -> Result<VrlValue> {
match val {
Value::String(val) => self.process_string(val),
VrlValue::Bytes(val) => self.process_string(String::from_utf8_lossy(val).as_ref()),
_ => ProcessorExpectStringSnafu {
processor: PROCESSOR_GSUB,
v: val.clone(),
@@ -117,11 +119,12 @@ impl crate::etl::processor::Processor for GsubProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -133,7 +136,7 @@ impl crate::etl::processor::Processor for GsubProcessor {
Some(v) => {
let result = self.process(v)?;
let output_index = field.target_or_input_field();
val.insert(output_index.to_string(), result)?;
val.insert(KeyString::from(output_index.to_string()), result);
}
}
}
@@ -145,7 +148,6 @@ impl crate::etl::processor::Processor for GsubProcessor {
mod tests {
use super::*;
use crate::etl::processor::gsub::GsubProcessor;
use crate::etl::value::Value;
#[test]
fn test_string_value() {
@@ -156,9 +158,9 @@ mod tests {
ignore_missing: false,
};
let val = Value::String("123".to_string());
let val = VrlValue::Bytes(Bytes::from("123"));
let result = processor.process(&val).unwrap();
assert_eq!(result, Value::String("xxx".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("xxx")));
}
}
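Because VRL strings are raw bytes, gsub decodes lossily before the regex pass and wraps the owned result back up. A standalone sketch of that round trip:

use bytes::Bytes;
use regex::Regex;

// Decode bytes lossily, apply the replacement, and re-wrap as Bytes;
// this is the same shape as process_string above.
fn gsub(pattern: &Regex, replacement: &str, val: &Bytes) -> Bytes {
    let s = String::from_utf8_lossy(val);
    Bytes::from(pattern.replace_all(&s, replacement).into_owned())
}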


@@ -13,17 +13,18 @@
// limitations under the License.
use snafu::OptionExt;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, JoinSeparatorRequiredSnafu, KeyMustBeStringSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, SEPARATOR_NAME,
};
use crate::etl::value::{Array, Value};
pub(crate) const PROCESSOR_JOIN: &str = "join";
@@ -36,14 +37,14 @@ pub struct JoinProcessor {
}
impl JoinProcessor {
fn process(&self, arr: &Array) -> Result<Value> {
fn process(&self, arr: &[VrlValue]) -> Result<VrlValue> {
let val = arr
.iter()
.map(|v| v.to_str_value())
.collect::<Vec<String>>()
.map(|v| v.to_string_lossy())
.collect::<Vec<_>>()
.join(&self.separator);
Ok(Value::String(val))
Ok(VrlValue::Bytes(Bytes::from(val)))
}
}
@@ -94,16 +95,17 @@ impl Processor for JoinProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(Value::Array(arr)) => {
Some(VrlValue::Array(arr)) => {
let result = self.process(arr)?;
let output_index = field.target_or_input_field();
val.insert(output_index.to_string(), result)?;
val.insert(KeyString::from(output_index.to_string()), result);
}
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -129,8 +131,10 @@ impl Processor for JoinProcessor {
#[cfg(test)]
mod tests {
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;
use crate::etl::processor::join::JoinProcessor;
use crate::etl::value::Value;
#[test]
fn test_join_processor() {
@@ -140,11 +144,10 @@ mod tests {
};
let arr = vec![
Value::String("a".to_string()),
Value::String("b".to_string()),
]
.into();
VrlValue::Bytes(Bytes::from("a")),
VrlValue::Bytes(Bytes::from("b")),
];
let result = processor.process(&arr).unwrap();
assert_eq!(result, Value::String("a-b".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("a-b")));
}
}
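The join rewrite renders each element lossily and joins on the configured separator. A std-only equivalent for reference (VrlValue::to_string_lossy approximated with String::from_utf8_lossy):

use bytes::Bytes;

// Cow<str> implements Borrow<str>, so the collected Vec joins directly.
fn join_values(arr: &[Bytes], separator: &str) -> Bytes {
    let joined = arr
        .iter()
        .map(|v| String::from_utf8_lossy(v))
        .collect::<Vec<_>>()
        .join(separator);
    Bytes::from(joined)
}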


@@ -13,16 +13,17 @@
// limitations under the License.
use snafu::{OptionExt as _, ResultExt};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, FieldMustBeTypeSnafu, JsonParseSnafu, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu,
ProcessorUnsupportedValueSnafu, Result,
ProcessorUnsupportedValueSnafu, Result, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::{json_to_map, Processor, Value};
use crate::Processor;
pub(crate) const PROCESSOR_JSON_PARSE: &str = "json_parse";
@@ -67,21 +68,21 @@ impl TryFrom<&yaml_rust::yaml::Hash> for JsonParseProcessor {
}
impl JsonParseProcessor {
fn process_field(&self, val: &Value) -> Result<Value> {
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
let Some(json_str) = val.as_str() else {
return FieldMustBeTypeSnafu {
field: val.to_str_type(),
field: val.to_string(),
ty: "string",
}
.fail();
};
let parsed: serde_json::Value = serde_json::from_str(json_str).context(JsonParseSnafu)?;
let parsed: VrlValue = serde_json::from_str(&json_str).context(JsonParseSnafu)?;
match parsed {
serde_json::Value::Object(_) => Ok(json_to_map(parsed)?),
serde_json::Value::Array(arr) => Ok(Value::Array(arr.try_into()?)),
VrlValue::Object(_) => Ok(parsed),
VrlValue::Array(_) => Ok(parsed),
_ => ProcessorUnsupportedValueSnafu {
processor: self.kind(),
val: val.to_str_type(),
val: val.to_string(),
}
.fail(),
}
@@ -97,14 +98,15 @@ impl Processor for JsonParseProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(v) => {
let processed = self.process_field(v)?;
let output_index = field.target_or_input_field();
val.insert(output_index.to_string(), processed)?;
val.insert(KeyString::from(output_index.to_string()), processed);
}
None => {
if !self.ignore_missing {
@@ -123,24 +125,27 @@ impl Processor for JsonParseProcessor {
#[cfg(test)]
mod test {
use std::collections::BTreeMap;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::etl::processor::json_parse::JsonParseProcessor;
#[test]
fn test_json_parse() {
use super::*;
use crate::Value;
let processor = JsonParseProcessor {
..Default::default()
};
let result = processor
.process_field(&Value::String(r#"{"hello": "world"}"#.to_string()))
.process_field(&VrlValue::Bytes(Bytes::from(r#"{"hello": "world"}"#)))
.unwrap();
let expected = Value::Map(crate::Map::one(
"hello".to_string(),
Value::String("world".to_string()),
));
let expected = VrlValue::Object(BTreeMap::from([(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world")),
)]));
assert_eq!(result, expected);
}
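The key simplification in json_parse is that vrl::value::Value implements serde::Deserialize, so JSON text parses straight into a VRL value with no intermediate serde_json::Value mapping:

use vrl::value::Value as VrlValue;

// One hop: serde_json drives VrlValue's Deserialize impl directly.
fn parse_json(json_str: &str) -> serde_json::Result<VrlValue> {
    serde_json::from_str(json_str)
}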


@@ -14,17 +14,17 @@
use jsonpath_rust::JsonPath;
use snafu::{OptionExt, ResultExt};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, JsonPathParseResultIndexSnafu, JsonPathParseSnafu, KeyMustBeStringSnafu,
ProcessorMissingFieldSnafu, Result,
Error, JsonParseSnafu, JsonPathParseResultIndexSnafu, JsonPathParseSnafu, KeyMustBeStringSnafu,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, JSON_PATH_NAME, JSON_PATH_RESULT_INDEX_NAME,
};
use crate::Value;
pub(crate) const PROCESSOR_JSON_PATH: &str = "json_path";
@@ -84,7 +84,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for JsonPathProcessor {
#[derive(Debug)]
pub struct JsonPathProcessor {
fields: Fields,
json_path: JsonPath<Value>,
json_path: JsonPath<serde_json::Value>,
ignore_missing: bool,
result_index: Option<usize>,
}
@@ -101,17 +101,22 @@ impl Default for JsonPathProcessor {
}
impl JsonPathProcessor {
fn process_field(&self, val: &Value) -> Result<Value> {
let processed = self.json_path.find(val);
match processed {
Value::Array(arr) => {
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
let v = serde_json::to_value(val).context(JsonParseSnafu)?;
let p = self.json_path.find(&v);
match p {
serde_json::Value::Array(arr) => {
if let Some(index) = self.result_index {
Ok(arr.get(index).cloned().unwrap_or(Value::Null))
Ok(arr
.get(index)
.cloned()
.map(|v| v.into())
.unwrap_or(VrlValue::Null))
} else {
Ok(Value::Array(arr))
Ok(VrlValue::Array(arr.into_iter().map(|v| v.into()).collect()))
}
}
v => Ok(v),
v => Ok(v.into()),
}
}
}
@@ -125,14 +130,15 @@ impl Processor for JsonPathProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(v) => {
let processed = self.process_field(v)?;
let output_index = field.target_or_input_field();
val.insert(output_index.to_string(), processed)?;
val.insert(KeyString::from(output_index), processed);
}
None => {
if !self.ignore_missing {
@@ -151,12 +157,13 @@ impl Processor for JsonPathProcessor {
#[cfg(test)]
mod test {
use crate::Map;
use std::collections::BTreeMap;
use vrl::prelude::Bytes;
#[test]
fn test_json_path() {
use super::*;
use crate::Value;
let json_path = JsonPath::try_from("$.hello").unwrap();
let processor = JsonPathProcessor {
@@ -166,11 +173,11 @@ mod test {
};
let result = processor
.process_field(&Value::Map(Map::one(
"hello",
Value::String("world".to_string()),
)))
.process_field(&VrlValue::Object(BTreeMap::from([(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world")),
)])))
.unwrap();
assert_eq!(result, Value::String("world".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("world")));
}
}
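json_path now round-trips through serde_json instead of querying pipeline values natively. A hedged sketch of the flow (assuming the Serialize impl on VrlValue and the From<serde_json::Value> conversion the diff relies on; the function name is illustrative):

use jsonpath_rust::JsonPath;
use vrl::value::Value as VrlValue;

// Serialize the event, run the query, and map the hit back into VRL.
fn lookup(path: &JsonPath<serde_json::Value>, event: &VrlValue) -> VrlValue {
    let json = serde_json::to_value(event).expect("VrlValue serializes to JSON");
    match path.find(&json) {
        serde_json::Value::Array(arr) => {
            VrlValue::Array(arr.into_iter().map(Into::into).collect())
        }
        other => other.into(),
    }
}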


@@ -13,17 +13,18 @@
// limitations under the License.
use snafu::OptionExt;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, KeyMustBeStringSnafu, LetterInvalidMethodSnafu, ProcessorExpectStringSnafu,
ProcessorMissingFieldSnafu, Result,
ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, METHOD_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_LETTER: &str = "letter";
@@ -67,15 +68,14 @@ pub struct LetterProcessor {
}
impl LetterProcessor {
fn process_field(&self, val: &str) -> Result<Value> {
let processed = match self.method {
Method::Upper => val.to_uppercase(),
Method::Lower => val.to_lowercase(),
Method::Capital => capitalize(val),
};
let val = Value::String(processed);
Ok(val)
fn process_field(&self, val: &Bytes) -> VrlValue {
match self.method {
Method::Upper => VrlValue::Bytes(Bytes::from(val.to_ascii_uppercase())),
Method::Lower => VrlValue::Bytes(Bytes::from(val.to_ascii_lowercase())),
Method::Capital => VrlValue::Bytes(Bytes::from(capitalize(
String::from_utf8_lossy(val).as_ref(),
))),
}
}
}
@@ -125,16 +125,17 @@ impl Processor for LetterProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(Value::String(s)) => {
let result = self.process_field(s)?;
Some(VrlValue::Bytes(s)) => {
let result = self.process_field(s);
let output_key = field.target_or_input_field();
val.insert(output_key.to_string(), result)?;
val.insert(KeyString::from(output_key), result);
}
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -167,8 +168,10 @@ fn capitalize(s: &str) -> String {
#[cfg(test)]
mod tests {
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;
use crate::etl::processor::letter::{LetterProcessor, Method};
use crate::etl::value::Value;
#[test]
fn test_process() {
@@ -177,8 +180,8 @@ mod tests {
method: Method::Upper,
..Default::default()
};
let processed = processor.process_field("pipeline").unwrap();
assert_eq!(Value::String("PIPELINE".into()), processed)
let processed = processor.process_field(&Bytes::from("pipeline"));
assert_eq!(VrlValue::Bytes(Bytes::from("PIPELINE")), processed)
}
{
@@ -186,8 +189,8 @@ mod tests {
method: Method::Lower,
..Default::default()
};
let processed = processor.process_field("Pipeline").unwrap();
assert_eq!(Value::String("pipeline".into()), processed)
let processed = processor.process_field(&Bytes::from("Pipeline"));
assert_eq!(VrlValue::Bytes(Bytes::from("pipeline")), processed)
}
{
@@ -195,8 +198,8 @@ mod tests {
method: Method::Capital,
..Default::default()
};
let processed = processor.process_field("pipeline").unwrap();
assert_eq!(Value::String("Pipeline".into()), processed)
let processed = processor.process_field(&Bytes::from("pipeline"));
assert_eq!(VrlValue::Bytes(Bytes::from("Pipeline")), processed)
}
}
}
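Upper and Lower now stay entirely in byte space: ASCII case mapping never changes byte length or breaks UTF-8, so only Capital needs a lossy decode first. For instance:

use bytes::Bytes;

// [u8]::to_ascii_uppercase allocates a new Vec with cased bytes.
fn upper(val: &Bytes) -> Bytes {
    Bytes::from(val.to_ascii_uppercase())
}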


@@ -23,18 +23,19 @@ use std::collections::BTreeMap;
use lazy_static::lazy_static;
use regex::Regex;
use snafu::{OptionExt, ResultExt};
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu,
RegexNamedGroupNotFoundSnafu, RegexNoValidFieldSnafu, RegexNoValidPatternSnafu, RegexSnafu,
Result,
Result, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, FIELDS_NAME,
FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME,
};
use crate::etl::value::Value;
lazy_static! {
static ref GROUPS_NAME_REGEX: Regex = Regex::new(r"\(\?P?<([[:word:]]+)>.+?\)").unwrap();
@@ -168,14 +169,17 @@ impl RegexProcessor {
Ok(())
}
fn process(&self, prefix: &str, val: &str) -> Result<BTreeMap<String, Value>> {
fn process(&self, prefix: &str, val: &str) -> Result<BTreeMap<KeyString, VrlValue>> {
let mut result = BTreeMap::new();
for gr in self.patterns.iter() {
if let Some(captures) = gr.regex.captures(val) {
for group in gr.groups.iter() {
if let Some(capture) = captures.name(group) {
let value = capture.as_str().to_string();
result.insert(generate_key(prefix, group), Value::String(value));
result.insert(
KeyString::from(generate_key(prefix, group)),
VrlValue::Bytes(Bytes::from(value)),
);
}
}
}
@@ -193,16 +197,17 @@ impl Processor for RegexProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let prefix = field.target_or_input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(Value::String(s)) => {
let result = self.process(prefix, s)?;
val.extend(result.into())?;
Some(VrlValue::Bytes(s)) => {
let result = self.process(prefix, String::from_utf8_lossy(s).as_ref())?;
val.extend(result);
}
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -226,12 +231,11 @@ impl Processor for RegexProcessor {
}
#[cfg(test)]
mod tests {
use ahash::{HashMap, HashMapExt};
use itertools::Itertools;
use vrl::value::Value as VrlValue;
use super::*;
use crate::etl::processor::regex::RegexProcessor;
use crate::etl::value::{Map, Value};
#[test]
fn test_simple_parse() {
@@ -250,15 +254,11 @@ ignore_missing: false"#;
let result = processor.process("a", "123").unwrap();
let map = Map { values: result };
let v = vec![(KeyString::from("a_ar"), VrlValue::Bytes(Bytes::from("1")))]
.into_iter()
.collect::<BTreeMap<KeyString, VrlValue>>();
let v = Map {
values: vec![("a_ar".to_string(), Value::String("1".to_string()))]
.into_iter()
.collect(),
};
assert_eq!(v, map);
assert_eq!(v, result);
}
#[test]
@@ -270,15 +270,30 @@ ignore_missing: false"#;
let cw = "[c=w,n=US_CA_SANJOSE,o=55155]";
let breadcrumbs_str = [cc, cg, co, cp, cw].iter().join(",");
let temporary_map: BTreeMap<String, Value> = [
("breadcrumbs_parent", Value::String(cc.to_string())),
("breadcrumbs_edge", Value::String(cg.to_string())),
("breadcrumbs_origin", Value::String(co.to_string())),
("breadcrumbs_peer", Value::String(cp.to_string())),
("breadcrumbs_wrapper", Value::String(cw.to_string())),
let temporary_map: BTreeMap<KeyString, VrlValue> = [
(
"breadcrumbs_parent",
VrlValue::Bytes(Bytes::from(cc.to_string())),
),
(
"breadcrumbs_edge",
VrlValue::Bytes(Bytes::from(cg.to_string())),
),
(
"breadcrumbs_origin",
VrlValue::Bytes(Bytes::from(co.to_string())),
),
(
"breadcrumbs_peer",
VrlValue::Bytes(Bytes::from(cp.to_string())),
),
(
"breadcrumbs_wrapper",
VrlValue::Bytes(Bytes::from(cw.to_string())),
),
]
.into_iter()
.map(|(k, v)| (k.to_string(), v))
.map(|(k, v)| (KeyString::from(k), v))
.collect();
{
@@ -331,35 +346,66 @@ ignore_missing: false"#;
let processor_yaml_hash = processor_yaml.as_hash().unwrap();
let processor = RegexProcessor::try_from(processor_yaml_hash).unwrap();
let mut result = HashMap::new();
let mut result = BTreeMap::new();
for field in processor.fields.iter() {
let s = temporary_map
.get(field.input_field())
.unwrap()
.to_str_value();
let s = temporary_map.get(field.input_field()).unwrap();
let s = s.to_string_lossy();
let prefix = field.target_or_input_field();
let r = processor.process(prefix, &s).unwrap();
let r = processor.process(prefix, s.as_ref()).unwrap();
result.extend(r);
}
let new_values = vec![
("edge_ip", Value::String("12.34.567.89".to_string())),
("edge_request_id", Value::String("12345678".to_string())),
("edge_geo", Value::String("US_CA_SANJOSE".to_string())),
("edge_asn", Value::String("20940".to_string())),
("origin_ip", Value::String("987.654.321.09".to_string())),
("peer_asn", Value::String("55155".to_string())),
("peer_geo", Value::String("US_CA_SANJOSE".to_string())),
("parent_asn", Value::String("55155".to_string())),
("parent_geo", Value::String("US_CA_SANJOSE".to_string())),
("wrapper_asn", Value::String("55155".to_string())),
("wrapper_geo", Value::String("US_CA_SANJOSE".to_string())),
(
"edge_ip",
VrlValue::Bytes(Bytes::from("12.34.567.89".to_string())),
),
(
"edge_request_id",
VrlValue::Bytes(Bytes::from("12345678".to_string())),
),
(
"edge_geo",
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
),
(
"edge_asn",
VrlValue::Bytes(Bytes::from("20940".to_string())),
),
(
"origin_ip",
VrlValue::Bytes(Bytes::from("987.654.321.09".to_string())),
),
(
"peer_asn",
VrlValue::Bytes(Bytes::from("55155".to_string())),
),
(
"peer_geo",
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
),
(
"parent_asn",
VrlValue::Bytes(Bytes::from("55155".to_string())),
),
(
"parent_geo",
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
),
(
"wrapper_asn",
VrlValue::Bytes(Bytes::from("55155".to_string())),
),
(
"wrapper_geo",
VrlValue::Bytes(Bytes::from("US_CA_SANJOSE".to_string())),
),
]
.into_iter()
.map(|(k, v)| (k.to_string(), v))
.collect();
.map(|(k, v)| (KeyString::from(k), v))
.collect::<BTreeMap<KeyString, VrlValue>>();
assert_eq!(result, new_values);
}


@@ -14,6 +14,7 @@
use ahash::{HashSet, HashSetExt};
use snafu::OptionExt;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, KeyMustBeStringSnafu, ProcessorUnsupportedValueSnafu, Result, ValueMustBeMapSnafu,
@@ -22,7 +23,7 @@ use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME, TYPE_NAME,
};
use crate::{Processor, Value};
use crate::Processor;
pub(crate) const PROCESSOR_SELECT: &str = "select";
const INCLUDE_KEY: &str = "include";
@@ -98,8 +99,8 @@ impl Processor for SelectProcessor {
true
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
let v_map = val.as_map_mut().context(ValueMustBeMapSnafu)?;
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
let v_map = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match self.select_type {
SelectType::Include => {
@@ -109,7 +110,7 @@ impl Processor for SelectProcessor {
let field_name = field.input_field();
if let Some(target_name) = field.target_field() {
if let Some(v) = v_map.remove(field_name) {
v_map.insert(target_name.to_string(), v);
v_map.insert(KeyString::from(target_name), v);
}
include_key_set.insert(target_name);
} else {
@@ -133,9 +134,12 @@ impl Processor for SelectProcessor {
mod test {
use std::collections::BTreeMap;
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::etl::field::{Field, Fields};
use crate::etl::processor::select::{SelectProcessor, SelectType};
use crate::{Map, Processor, Value};
use crate::Processor;
#[test]
fn test_select() {
@@ -145,15 +149,24 @@ mod test {
};
let mut p = BTreeMap::new();
p.insert("hello".to_string(), Value::String("world".to_string()));
p.insert("hello2".to_string(), Value::String("world2".to_string()));
p.insert(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world".to_string())),
);
p.insert(
KeyString::from("hello2"),
VrlValue::Bytes(Bytes::from("world2".to_string())),
);
let result = processor.exec_mut(Value::Map(Map { values: p }));
let result = processor.exec_mut(VrlValue::Object(p));
assert!(result.is_ok());
let mut result = result.unwrap();
let p = result.as_map_mut().unwrap();
let p = result.as_object_mut().unwrap();
assert_eq!(p.len(), 1);
assert_eq!(p.get("hello"), Some(&Value::String("world".to_string())));
assert_eq!(
p.get(&KeyString::from("hello")),
Some(&VrlValue::Bytes(Bytes::from("world".to_string())))
);
}
#[test]
@@ -164,15 +177,24 @@ mod test {
};
let mut p = BTreeMap::new();
p.insert("hello".to_string(), Value::String("world".to_string()));
p.insert("hello2".to_string(), Value::String("world2".to_string()));
p.insert(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world".to_string())),
);
p.insert(
KeyString::from("hello2"),
VrlValue::Bytes(Bytes::from("world2".to_string())),
);
let result = processor.exec_mut(Value::Map(Map { values: p }));
let result = processor.exec_mut(VrlValue::Object(p));
assert!(result.is_ok());
let mut result = result.unwrap();
let p = result.as_map_mut().unwrap();
let p = result.as_object_mut().unwrap();
assert_eq!(p.len(), 1);
assert_eq!(p.get("hello3"), Some(&Value::String("world".to_string())));
assert_eq!(
p.get(&KeyString::from("hello3")),
Some(&VrlValue::Bytes(Bytes::from("world".to_string())))
);
}
#[test]
@@ -183,15 +205,24 @@ mod test {
};
let mut p = BTreeMap::new();
p.insert("hello".to_string(), Value::String("world".to_string()));
p.insert("hello2".to_string(), Value::String("world2".to_string()));
p.insert(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world".to_string())),
);
p.insert(
KeyString::from("hello2"),
VrlValue::Bytes(Bytes::from("world2".to_string())),
);
let result = processor.exec_mut(Value::Map(Map { values: p }));
let result = processor.exec_mut(VrlValue::Object(p));
assert!(result.is_ok());
let mut result = result.unwrap();
let p = result.as_map_mut().unwrap();
let p = result.as_object_mut().unwrap();
assert_eq!(p.len(), 1);
assert_eq!(p.get("hello"), None);
assert_eq!(p.get("hello2"), Some(&Value::String("world2".to_string())));
assert_eq!(p.get(&KeyString::from("hello")), None);
assert_eq!(
p.get(&KeyString::from("hello2")),
Some(&VrlValue::Bytes(Bytes::from("world2".to_string())))
);
}
}


@@ -13,14 +13,17 @@
// limitations under the License.
use snafu::OptionExt as _;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{Error, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, Result};
use crate::error::{
Error, KeyMustBeStringSnafu, ProcessorMissingFieldSnafu, Result, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, KEY_NAME,
};
use crate::{Processor, Value};
use crate::Processor;
pub(crate) const PROCESSOR_SIMPLE_EXTRACT: &str = "simple_extract";
@@ -74,14 +77,14 @@ impl TryFrom<&yaml_rust::yaml::Hash> for SimpleExtractProcessor {
}
impl SimpleExtractProcessor {
fn process_field(&self, val: &Value) -> Result<Value> {
fn process_field(&self, val: &VrlValue) -> Result<VrlValue> {
let mut current = val;
for key in self.key.iter() {
let Value::Map(map) = current else {
return Ok(Value::Null);
let VrlValue::Object(map) = current else {
return Ok(VrlValue::Null);
};
let Some(v) = map.get(key) else {
return Ok(Value::Null);
let Some(v) = map.get(key.as_str()) else {
return Ok(VrlValue::Null);
};
current = v;
}
@@ -98,14 +101,15 @@ impl Processor for SimpleExtractProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(v) => {
let processed = self.process_field(v)?;
let output_index = field.target_or_input_field();
val.insert(output_index.to_string(), processed)?;
val.insert(KeyString::from(output_index), processed);
}
None => {
if !self.ignore_missing {
@@ -124,11 +128,13 @@ impl Processor for SimpleExtractProcessor {
#[cfg(test)]
mod test {
use std::collections::BTreeMap;
use vrl::prelude::Bytes;
#[test]
fn test_simple_extract() {
use super::*;
use crate::{Map, Value};
let processor = SimpleExtractProcessor {
key: vec!["hello".to_string()],
@@ -136,12 +142,12 @@ mod test {
};
let result = processor
.process_field(&Value::Map(Map::one(
"hello",
Value::String("world".to_string()),
)))
.process_field(&VrlValue::Object(BTreeMap::from([(
KeyString::from("hello"),
VrlValue::Bytes(Bytes::from("world".to_string())),
)])))
.unwrap();
assert_eq!(result, Value::String("world".to_string()));
assert_eq!(result, VrlValue::Bytes(Bytes::from("world".to_string())));
}
}


@@ -12,19 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use snafu::{OptionExt, ResultExt};
use urlencoding::{decode, encode};
use snafu::OptionExt;
use urlencoding::{decode_binary, encode_binary};
use vrl::prelude::Bytes;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
UrlEncodingDecodeSnafu, UrlEncodingInvalidMethodSnafu,
UrlEncodingInvalidMethodSnafu, ValueMustBeMapSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME, METHOD_NAME,
};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_URL_ENCODING: &str = "urlencoding";
@@ -65,12 +66,12 @@ pub struct UrlEncodingProcessor {
}
impl UrlEncodingProcessor {
fn process_field(&self, val: &str) -> Result<Value> {
fn process_field(&self, val: &Bytes) -> Result<VrlValue> {
let processed = match self.method {
Method::Encode => encode(val).to_string(),
Method::Decode => decode(val).context(UrlEncodingDecodeSnafu)?.into_owned(),
Method::Encode => Bytes::from_iter(encode_binary(val).bytes()),
Method::Decode => Bytes::from(decode_binary(val).to_vec()),
};
Ok(Value::String(processed))
Ok(VrlValue::Bytes(processed))
}
}
@@ -125,16 +126,17 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
self.ignore_missing
}
fn exec_mut(&self, mut val: Value) -> Result<Value> {
fn exec_mut(&self, mut val: VrlValue) -> Result<VrlValue> {
for field in self.fields.iter() {
let index = field.input_field();
let val = val.as_object_mut().context(ValueMustBeMapSnafu)?;
match val.get(index) {
Some(Value::String(s)) => {
Some(VrlValue::Bytes(s)) => {
let result = self.process_field(s)?;
let output_index = field.target_or_input_field();
val.insert(output_index.to_string(), result)?;
val.insert(KeyString::from(output_index), result);
}
Some(Value::Null) | None => {
Some(VrlValue::Null) | None => {
if !self.ignore_missing {
return ProcessorMissingFieldSnafu {
processor: self.kind(),
@@ -159,9 +161,11 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
#[cfg(test)]
mod tests {
use vrl::prelude::Bytes;
use vrl::value::Value as VrlValue;
use crate::etl::field::Fields;
use crate::etl::processor::urlencoding::UrlEncodingProcessor;
use crate::etl::value::Value;
#[test]
fn test_decode_url() {
@@ -170,8 +174,8 @@ mod tests {
{
let processor = UrlEncodingProcessor::default();
let result = processor.process_field(encoded).unwrap();
assert_eq!(Value::String(decoded.into()), result)
let result = processor.process_field(&Bytes::from(encoded)).unwrap();
assert_eq!(VrlValue::Bytes(Bytes::from(decoded)), result)
}
{
let processor = UrlEncodingProcessor {
@@ -179,8 +183,8 @@ mod tests {
method: super::Method::Encode,
ignore_missing: false,
};
let result = processor.process_field(decoded).unwrap();
assert_eq!(Value::String(encoded.into()), result)
let result = processor.process_field(&Bytes::from(decoded)).unwrap();
assert_eq!(VrlValue::Bytes(Bytes::from(encoded)), result)
}
}
}
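Switching to encode_binary/decode_binary keeps the processor byte-oriented and removes the UTF-8 decode error path. A quick round trip under that API:

use bytes::Bytes;
use urlencoding::{decode_binary, encode_binary};

fn main() {
    let raw = Bytes::from("a b+c");
    // encode_binary yields a Cow<str>; collect its bytes back into Bytes.
    let encoded = Bytes::from_iter(encode_binary(&raw).bytes());
    // decode_binary yields a Cow<[u8]>; no UTF-8 check is needed.
    let decoded = Bytes::from(decode_binary(&encoded).to_vec());
    assert_eq!(raw, decoded);
}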


@@ -15,19 +15,18 @@
use std::collections::BTreeMap;
use chrono_tz::Tz;
use snafu::{OptionExt, ResultExt};
use snafu::OptionExt;
use vrl::compiler::runtime::Runtime;
use vrl::compiler::{compile, Program, TargetValue};
use vrl::diagnostic::Formatter;
use vrl::prelude::{Bytes, NotNan, TimeZone};
use vrl::value::{KeyString, Kind, Secrets, Value as VrlValue};
use vrl::prelude::TimeZone;
use vrl::value::{Kind, Secrets, Value as VrlValue};
use crate::error::{
BytesToUtf8Snafu, CompileVrlSnafu, Error, ExecuteVrlSnafu, FloatNaNSnafu,
InvalidTimestampSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu, VrlReturnValueSnafu,
CompileVrlSnafu, Error, ExecuteVrlSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu,
VrlReturnValueSnafu,
};
use crate::etl::processor::yaml_string;
use crate::Value as PipelineValue;
pub(crate) const PROCESSOR_VRL: &str = "vrl";
const SOURCE: &str = "source";
@@ -62,11 +61,9 @@ impl VrlProcessor {
Ok(Self { source, program })
}
pub fn resolve(&self, m: PipelineValue) -> Result<PipelineValue> {
let pipeline_vrl = pipeline_value_to_vrl_value(m)?;
pub fn resolve(&self, value: VrlValue) -> Result<VrlValue> {
let mut target = TargetValue {
value: pipeline_vrl,
value,
metadata: VrlValue::Object(BTreeMap::new()),
secrets: Secrets::default(),
};
@@ -82,7 +79,7 @@ impl VrlProcessor {
.build()
})?;
vrl_value_to_pipeline_value(re)
Ok(re)
}
}
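For context on resolve: the event itself becomes the VRL target, with empty metadata and secrets. A condensed sketch with error handling elided (assuming the usual vrl runtime API as imported above; the wrapper function is hypothetical):

use std::collections::BTreeMap;

use vrl::compiler::runtime::Runtime;
use vrl::compiler::{Program, TargetValue};
use vrl::prelude::TimeZone;
use vrl::value::{Secrets, Value as VrlValue};

fn run(program: &Program, event: VrlValue) -> VrlValue {
    let mut target = TargetValue {
        value: event, // typically VrlValue::Object(..)
        metadata: VrlValue::Object(BTreeMap::new()),
        secrets: Secrets::default(),
    };
    let mut runtime = Runtime::default();
    runtime
        .resolve(&mut target, program, &TimeZone::default())
        .expect("VRL program resolves")
}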
@@ -113,91 +110,17 @@ impl crate::etl::processor::Processor for VrlProcessor {
true
}
fn exec_mut(&self, val: PipelineValue) -> Result<PipelineValue> {
fn exec_mut(&self, val: VrlValue) -> Result<VrlValue> {
let val = self.resolve(val)?;
if let PipelineValue::Map(m) = val {
Ok(PipelineValue::Map(m.values.into()))
if let VrlValue::Object(_) = val {
Ok(val)
} else {
VrlRegexValueSnafu.fail()
}
}
}
fn pipeline_value_to_vrl_value(v: PipelineValue) -> Result<VrlValue> {
match v {
PipelineValue::Null => Ok(VrlValue::Null),
PipelineValue::Int8(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Int16(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Int32(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Int64(x) => Ok(VrlValue::Integer(x)),
PipelineValue::Uint8(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Uint16(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Uint32(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Uint64(x) => Ok(VrlValue::Integer(x as i64)),
PipelineValue::Float32(x) => NotNan::new(x as f64)
.map_err(|_| FloatNaNSnafu { input_float: x }.build())
.map(VrlValue::Float),
PipelineValue::Float64(x) => NotNan::new(x)
.map_err(|_| FloatNaNSnafu { input_float: x }.build())
.map(VrlValue::Float),
PipelineValue::Boolean(x) => Ok(VrlValue::Boolean(x)),
PipelineValue::String(x) => Ok(VrlValue::Bytes(Bytes::copy_from_slice(x.as_bytes()))),
PipelineValue::Timestamp(x) => x
.to_datetime()
.context(InvalidTimestampSnafu {
input: x.to_string(),
})
.map(VrlValue::Timestamp),
PipelineValue::Array(array) => Ok(VrlValue::Array(
array
.into_iter()
.map(pipeline_value_to_vrl_value)
.collect::<Result<Vec<_>>>()?,
)),
PipelineValue::Map(m) => {
let values = m
.values
.into_iter()
.map(|(k, v)| pipeline_value_to_vrl_value(v).map(|v| (KeyString::from(k), v)))
.collect::<Result<BTreeMap<_, _>>>()?;
Ok(VrlValue::Object(values))
}
}
}
fn vrl_value_to_pipeline_value(v: VrlValue) -> Result<PipelineValue> {
match v {
VrlValue::Bytes(bytes) => String::from_utf8(bytes.to_vec())
.context(BytesToUtf8Snafu)
.map(PipelineValue::String),
VrlValue::Regex(_) => VrlRegexValueSnafu.fail(),
VrlValue::Integer(x) => Ok(PipelineValue::Int64(x)),
VrlValue::Float(not_nan) => Ok(PipelineValue::Float64(not_nan.into_inner())),
VrlValue::Boolean(b) => Ok(PipelineValue::Boolean(b)),
VrlValue::Timestamp(date_time) => crate::etl::value::Timestamp::from_datetime(date_time)
.context(InvalidTimestampSnafu {
input: date_time.to_string(),
})
.map(PipelineValue::Timestamp),
VrlValue::Object(bm) => {
let b = bm
.into_iter()
.map(|(k, v)| vrl_value_to_pipeline_value(v).map(|v| (k.to_string(), v)))
.collect::<Result<BTreeMap<String, PipelineValue>>>()?;
Ok(PipelineValue::Map(b.into()))
}
VrlValue::Array(values) => {
let a = values
.into_iter()
.map(vrl_value_to_pipeline_value)
.collect::<Result<Vec<_>>>()?;
Ok(PipelineValue::Array(a.into()))
}
VrlValue::Null => Ok(PipelineValue::Null),
}
}
fn check_regex_output(output_kind: &Kind) -> Result<()> {
if output_kind.is_regex() {
return VrlRegexValueSnafu.fail();
@@ -223,9 +146,10 @@ fn check_regex_output(output_kind: &Kind) -> Result<()> {
#[cfg(test)]
mod tests {
use vrl::prelude::Bytes;
use vrl::value::KeyString;
use super::*;
use crate::etl::value::Timestamp;
use crate::Map;
#[test]
fn test_vrl() {
@@ -243,31 +167,27 @@ del(.user_info)
let mut n = BTreeMap::new();
n.insert(
"name".to_string(),
PipelineValue::String("certain_name".to_string()),
KeyString::from("name"),
VrlValue::Bytes(Bytes::from("certain_name")),
);
let mut m = BTreeMap::new();
m.insert(
"user_info".to_string(),
PipelineValue::Map(Map { values: n }),
);
m.insert(KeyString::from("user_info"), VrlValue::Object(n));
let re = v.resolve(PipelineValue::Map(Map { values: m }));
let re = v.resolve(VrlValue::Object(m));
assert!(re.is_ok());
let re = re.unwrap();
assert!(matches!(re, PipelineValue::Map(_)));
assert!(matches!(re, VrlValue::Object(_)));
let re = re.as_object().unwrap();
assert!(re.get("name").is_some());
let name = re.get("name").unwrap();
assert!(matches!(name.get("a").unwrap(), PipelineValue::String(x) if x == "certain_name"));
assert!(matches!(name.get("b").unwrap(), PipelineValue::String(x) if x == "certain_name"));
let name = name.as_object().unwrap();
assert!(matches!(name.get("a").unwrap(), VrlValue::Bytes(x) if x == "certain_name"));
assert!(matches!(name.get("b").unwrap(), VrlValue::Bytes(x) if x == "certain_name"));
assert!(re.get("timestamp").is_some());
let timestamp = re.get("timestamp").unwrap();
assert!(matches!(
timestamp,
PipelineValue::Timestamp(Timestamp::Nanosecond(_))
));
assert!(matches!(timestamp, VrlValue::Timestamp(_)));
}
#[test]


@@ -15,16 +15,20 @@
pub mod index;
pub mod transformer;
use api::v1::value::ValueData;
use api::v1::ColumnDataType;
use chrono::Utc;
use snafu::{ensure, OptionExt};
use crate::error::{
Error, KeyMustBeStringSnafu, Result, TransformElementMustBeMapSnafu,
TransformFieldMustBeSetSnafu, TransformOnFailureInvalidValueSnafu, TransformTypeMustBeSetSnafu,
UnsupportedTypeInPipelineSnafu,
};
use crate::etl::field::Fields;
use crate::etl::processor::{yaml_bool, yaml_new_field, yaml_new_fields, yaml_string};
use crate::etl::transform::index::Index;
use crate::etl::value::{Timestamp, Value};
use crate::etl::value::{parse_str_type, parse_str_value};
const TRANSFORM_FIELD: &str = "field";
const TRANSFORM_FIELDS: &str = "fields";
@@ -124,39 +128,61 @@ impl TryFrom<&Vec<yaml_rust::Yaml>> for Transforms {
#[derive(Debug, Clone)]
pub struct Transform {
pub fields: Fields,
pub type_: Value,
pub default: Option<Value>,
pub type_: ColumnDataType,
pub default: Option<ValueData>,
pub index: Option<Index>,
pub tag: bool,
pub on_failure: Option<OnFailure>,
}
impl Default for Transform {
fn default() -> Self {
Transform {
fields: Fields::default(),
type_: Value::Null,
default: None,
index: None,
tag: false,
on_failure: None,
}
}
}
// valid types
// ColumnDataType::Int8
// ColumnDataType::Int16
// ColumnDataType::Int32
// ColumnDataType::Int64
// ColumnDataType::Uint8
// ColumnDataType::Uint16
// ColumnDataType::Uint32
// ColumnDataType::Uint64
// ColumnDataType::Float32
// ColumnDataType::Float64
// ColumnDataType::Boolean
// ColumnDataType::String
// ColumnDataType::TimestampNanosecond
// ColumnDataType::TimestampMicrosecond
// ColumnDataType::TimestampMillisecond
// ColumnDataType::TimestampSecond
// ColumnDataType::Binary
impl Transform {
pub(crate) fn get_default(&self) -> Option<&Value> {
pub(crate) fn get_default(&self) -> Option<&ValueData> {
self.default.as_ref()
}
pub(crate) fn get_type_matched_default_val(&self) -> &Value {
&self.type_
pub(crate) fn get_type_matched_default_val(&self) -> Result<ValueData> {
get_default_for_type(&self.type_)
}
pub(crate) fn get_default_value_when_data_is_none(&self) -> Option<Value> {
if matches!(self.type_, Value::Timestamp(_)) && self.index.is_some_and(|i| i == Index::Time)
{
return Some(Value::Timestamp(Timestamp::default()));
pub(crate) fn get_default_value_when_data_is_none(&self) -> Option<ValueData> {
if is_timestamp_type(&self.type_) && self.index.is_some_and(|i| i == Index::Time) {
let now = Utc::now();
match self.type_ {
ColumnDataType::TimestampSecond => {
return Some(ValueData::TimestampSecondValue(now.timestamp()));
}
ColumnDataType::TimestampMillisecond => {
return Some(ValueData::TimestampMillisecondValue(now.timestamp_millis()));
}
ColumnDataType::TimestampMicrosecond => {
return Some(ValueData::TimestampMicrosecondValue(now.timestamp_micros()));
}
ColumnDataType::TimestampNanosecond => {
return Some(ValueData::TimestampNanosecondValue(
now.timestamp_nanos_opt()?,
));
}
_ => {}
}
}
None
}
@@ -166,17 +192,57 @@ impl Transform {
}
}
fn is_timestamp_type(ty: &ColumnDataType) -> bool {
matches!(
ty,
ColumnDataType::TimestampSecond
| ColumnDataType::TimestampMillisecond
| ColumnDataType::TimestampMicrosecond
| ColumnDataType::TimestampNanosecond
)
}
fn get_default_for_type(ty: &ColumnDataType) -> Result<ValueData> {
let v = match ty {
ColumnDataType::Boolean => ValueData::BoolValue(false),
ColumnDataType::Int8 => ValueData::I8Value(0),
ColumnDataType::Int16 => ValueData::I16Value(0),
ColumnDataType::Int32 => ValueData::I32Value(0),
ColumnDataType::Int64 => ValueData::I64Value(0),
ColumnDataType::Uint8 => ValueData::U8Value(0),
ColumnDataType::Uint16 => ValueData::U16Value(0),
ColumnDataType::Uint32 => ValueData::U32Value(0),
ColumnDataType::Uint64 => ValueData::U64Value(0),
ColumnDataType::Float32 => ValueData::F32Value(0.0),
ColumnDataType::Float64 => ValueData::F64Value(0.0),
ColumnDataType::Binary => ValueData::BinaryValue(jsonb::Value::Null.to_vec()),
ColumnDataType::String => ValueData::StringValue(String::new()),
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(0),
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(0),
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(0),
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(0),
_ => UnsupportedTypeInPipelineSnafu {
ty: ty.as_str_name(),
}
.fail()?,
};
Ok(v)
}
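A hypothetical use of the helper above, mirroring the zero-value fallback per declared type:

// Illustrative assertions, not part of the commit.
assert!(matches!(
    get_default_for_type(&ColumnDataType::Int32),
    Ok(ValueData::I32Value(0))
));
assert!(matches!(
    get_default_for_type(&ColumnDataType::String),
    Ok(ValueData::StringValue(s)) if s.is_empty()
));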
impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
type Error = Error;
fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self> {
let mut fields = Fields::default();
let mut type_ = Value::Null;
let mut default = None;
let mut index = None;
let mut tag = false;
let mut on_failure = None;
let mut type_ = None;
for (k, v) in hash {
let key = k
.as_str()
@@ -192,7 +258,7 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
TRANSFORM_TYPE => {
let t = yaml_string(v, TRANSFORM_TYPE)?;
type_ = Value::parse_str_type(&t)?;
type_ = Some(parse_str_type(&t)?);
}
TRANSFORM_INDEX => {
@@ -205,7 +271,17 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
}
TRANSFORM_DEFAULT => {
default = Some(Value::try_from(v)?);
default = match v {
yaml_rust::Yaml::Real(r) => Some(r.clone()),
yaml_rust::Yaml::Integer(i) => Some(i.to_string()),
yaml_rust::Yaml::String(s) => Some(s.clone()),
yaml_rust::Yaml::Boolean(b) => Some(b.to_string()),
yaml_rust::Yaml::Array(_)
| yaml_rust::Yaml::Hash(_)
| yaml_rust::Yaml::Alias(_)
| yaml_rust::Yaml::Null
| yaml_rust::Yaml::BadValue => None,
};
}
TRANSFORM_ON_FAILURE => {
@@ -219,23 +295,14 @@ impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
// ensure fields and type
ensure!(!fields.is_empty(), TransformFieldMustBeSetSnafu);
ensure!(
type_ != Value::Null,
TransformTypeMustBeSetSnafu {
fields: format!("{:?}", fields)
}
);
let type_ = type_.context(TransformTypeMustBeSetSnafu {
fields: format!("{:?}", fields),
})?;
let final_default = if let Some(default_value) = default {
match default_value {
// if default is not set, then it will be regarded as default null
Value::Null => None,
_ => {
let target = type_.parse_str_value(default_value.to_str_value().as_str())?;
on_failure = Some(OnFailure::Default);
Some(target)
}
}
let target = parse_str_value(&type_, &default_value)?;
on_failure = Some(OnFailure::Default);
Some(target)
} else {
None
};


@@ -14,6 +14,7 @@
pub mod coerce;
use std::borrow::Cow;
use std::collections::{BTreeMap, HashSet};
use std::sync::Arc;
@@ -24,26 +25,27 @@ use api::v1::value::ValueData;
use api::v1::{ColumnDataType, ColumnDataTypeExtension, JsonTypeExtension, SemanticType};
use coerce::{coerce_columns, coerce_value};
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
use common_telemetry::warn;
use greptime_proto::v1::{ColumnSchema, Row, Rows, Value as GreptimeValue};
use itertools::Itertools;
use jsonb::Number;
use once_cell::sync::OnceCell;
use serde_json::Number;
use session::context::Channel;
use snafu::OptionExt;
use vrl::prelude::VrlValueConvert;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
IdentifyPipelineColumnTypeMismatchSnafu, ReachedMaxNestedLevelsSnafu, Result,
TimeIndexMustBeNonNullSnafu, TransformColumnNameMustBeUniqueSnafu,
TransformMultipleTimestampIndexSnafu, TransformTimestampIndexCountSnafu,
UnsupportedNumberTypeSnafu, ValueMustBeMapSnafu,
IdentifyPipelineColumnTypeMismatchSnafu, InvalidTimestampSnafu, ReachedMaxNestedLevelsSnafu,
Result, TimeIndexMustBeNonNullSnafu, TransformColumnNameMustBeUniqueSnafu,
TransformMultipleTimestampIndexSnafu, TransformTimestampIndexCountSnafu, ValueMustBeMapSnafu,
};
use crate::etl::ctx_req::ContextOpt;
use crate::etl::field::{Field, Fields};
use crate::etl::transform::index::Index;
use crate::etl::transform::{Transform, Transforms};
use crate::etl::value::{Timestamp, Value};
use crate::etl::PipelineDocVersion;
use crate::{unwrap_or_continue_if_err, Map, PipelineContext};
use crate::{unwrap_or_continue_if_err, PipelineContext};
const DEFAULT_GREPTIME_TIMESTAMP_COLUMN: &str = "greptime_timestamp";
const DEFAULT_MAX_NESTED_LEVELS_FOR_JSON_FLATTENING: usize = 10;
@@ -133,7 +135,7 @@ impl GreptimePipelineParams {
impl GreptimeTransformer {
/// Add a default timestamp column to the transforms
fn add_greptime_timestamp_column(transforms: &mut Transforms) {
let type_ = Value::Timestamp(Timestamp::Nanosecond(0));
let type_ = ColumnDataType::TimestampNanosecond;
let default = None;
let transform = Transform {
@@ -220,7 +222,7 @@ impl GreptimeTransformer {
pub fn transform_mut(
&self,
pipeline_map: &mut Value,
pipeline_map: &mut VrlValue,
is_v1: bool,
) -> Result<Vec<GreptimeValue>> {
let mut values = vec![GreptimeValue { value_data: None }; self.schema.len()];
@@ -229,6 +231,7 @@ impl GreptimeTransformer {
for field in transform.fields.iter() {
let column_name = field.input_field();
let pipeline_map = pipeline_map.as_object_mut().context(ValueMustBeMapSnafu)?;
// let us keep `get` here to be compatible with v1
match pipeline_map.get(column_name) {
Some(v) => {
@@ -240,11 +243,8 @@ impl GreptimeTransformer {
let value_data = match transform.on_failure {
Some(crate::etl::transform::OnFailure::Default) => {
match transform.get_default() {
Some(default) => coerce_value(default, transform)?,
None => match transform.get_default_value_when_data_is_none() {
Some(default) => coerce_value(&default, transform)?,
None => None,
},
Some(default) => Some(default.clone()),
None => transform.get_default_value_when_data_is_none(),
}
}
Some(crate::etl::transform::OnFailure::Ignore) => None,
@@ -349,63 +349,22 @@ fn resolve_schema(
}
}
fn resolve_number_schema(
n: Number,
column_name: String,
index: Option<usize>,
row: &mut Vec<GreptimeValue>,
schema_info: &mut SchemaInfo,
) -> Result<()> {
let (value, datatype, semantic_type) = if n.is_i64() {
(
ValueData::I64Value(n.as_i64().unwrap()),
ColumnDataType::Int64 as i32,
SemanticType::Field as i32,
)
} else if n.is_u64() {
(
ValueData::U64Value(n.as_u64().unwrap()),
ColumnDataType::Uint64 as i32,
SemanticType::Field as i32,
)
} else if n.is_f64() {
(
ValueData::F64Value(n.as_f64().unwrap()),
ColumnDataType::Float64 as i32,
SemanticType::Field as i32,
)
} else {
return UnsupportedNumberTypeSnafu { value: n }.fail();
};
resolve_schema(
index,
value,
ColumnSchema {
column_name,
datatype,
semantic_type,
datatype_extension: None,
options: None,
},
row,
schema_info,
)
}
fn calc_ts(p_ctx: &PipelineContext, values: &Value) -> Result<Option<ValueData>> {
fn calc_ts(p_ctx: &PipelineContext, values: &VrlValue) -> Result<Option<ValueData>> {
match p_ctx.channel {
Channel::Prometheus => Ok(Some(ValueData::TimestampMillisecondValue(
values
.get(GREPTIME_TIMESTAMP)
.and_then(|v| v.as_i64())
.unwrap_or_default(),
))),
Channel::Prometheus => {
let ts = values
.as_object()
.and_then(|m| m.get(GREPTIME_TIMESTAMP))
.and_then(|ts| ts.try_into_i64().ok())
.unwrap_or_default();
Ok(Some(ValueData::TimestampMillisecondValue(ts)))
}
_ => {
let custom_ts = p_ctx.pipeline_definition.get_custom_ts();
match custom_ts {
Some(ts) => {
let ts_field = values.get(ts.get_column_name());
Some(ts.get_timestamp(ts_field)).transpose()
let ts_field = values.as_object().and_then(|m| m.get(ts.get_column_name()));
Some(ts.get_timestamp_value(ts_field)).transpose()
}
None => Ok(Some(ValueData::TimestampNanosecondValue(
chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default(),
@@ -417,7 +376,7 @@ fn calc_ts(p_ctx: &PipelineContext, values: &Value) -> Result<Option<ValueData>>
pub(crate) fn values_to_row(
schema_info: &mut SchemaInfo,
values: Value,
values: VrlValue,
pipeline_ctx: &PipelineContext<'_>,
row: Option<Vec<GreptimeValue>>,
need_calc_ts: bool,
@@ -439,14 +398,20 @@ pub(crate) fn values_to_row(
.as_ref()
.map_or(DEFAULT_GREPTIME_TIMESTAMP_COLUMN, |ts| ts.get_column_name());
let values = values.into_map().context(ValueMustBeMapSnafu)?;
let values = values.into_object().context(ValueMustBeMapSnafu)?;
for (column_name, value) in values {
if column_name == ts_column_name {
if column_name.as_str() == ts_column_name {
continue;
}
resolve_value(value, column_name, &mut row, schema_info, pipeline_ctx)?;
resolve_value(
value,
column_name.into(),
&mut row,
schema_info,
pipeline_ctx,
)?;
}
Ok(Row { values: row })
}
@@ -460,7 +425,7 @@ fn decide_semantic(p_ctx: &PipelineContext, column_name: &str) -> i32 {
}
fn resolve_value(
value: Value,
value: VrlValue,
column_name: String,
row: &mut Vec<GreptimeValue>,
schema_info: &mut SchemaInfo,
@@ -486,27 +451,23 @@ fn resolve_value(
};
match value {
Value::Null => {}
VrlValue::Null => {}
Value::Int8(_) | Value::Int16(_) | Value::Int32(_) | Value::Int64(_) => {
VrlValue::Integer(v) => {
// safe unwrap after type matched
let v = value.as_i64().unwrap();
resolve_simple_type(ValueData::I64Value(v), column_name, ColumnDataType::Int64)?;
}
Value::Uint8(_) | Value::Uint16(_) | Value::Uint32(_) | Value::Uint64(_) => {
VrlValue::Float(v) => {
// safe unwrap after type matched
let v = value.as_u64().unwrap();
resolve_simple_type(ValueData::U64Value(v), column_name, ColumnDataType::Uint64)?;
resolve_simple_type(
ValueData::F64Value(v.into()),
column_name,
ColumnDataType::Float64,
)?;
}
Value::Float32(_) | Value::Float64(_) => {
// safe unwrap after type matched
let v = value.as_f64().unwrap();
resolve_simple_type(ValueData::F64Value(v), column_name, ColumnDataType::Float64)?;
}
Value::Boolean(v) => {
VrlValue::Boolean(v) => {
resolve_simple_type(
ValueData::BoolValue(v),
column_name,
@@ -514,15 +475,30 @@ fn resolve_value(
)?;
}
Value::String(v) => {
VrlValue::Bytes(v) => {
resolve_simple_type(
ValueData::StringValue(v),
ValueData::StringValue(String::from_utf8_lossy_owned(v.to_vec())),
column_name,
ColumnDataType::String,
)?;
}
Value::Timestamp(Timestamp::Nanosecond(ns)) => {
VrlValue::Regex(v) => {
warn!(
"Persisting regex value in the table, this should not happen, column_name: {}",
column_name
);
resolve_simple_type(
ValueData::StringValue(v.to_string()),
column_name,
ColumnDataType::String,
)?;
}
VrlValue::Timestamp(ts) => {
let ns = ts.timestamp_nanos_opt().context(InvalidTimestampSnafu {
input: ts.to_rfc3339(),
})?;
resolve_simple_type(
ValueData::TimestampNanosecondValue(ns),
column_name,
@@ -530,32 +506,8 @@ fn resolve_value(
)?;
}
Value::Timestamp(Timestamp::Microsecond(us)) => {
resolve_simple_type(
ValueData::TimestampMicrosecondValue(us),
column_name,
ColumnDataType::TimestampMicrosecond,
)?;
}
Value::Timestamp(Timestamp::Millisecond(ms)) => {
resolve_simple_type(
ValueData::TimestampMillisecondValue(ms),
column_name,
ColumnDataType::TimestampMillisecond,
)?;
}
Value::Timestamp(Timestamp::Second(s)) => {
resolve_simple_type(
ValueData::TimestampSecondValue(s),
column_name,
ColumnDataType::TimestampSecond,
)?;
}
Value::Array(_) | Value::Map(_) => {
let data: jsonb::Value = value.into();
VrlValue::Array(_) | VrlValue::Object(_) => {
let data = vrl_value_to_jsonb_value(&value);
resolve_schema(
index,
ValueData::BinaryValue(data.to_vec()),
@@ -576,8 +528,32 @@ fn resolve_value(
Ok(())
}
fn vrl_value_to_jsonb_value<'a>(value: &'a VrlValue) -> jsonb::Value<'a> {
match value {
VrlValue::Bytes(bytes) => jsonb::Value::String(String::from_utf8_lossy(bytes)),
VrlValue::Regex(value_regex) => jsonb::Value::String(Cow::Borrowed(value_regex.as_str())),
VrlValue::Integer(i) => jsonb::Value::Number(Number::Int64(*i)),
VrlValue::Float(not_nan) => jsonb::Value::Number(Number::Float64(not_nan.into_inner())),
VrlValue::Boolean(b) => jsonb::Value::Bool(*b),
VrlValue::Timestamp(date_time) => jsonb::Value::String(Cow::Owned(date_time.to_rfc3339())),
VrlValue::Object(btree_map) => jsonb::Value::Object(
btree_map
.iter()
.map(|(key, value)| (key.to_string(), vrl_value_to_jsonb_value(value)))
.collect(),
),
VrlValue::Array(values) => jsonb::Value::Array(
values
.iter()
.map(|value| vrl_value_to_jsonb_value(value))
.collect(),
),
VrlValue::Null => jsonb::Value::Null,
}
}
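A minimal usage sketch of the helper above (values are illustrative, same-module scope assumed): a nested VRL object is converted to a `jsonb::Value` and then serialized to the byte payload stored in the Binary column.
use std::collections::BTreeMap;
use vrl::value::{KeyString, Value as VrlValue};
fn demo_vrl_to_jsonb() {
    let mut map = BTreeMap::new();
    map.insert(KeyString::from("host"), VrlValue::Bytes("web-1".into()));
    map.insert(KeyString::from("count"), VrlValue::Integer(3));
    let value = VrlValue::Object(map);
    // Same path as resolve_value: object -> jsonb -> bytes for BinaryValue.
    let data = vrl_value_to_jsonb_value(&value);
    let bytes = data.to_vec();
    assert!(!bytes.is_empty());
}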
fn identity_pipeline_inner(
pipeline_maps: Vec<Value>,
pipeline_maps: Vec<VrlValue>,
pipeline_ctx: &PipelineContext<'_>,
) -> Result<(SchemaInfo, HashMap<ContextOpt, Vec<Row>>)> {
let skip_error = pipeline_ctx.pipeline_param.skip_error();
@@ -587,7 +563,7 @@ fn identity_pipeline_inner(
// set time index column schema first
schema_info.schema.push(ColumnSchema {
column_name: custom_ts
.map(|ts| ts.get_column_name().clone())
.map(|ts| ts.get_column_name().to_string())
.unwrap_or_else(|| DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string()),
datatype: custom_ts.map(|c| c.get_datatype()).unwrap_or_else(|| {
if pipeline_ctx.channel == Channel::Prometheus {
@@ -642,7 +618,7 @@ fn identity_pipeline_inner(
/// 4. The pipeline will return an error if the same column has mismatched datatypes across records
/// 5. The pipeline will analyze the schema of each JSON record and merge them to get the final schema.
pub fn identity_pipeline(
array: Vec<Value>,
array: Vec<VrlValue>,
table: Option<Arc<table::Table>>,
pipeline_ctx: &PipelineContext<'_>,
) -> Result<HashMap<ContextOpt, Rows>> {
@@ -690,22 +666,22 @@ pub fn identity_pipeline(
///
/// The `max_nested_levels` parameter limits the nesting depth of the JSON object.
/// An error will be returned if the nesting depth exceeds `max_nested_levels`.
pub fn flatten_object(object: Value, max_nested_levels: usize) -> Result<Value> {
pub fn flatten_object(object: VrlValue, max_nested_levels: usize) -> Result<VrlValue> {
let mut flattened = BTreeMap::new();
let object = object.into_map().context(ValueMustBeMapSnafu)?;
let object = object.into_object().context(ValueMustBeMapSnafu)?;
if !object.is_empty() {
// it will use recursion to flatten the object.
do_flatten_object(&mut flattened, None, object, 1, max_nested_levels)?;
}
Ok(Value::Map(Map { values: flattened }))
Ok(VrlValue::Object(flattened))
}
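A hedged sketch of the expected behavior (input, depth, and expected output are illustrative; the `From<serde_json::Value>` conversion is the one used by the tests below): a nested key is joined with a dot.
let input: VrlValue = serde_json::json!({"a": {"b": 1}}).into();
let flat = flatten_object(input, 10).unwrap();
let expected: VrlValue = serde_json::json!({"a.b": 1}).into();
assert_eq!(flat, expected);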
fn do_flatten_object(
dest: &mut BTreeMap<String, Value>,
dest: &mut BTreeMap<KeyString, VrlValue>,
base: Option<&str>,
object: BTreeMap<String, Value>,
object: BTreeMap<KeyString, VrlValue>,
current_level: usize,
max_nested_levels: usize,
) -> Result<()> {
@@ -715,14 +691,17 @@ fn do_flatten_object(
}
for (key, value) in object {
let new_key = base.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
let new_key = base.map_or_else(
|| key.clone(),
|base_key| format!("{base_key}.{key}").into(),
);
match value {
Value::Map(object) => {
VrlValue::Object(object) => {
do_flatten_object(
dest,
Some(&new_key),
object.values,
object,
current_level + 1,
max_nested_levels,
)?;
@@ -742,7 +721,6 @@ mod tests {
use api::v1::SemanticType;
use super::*;
use crate::etl::{json_array_to_map, json_to_map};
use crate::{identity_pipeline, PipelineDefinition};
#[test]
@@ -754,7 +732,7 @@ mod tests {
Channel::Unknown,
);
{
let array = vec![
let array = [
serde_json::json!({
"woshinull": null,
"name": "Alice",
@@ -774,7 +752,7 @@ mod tests {
"gaga": "gaga"
}),
];
let array = json_array_to_map(array).unwrap();
let array = array.iter().map(|v| v.into()).collect();
let rows = identity_pipeline(array, None, &pipeline_ctx);
assert!(rows.is_err());
assert_eq!(
@@ -783,7 +761,7 @@ mod tests {
);
}
{
let array = vec![
let array = [
serde_json::json!({
"woshinull": null,
"name": "Alice",
@@ -803,7 +781,8 @@ mod tests {
"gaga": "gaga"
}),
];
let rows = identity_pipeline(json_array_to_map(array).unwrap(), None, &pipeline_ctx);
let array = array.iter().map(|v| v.into()).collect();
let rows = identity_pipeline(array, None, &pipeline_ctx);
assert!(rows.is_err());
assert_eq!(
rows.err().unwrap().to_string(),
@@ -811,7 +790,7 @@ mod tests {
);
}
{
let array = vec![
let array = [
serde_json::json!({
"woshinull": null,
"name": "Alice",
@@ -831,7 +810,8 @@ mod tests {
"gaga": "gaga"
}),
];
let rows = identity_pipeline(json_array_to_map(array).unwrap(), None, &pipeline_ctx);
let array = array.iter().map(|v| v.into()).collect();
let rows = identity_pipeline(array, None, &pipeline_ctx);
assert!(rows.is_ok());
let mut rows = rows.unwrap();
assert!(rows.len() == 1);
@@ -842,7 +822,7 @@ mod tests {
assert_eq!(8, rows.rows[1].values.len());
}
{
let array = vec![
let array = [
serde_json::json!({
"woshinull": null,
"name": "Alice",
@@ -864,22 +844,23 @@ mod tests {
];
let tag_column_names = ["name".to_string(), "address".to_string()];
let rows = identity_pipeline_inner(json_array_to_map(array).unwrap(), &pipeline_ctx)
.map(|(mut schema, mut rows)| {
for name in tag_column_names {
if let Some(index) = schema.index.get(&name) {
schema.schema[*index].semantic_type = SemanticType::Tag as i32;
let rows =
identity_pipeline_inner(array.iter().map(|v| v.into()).collect(), &pipeline_ctx)
.map(|(mut schema, mut rows)| {
for name in tag_column_names {
if let Some(index) = schema.index.get(&name) {
schema.schema[*index].semantic_type = SemanticType::Tag as i32;
}
}
}
assert!(rows.len() == 1);
let rows = rows.remove(&ContextOpt::default()).unwrap();
assert!(rows.len() == 1);
let rows = rows.remove(&ContextOpt::default()).unwrap();
Rows {
schema: schema.schema,
rows,
}
});
Rows {
schema: schema.schema,
rows,
}
});
assert!(rows.is_ok());
let rows = rows.unwrap();
@@ -976,8 +957,8 @@ mod tests {
];
for (input, max_depth, expected) in test_cases {
let input = json_to_map(input).unwrap();
let expected = expected.map(|e| json_to_map(e).unwrap());
let input = input.into();
let expected = expected.map(|e| e.into());
let flattened_object = flatten_object(input, max_depth).ok();
assert_eq!(flattened_object, expected);

View File

@@ -18,58 +18,17 @@ use api::v1::{ColumnDataTypeExtension, ColumnOptions, JsonTypeExtension};
use datatypes::schema::{FulltextOptions, SkippingIndexOptions};
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{ColumnDataType, ColumnSchema, SemanticType};
use snafu::ResultExt;
use snafu::{OptionExt, ResultExt};
use vrl::value::Value as VrlValue;
use crate::error::{
CoerceIncompatibleTypesSnafu, CoerceJsonTypeToSnafu, CoerceStringToTypeSnafu,
CoerceTypeToJsonSnafu, CoerceUnsupportedEpochTypeSnafu, CoerceUnsupportedNullTypeSnafu,
CoerceUnsupportedNullTypeToSnafu, ColumnOptionsSnafu, Error, Result,
CoerceTypeToJsonSnafu, CoerceUnsupportedEpochTypeSnafu, ColumnOptionsSnafu,
InvalidTimestampSnafu, Result, UnsupportedTypeInPipelineSnafu, VrlRegexValueSnafu,
};
use crate::etl::transform::index::Index;
use crate::etl::transform::transformer::greptime::vrl_value_to_jsonb_value;
use crate::etl::transform::{OnFailure, Transform};
use crate::etl::value::{Timestamp, Value};
impl TryFrom<Value> for ValueData {
type Error = Error;
fn try_from(value: Value) -> Result<Self> {
match value {
Value::Null => CoerceUnsupportedNullTypeSnafu.fail(),
Value::Int8(v) => Ok(ValueData::I32Value(v as i32)),
Value::Int16(v) => Ok(ValueData::I32Value(v as i32)),
Value::Int32(v) => Ok(ValueData::I32Value(v)),
Value::Int64(v) => Ok(ValueData::I64Value(v)),
Value::Uint8(v) => Ok(ValueData::U32Value(v as u32)),
Value::Uint16(v) => Ok(ValueData::U32Value(v as u32)),
Value::Uint32(v) => Ok(ValueData::U32Value(v)),
Value::Uint64(v) => Ok(ValueData::U64Value(v)),
Value::Float32(v) => Ok(ValueData::F32Value(v)),
Value::Float64(v) => Ok(ValueData::F64Value(v)),
Value::Boolean(v) => Ok(ValueData::BoolValue(v)),
Value::String(v) => Ok(ValueData::StringValue(v)),
Value::Timestamp(Timestamp::Nanosecond(ns)) => {
Ok(ValueData::TimestampNanosecondValue(ns))
}
Value::Timestamp(Timestamp::Microsecond(us)) => {
Ok(ValueData::TimestampMicrosecondValue(us))
}
Value::Timestamp(Timestamp::Millisecond(ms)) => {
Ok(ValueData::TimestampMillisecondValue(ms))
}
Value::Timestamp(Timestamp::Second(s)) => Ok(ValueData::TimestampSecondValue(s)),
Value::Array(_) | Value::Map(_) => {
let data: jsonb::Value = value.into();
Ok(ValueData::BinaryValue(data.to_vec()))
}
}
}
}
pub(crate) fn coerce_columns(transform: &Transform) -> Result<Vec<ColumnSchema>> {
let mut columns = Vec::new();
@@ -77,15 +36,21 @@ pub(crate) fn coerce_columns(transform: &Transform) -> Result<Vec<ColumnSchema>>
for field in transform.fields.iter() {
let column_name = field.target_or_input_field().to_string();
let (datatype, datatype_extension) = coerce_type(transform)?;
let ext = if matches!(transform.type_, ColumnDataType::Binary) {
Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
})
} else {
None
};
let semantic_type = coerce_semantic_type(transform) as i32;
let column = ColumnSchema {
column_name,
datatype: datatype as i32,
datatype: transform.type_ as i32,
semantic_type,
datatype_extension,
datatype_extension: ext,
options: coerce_options(transform)?,
};
columns.push(column);
@@ -123,113 +88,60 @@ fn coerce_options(transform: &Transform) -> Result<Option<ColumnOptions>> {
}
}
fn coerce_type(transform: &Transform) -> Result<(ColumnDataType, Option<ColumnDataTypeExtension>)> {
match transform.type_ {
Value::Int8(_) => Ok((ColumnDataType::Int8, None)),
Value::Int16(_) => Ok((ColumnDataType::Int16, None)),
Value::Int32(_) => Ok((ColumnDataType::Int32, None)),
Value::Int64(_) => Ok((ColumnDataType::Int64, None)),
Value::Uint8(_) => Ok((ColumnDataType::Uint8, None)),
Value::Uint16(_) => Ok((ColumnDataType::Uint16, None)),
Value::Uint32(_) => Ok((ColumnDataType::Uint32, None)),
Value::Uint64(_) => Ok((ColumnDataType::Uint64, None)),
Value::Float32(_) => Ok((ColumnDataType::Float32, None)),
Value::Float64(_) => Ok((ColumnDataType::Float64, None)),
Value::Boolean(_) => Ok((ColumnDataType::Boolean, None)),
Value::String(_) => Ok((ColumnDataType::String, None)),
Value::Timestamp(Timestamp::Nanosecond(_)) => {
Ok((ColumnDataType::TimestampNanosecond, None))
}
Value::Timestamp(Timestamp::Microsecond(_)) => {
Ok((ColumnDataType::TimestampMicrosecond, None))
}
Value::Timestamp(Timestamp::Millisecond(_)) => {
Ok((ColumnDataType::TimestampMillisecond, None))
}
Value::Timestamp(Timestamp::Second(_)) => Ok((ColumnDataType::TimestampSecond, None)),
Value::Array(_) | Value::Map(_) => Ok((
ColumnDataType::Binary,
Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
}),
)),
Value::Null => CoerceUnsupportedNullTypeToSnafu {
ty: transform.type_.to_str_type(),
}
.fail(),
}
}
pub(crate) fn coerce_value(val: &Value, transform: &Transform) -> Result<Option<ValueData>> {
pub(crate) fn coerce_value(val: &VrlValue, transform: &Transform) -> Result<Option<ValueData>> {
match val {
Value::Null => Ok(None),
Value::Int8(n) => coerce_i64_value(*n as i64, transform),
Value::Int16(n) => coerce_i64_value(*n as i64, transform),
Value::Int32(n) => coerce_i64_value(*n as i64, transform),
Value::Int64(n) => coerce_i64_value(*n, transform),
Value::Uint8(n) => coerce_u64_value(*n as u64, transform),
Value::Uint16(n) => coerce_u64_value(*n as u64, transform),
Value::Uint32(n) => coerce_u64_value(*n as u64, transform),
Value::Uint64(n) => coerce_u64_value(*n, transform),
Value::Float32(n) => coerce_f64_value(*n as f64, transform),
Value::Float64(n) => coerce_f64_value(*n, transform),
Value::Boolean(b) => coerce_bool_value(*b, transform),
Value::String(s) => coerce_string_value(s, transform),
Value::Timestamp(input_timestamp) => match &transform.type_ {
Value::Timestamp(target_timestamp) => match target_timestamp {
Timestamp::Nanosecond(_) => Ok(Some(ValueData::TimestampNanosecondValue(
input_timestamp.timestamp_nanos(),
))),
Timestamp::Microsecond(_) => Ok(Some(ValueData::TimestampMicrosecondValue(
input_timestamp.timestamp_micros(),
))),
Timestamp::Millisecond(_) => Ok(Some(ValueData::TimestampMillisecondValue(
input_timestamp.timestamp_millis(),
))),
Timestamp::Second(_) => Ok(Some(ValueData::TimestampSecondValue(
input_timestamp.timestamp(),
))),
},
VrlValue::Null => Ok(None),
VrlValue::Integer(n) => coerce_i64_value(*n, transform),
VrlValue::Float(n) => coerce_f64_value(n.into_inner(), transform),
VrlValue::Boolean(b) => coerce_bool_value(*b, transform),
VrlValue::Bytes(b) => coerce_string_value(String::from_utf8_lossy(b).as_ref(), transform),
VrlValue::Timestamp(ts) => match transform.type_ {
ColumnDataType::TimestampNanosecond => Ok(Some(ValueData::TimestampNanosecondValue(
ts.timestamp_nanos_opt().context(InvalidTimestampSnafu {
input: ts.to_rfc3339(),
})?,
))),
ColumnDataType::TimestampMicrosecond => Ok(Some(ValueData::TimestampMicrosecondValue(
ts.timestamp_micros(),
))),
ColumnDataType::TimestampMillisecond => Ok(Some(ValueData::TimestampMillisecondValue(
ts.timestamp_millis(),
))),
ColumnDataType::TimestampSecond => {
Ok(Some(ValueData::TimestampSecondValue(ts.timestamp())))
}
_ => CoerceIncompatibleTypesSnafu {
msg: "Timestamp can only be coerced to another type",
}
.fail(),
},
Value::Array(_) | Value::Map(_) => coerce_json_value(val, transform),
VrlValue::Array(_) | VrlValue::Object(_) => coerce_json_value(val, transform),
VrlValue::Regex(_) => VrlRegexValueSnafu.fail(),
}
}
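A sketch of the dispatch above, with a `Transform` built like the ones in the tests at the end of this file (only `type_` matters here; the rest are defaults): an integer VRL value is routed to an Int64 column.
let transform = Transform {
    fields: Fields::default(),
    type_: ColumnDataType::Int64,
    default: None,
    index: None,
    on_failure: None,
    tag: false,
};
// Integer input takes the coerce_i64_value path.
assert_eq!(
    coerce_value(&VrlValue::Integer(42), &transform).unwrap(),
    Some(ValueData::I64Value(42)),
);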
fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>> {
let val = match transform.type_ {
Value::Int8(_) => ValueData::I8Value(b as i32),
Value::Int16(_) => ValueData::I16Value(b as i32),
Value::Int32(_) => ValueData::I32Value(b as i32),
Value::Int64(_) => ValueData::I64Value(b as i64),
ColumnDataType::Int8 => ValueData::I8Value(b as i32),
ColumnDataType::Int16 => ValueData::I16Value(b as i32),
ColumnDataType::Int32 => ValueData::I32Value(b as i32),
ColumnDataType::Int64 => ValueData::I64Value(b as i64),
Value::Uint8(_) => ValueData::U8Value(b as u32),
Value::Uint16(_) => ValueData::U16Value(b as u32),
Value::Uint32(_) => ValueData::U32Value(b as u32),
Value::Uint64(_) => ValueData::U64Value(b as u64),
ColumnDataType::Uint8 => ValueData::U8Value(b as u32),
ColumnDataType::Uint16 => ValueData::U16Value(b as u32),
ColumnDataType::Uint32 => ValueData::U32Value(b as u32),
ColumnDataType::Uint64 => ValueData::U64Value(b as u64),
Value::Float32(_) => ValueData::F32Value(if b { 1.0 } else { 0.0 }),
Value::Float64(_) => ValueData::F64Value(if b { 1.0 } else { 0.0 }),
ColumnDataType::Float32 => ValueData::F32Value(if b { 1.0 } else { 0.0 }),
ColumnDataType::Float64 => ValueData::F64Value(if b { 1.0 } else { 0.0 }),
Value::Boolean(_) => ValueData::BoolValue(b),
Value::String(_) => ValueData::StringValue(b.to_string()),
ColumnDataType::Boolean => ValueData::BoolValue(b),
ColumnDataType::String => ValueData::StringValue(b.to_string()),
Value::Timestamp(_) => match transform.on_failure {
ColumnDataType::TimestampNanosecond
| ColumnDataType::TimestampMicrosecond
| ColumnDataType::TimestampMillisecond
| ColumnDataType::TimestampSecond => match transform.on_failure {
Some(OnFailure::Ignore) => return Ok(None),
Some(OnFailure::Default) => {
return CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail();
@@ -239,14 +151,19 @@ fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>
}
},
Value::Array(_) | Value::Map(_) => {
ColumnDataType::Binary => {
return CoerceJsonTypeToSnafu {
ty: transform.type_.to_str_type(),
ty: transform.type_.as_str_name(),
}
.fail()
}
Value::Null => return Ok(None),
_ => {
return UnsupportedTypeInPipelineSnafu {
ty: transform.type_.as_str_name(),
}
.fail()
}
};
Ok(Some(val))
@@ -254,37 +171,35 @@ fn coerce_bool_value(b: bool, transform: &Transform) -> Result<Option<ValueData>
fn coerce_i64_value(n: i64, transform: &Transform) -> Result<Option<ValueData>> {
let val = match &transform.type_ {
Value::Int8(_) => ValueData::I8Value(n as i32),
Value::Int16(_) => ValueData::I16Value(n as i32),
Value::Int32(_) => ValueData::I32Value(n as i32),
Value::Int64(_) => ValueData::I64Value(n),
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
ColumnDataType::Int64 => ValueData::I64Value(n),
Value::Uint8(_) => ValueData::U8Value(n as u32),
Value::Uint16(_) => ValueData::U16Value(n as u32),
Value::Uint32(_) => ValueData::U32Value(n as u32),
Value::Uint64(_) => ValueData::U64Value(n as u64),
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
ColumnDataType::Uint64 => ValueData::U64Value(n as u64),
Value::Float32(_) => ValueData::F32Value(n as f32),
Value::Float64(_) => ValueData::F64Value(n as f64),
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
ColumnDataType::Float64 => ValueData::F64Value(n as f64),
Value::Boolean(_) => ValueData::BoolValue(n != 0),
Value::String(_) => ValueData::StringValue(n.to_string()),
ColumnDataType::Boolean => ValueData::BoolValue(n != 0),
ColumnDataType::String => ValueData::StringValue(n.to_string()),
Value::Timestamp(unit) => match unit {
Timestamp::Nanosecond(_) => ValueData::TimestampNanosecondValue(n),
Timestamp::Microsecond(_) => ValueData::TimestampMicrosecondValue(n),
Timestamp::Millisecond(_) => ValueData::TimestampMillisecondValue(n),
Timestamp::Second(_) => ValueData::TimestampSecondValue(n),
},
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(n),
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(n),
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(n),
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(n),
Value::Array(_) | Value::Map(_) => {
ColumnDataType::Binary => {
return CoerceJsonTypeToSnafu {
ty: transform.type_.to_str_type(),
ty: transform.type_.as_str_name(),
}
.fail()
}
Value::Null => return Ok(None),
_ => return Ok(None),
};
Ok(Some(val))
@@ -292,37 +207,35 @@ fn coerce_i64_value(n: i64, transform: &Transform) -> Result<Option<ValueData>>
fn coerce_u64_value(n: u64, transform: &Transform) -> Result<Option<ValueData>> {
let val = match &transform.type_ {
Value::Int8(_) => ValueData::I8Value(n as i32),
Value::Int16(_) => ValueData::I16Value(n as i32),
Value::Int32(_) => ValueData::I32Value(n as i32),
Value::Int64(_) => ValueData::I64Value(n as i64),
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
ColumnDataType::Int64 => ValueData::I64Value(n as i64),
Value::Uint8(_) => ValueData::U8Value(n as u32),
Value::Uint16(_) => ValueData::U16Value(n as u32),
Value::Uint32(_) => ValueData::U32Value(n as u32),
Value::Uint64(_) => ValueData::U64Value(n),
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
ColumnDataType::Uint64 => ValueData::U64Value(n),
Value::Float32(_) => ValueData::F32Value(n as f32),
Value::Float64(_) => ValueData::F64Value(n as f64),
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
ColumnDataType::Float64 => ValueData::F64Value(n as f64),
Value::Boolean(_) => ValueData::BoolValue(n != 0),
Value::String(_) => ValueData::StringValue(n.to_string()),
ColumnDataType::Boolean => ValueData::BoolValue(n != 0),
ColumnDataType::String => ValueData::StringValue(n.to_string()),
Value::Timestamp(unit) => match unit {
Timestamp::Nanosecond(_) => ValueData::TimestampNanosecondValue(n as i64),
Timestamp::Microsecond(_) => ValueData::TimestampMicrosecondValue(n as i64),
Timestamp::Millisecond(_) => ValueData::TimestampMillisecondValue(n as i64),
Timestamp::Second(_) => ValueData::TimestampSecondValue(n as i64),
},
ColumnDataType::TimestampNanosecond => ValueData::TimestampNanosecondValue(n as i64),
ColumnDataType::TimestampMicrosecond => ValueData::TimestampMicrosecondValue(n as i64),
ColumnDataType::TimestampMillisecond => ValueData::TimestampMillisecondValue(n as i64),
ColumnDataType::TimestampSecond => ValueData::TimestampSecondValue(n as i64),
Value::Array(_) | Value::Map(_) => {
ColumnDataType::Binary => {
return CoerceJsonTypeToSnafu {
ty: transform.type_.to_str_type(),
ty: transform.type_.as_str_name(),
}
.fail()
}
Value::Null => return Ok(None),
_ => return Ok(None),
};
Ok(Some(val))
@@ -330,23 +243,26 @@ fn coerce_u64_value(n: u64, transform: &Transform) -> Result<Option<ValueData>>
fn coerce_f64_value(n: f64, transform: &Transform) -> Result<Option<ValueData>> {
let val = match transform.type_ {
Value::Int8(_) => ValueData::I8Value(n as i32),
Value::Int16(_) => ValueData::I16Value(n as i32),
Value::Int32(_) => ValueData::I32Value(n as i32),
Value::Int64(_) => ValueData::I64Value(n as i64),
ColumnDataType::Int8 => ValueData::I8Value(n as i32),
ColumnDataType::Int16 => ValueData::I16Value(n as i32),
ColumnDataType::Int32 => ValueData::I32Value(n as i32),
ColumnDataType::Int64 => ValueData::I64Value(n as i64),
Value::Uint8(_) => ValueData::U8Value(n as u32),
Value::Uint16(_) => ValueData::U16Value(n as u32),
Value::Uint32(_) => ValueData::U32Value(n as u32),
Value::Uint64(_) => ValueData::U64Value(n as u64),
ColumnDataType::Uint8 => ValueData::U8Value(n as u32),
ColumnDataType::Uint16 => ValueData::U16Value(n as u32),
ColumnDataType::Uint32 => ValueData::U32Value(n as u32),
ColumnDataType::Uint64 => ValueData::U64Value(n as u64),
Value::Float32(_) => ValueData::F32Value(n as f32),
Value::Float64(_) => ValueData::F64Value(n),
ColumnDataType::Float32 => ValueData::F32Value(n as f32),
ColumnDataType::Float64 => ValueData::F64Value(n),
Value::Boolean(_) => ValueData::BoolValue(n != 0.0),
Value::String(_) => ValueData::StringValue(n.to_string()),
ColumnDataType::Boolean => ValueData::BoolValue(n != 0.0),
ColumnDataType::String => ValueData::StringValue(n.to_string()),
Value::Timestamp(_) => match transform.on_failure {
ColumnDataType::TimestampNanosecond
| ColumnDataType::TimestampMicrosecond
| ColumnDataType::TimestampMillisecond
| ColumnDataType::TimestampSecond => match transform.on_failure {
Some(OnFailure::Ignore) => return Ok(None),
Some(OnFailure::Default) => {
return CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail();
@@ -356,14 +272,14 @@ fn coerce_f64_value(n: f64, transform: &Transform) -> Result<Option<ValueData>>
}
},
Value::Array(_) | Value::Map(_) => {
ColumnDataType::Binary => {
return CoerceJsonTypeToSnafu {
ty: transform.type_.to_str_type(),
ty: transform.type_.as_str_name(),
}
.fail()
}
Value::Null => return Ok(None),
_ => return Ok(None),
};
Ok(Some(val))
@@ -376,12 +292,12 @@ macro_rules! coerce_string_value {
Err(_) => match $transform.on_failure {
Some(OnFailure::Ignore) => Ok(None),
Some(OnFailure::Default) => match $transform.get_default() {
Some(default) => coerce_value(default, $transform),
None => coerce_value($transform.get_type_matched_default_val(), $transform),
Some(default) => Ok(Some(default.clone())),
None => $transform.get_type_matched_default_val().map(Some),
},
None => CoerceStringToTypeSnafu {
s: $s,
ty: $transform.type_.to_str_type(),
ty: $transform.type_.as_str_name(),
}
.fail(),
},
@@ -389,92 +305,85 @@ macro_rules! coerce_string_value {
};
}
fn coerce_string_value(s: &String, transform: &Transform) -> Result<Option<ValueData>> {
fn coerce_string_value(s: &str, transform: &Transform) -> Result<Option<ValueData>> {
match transform.type_ {
Value::Int8(_) => {
ColumnDataType::Int8 => {
coerce_string_value!(s, transform, i32, I8Value)
}
Value::Int16(_) => {
ColumnDataType::Int16 => {
coerce_string_value!(s, transform, i32, I16Value)
}
Value::Int32(_) => {
ColumnDataType::Int32 => {
coerce_string_value!(s, transform, i32, I32Value)
}
Value::Int64(_) => {
ColumnDataType::Int64 => {
coerce_string_value!(s, transform, i64, I64Value)
}
Value::Uint8(_) => {
ColumnDataType::Uint8 => {
coerce_string_value!(s, transform, u32, U8Value)
}
Value::Uint16(_) => {
ColumnDataType::Uint16 => {
coerce_string_value!(s, transform, u32, U16Value)
}
Value::Uint32(_) => {
ColumnDataType::Uint32 => {
coerce_string_value!(s, transform, u32, U32Value)
}
Value::Uint64(_) => {
ColumnDataType::Uint64 => {
coerce_string_value!(s, transform, u64, U64Value)
}
Value::Float32(_) => {
ColumnDataType::Float32 => {
coerce_string_value!(s, transform, f32, F32Value)
}
Value::Float64(_) => {
ColumnDataType::Float64 => {
coerce_string_value!(s, transform, f64, F64Value)
}
Value::Boolean(_) => {
ColumnDataType::Boolean => {
coerce_string_value!(s, transform, bool, BoolValue)
}
Value::String(_) => Ok(Some(ValueData::StringValue(s.to_string()))),
ColumnDataType::String => Ok(Some(ValueData::StringValue(s.to_string()))),
Value::Timestamp(_) => match transform.on_failure {
ColumnDataType::TimestampNanosecond
| ColumnDataType::TimestampMicrosecond
| ColumnDataType::TimestampMillisecond
| ColumnDataType::TimestampSecond => match transform.on_failure {
Some(OnFailure::Ignore) => Ok(None),
Some(OnFailure::Default) => CoerceUnsupportedEpochTypeSnafu { ty: "Default" }.fail(),
None => CoerceUnsupportedEpochTypeSnafu { ty: "String" }.fail(),
},
Value::Array(_) | Value::Map(_) => CoerceStringToTypeSnafu {
ColumnDataType::Binary => CoerceStringToTypeSnafu {
s,
ty: transform.type_.to_str_type(),
ty: transform.type_.as_str_name(),
}
.fail(),
Value::Null => Ok(None),
_ => Ok(None),
}
}
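Illustrative use of the string path (a Float64-typed transform with no `on_failure`, mirroring the test setup below): a parseable string is coerced through the `coerce_string_value!` macro, while an unparseable one surfaces `CoerceStringToTypeSnafu`.
let transform = Transform {
    fields: Fields::default(),
    type_: ColumnDataType::Float64,
    default: None,
    index: None,
    on_failure: None,
    tag: false,
};
assert_eq!(
    coerce_string_value("3.14", &transform).unwrap(),
    Some(ValueData::F64Value(3.14)),
);
// No on_failure policy: a parse failure becomes an error.
assert!(coerce_string_value("not-a-number", &transform).is_err());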
fn coerce_json_value(v: &Value, transform: &Transform) -> Result<Option<ValueData>> {
fn coerce_json_value(v: &VrlValue, transform: &Transform) -> Result<Option<ValueData>> {
match &transform.type_ {
Value::Array(_) | Value::Map(_) => (),
ColumnDataType::Binary => (),
t => {
return CoerceTypeToJsonSnafu {
ty: t.to_str_type(),
ty: t.as_str_name(),
}
.fail();
}
}
match v {
Value::Map(_) => {
let data: jsonb::Value = v.into();
Ok(Some(ValueData::BinaryValue(data.to_vec())))
}
Value::Array(_) => {
let data: jsonb::Value = v.into();
Ok(Some(ValueData::BinaryValue(data.to_vec())))
}
_ => CoerceTypeToJsonSnafu {
ty: v.to_str_type(),
}
.fail(),
}
let data: jsonb::Value = vrl_value_to_jsonb_value(v);
Ok(Some(ValueData::BinaryValue(data.to_vec())))
}
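And the JSON path in one sketch (assuming a Binary-typed transform as above): any object or array becomes jsonb bytes.
let transform = Transform {
    fields: Fields::default(),
    type_: ColumnDataType::Binary,
    default: None,
    index: None,
    on_failure: None,
    tag: false,
};
let v: VrlValue = serde_json::json!({"k": [1, 2]}).into();
assert!(matches!(
    coerce_json_value(&v, &transform).unwrap(),
    Some(ValueData::BinaryValue(_))
));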
#[cfg(test)]
mod tests {
use vrl::prelude::Bytes;
use super::*;
use crate::etl::field::Fields;
@@ -482,7 +391,7 @@ mod tests {
fn test_coerce_string_without_on_failure() {
let transform = Transform {
fields: Fields::default(),
type_: Value::Int32(0),
type_: ColumnDataType::Int32,
default: None,
index: None,
on_failure: None,
@@ -491,14 +400,14 @@ mod tests {
// valid string
{
let val = Value::String("123".to_string());
let val = VrlValue::Integer(123);
let result = coerce_value(&val, &transform).unwrap();
assert_eq!(result, Some(ValueData::I32Value(123)));
}
// invalid string
{
let val = Value::String("hello".to_string());
let val = VrlValue::Bytes(Bytes::from("hello"));
let result = coerce_value(&val, &transform);
assert!(result.is_err());
}
@@ -508,14 +417,14 @@ mod tests {
fn test_coerce_string_with_on_failure_ignore() {
let transform = Transform {
fields: Fields::default(),
type_: Value::Int32(0),
type_: ColumnDataType::Int32,
default: None,
index: None,
on_failure: Some(OnFailure::Ignore),
tag: false,
};
let val = Value::String("hello".to_string());
let val = VrlValue::Bytes(Bytes::from("hello"));
let result = coerce_value(&val, &transform).unwrap();
assert_eq!(result, None);
}
@@ -524,7 +433,7 @@ mod tests {
fn test_coerce_string_with_on_failure_default() {
let mut transform = Transform {
fields: Fields::default(),
type_: Value::Int32(0),
type_: ColumnDataType::Int32,
default: None,
index: None,
on_failure: Some(OnFailure::Default),
@@ -533,15 +442,15 @@ mod tests {
// with no explicit default value
{
let val = Value::String("hello".to_string());
let val = VrlValue::Bytes(Bytes::from("hello"));
let result = coerce_value(&val, &transform).unwrap();
assert_eq!(result, Some(ValueData::I32Value(0)));
}
// with explicit default value
{
transform.default = Some(Value::Int32(42));
let val = Value::String("hello".to_string());
transform.default = Some(ValueData::I32Value(42));
let val = VrlValue::Bytes(Bytes::from("hello"));
let result = coerce_value(&val, &transform).unwrap();
assert_eq!(result, Some(ValueData::I32Value(42)));
}

File diff suppressed because it is too large

View File

@@ -1,81 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::error::{Error, Result};
use crate::etl::value::Value;
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Array {
pub values: Vec<Value>,
}
impl Array {
pub fn new() -> Self {
Array { values: vec![] }
}
}
impl std::fmt::Display for Array {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let values = self
.values
.iter()
.map(|v| v.to_string())
.collect::<Vec<String>>()
.join(", ");
write!(f, "[{}]", values)
}
}
impl std::ops::Deref for Array {
type Target = Vec<Value>;
fn deref(&self) -> &Self::Target {
&self.values
}
}
impl std::ops::DerefMut for Array {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.values
}
}
impl IntoIterator for Array {
type Item = Value;
type IntoIter = std::vec::IntoIter<Value>;
fn into_iter(self) -> Self::IntoIter {
self.values.into_iter()
}
}
impl From<Vec<Value>> for Array {
fn from(values: Vec<Value>) -> Self {
Array { values }
}
}
impl TryFrom<Vec<serde_json::Value>> for Array {
type Error = Error;
fn try_from(value: Vec<serde_json::Value>) -> Result<Self> {
let values = value
.into_iter()
.map(|v| v.try_into())
.collect::<Result<Vec<_>>>()?;
Ok(Array { values })
}
}

View File

@@ -1,70 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use crate::etl::value::Value;
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Map {
pub values: BTreeMap<String, Value>,
}
impl Map {
pub fn one(key: impl Into<String>, value: Value) -> Map {
let mut map = Map::default();
map.insert(key, value);
map
}
pub fn insert(&mut self, key: impl Into<String>, value: Value) {
self.values.insert(key.into(), value);
}
pub fn extend(&mut self, Map { values }: Map) {
self.values.extend(values);
}
}
impl From<BTreeMap<String, Value>> for Map {
fn from(values: BTreeMap<String, Value>) -> Self {
Self { values }
}
}
impl std::ops::Deref for Map {
type Target = BTreeMap<String, Value>;
fn deref(&self) -> &Self::Target {
&self.values
}
}
impl std::ops::DerefMut for Map {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.values
}
}
impl std::fmt::Display for Map {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let values = self
.values
.iter()
.map(|(k, v)| format!("{}: {}", k, v))
.collect::<Vec<String>>()
.join(", ");
write!(f, "{{{}}}", values)
}
}

View File

@@ -1,140 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use chrono::{DateTime, Utc};
use common_time::timestamp::TimeUnit;
#[derive(Debug, Clone, PartialEq)]
pub enum Timestamp {
Nanosecond(i64),
Microsecond(i64),
Millisecond(i64),
Second(i64),
}
pub(crate) const NANOSECOND_RESOLUTION: &str = "nanosecond";
pub(crate) const NANO_RESOLUTION: &str = "nano";
pub(crate) const NS_RESOLUTION: &str = "ns";
pub(crate) const MICROSECOND_RESOLUTION: &str = "microsecond";
pub(crate) const MICRO_RESOLUTION: &str = "micro";
pub(crate) const US_RESOLUTION: &str = "us";
pub(crate) const MILLISECOND_RESOLUTION: &str = "millisecond";
pub(crate) const MILLI_RESOLUTION: &str = "milli";
pub(crate) const MS_RESOLUTION: &str = "ms";
pub(crate) const SECOND_RESOLUTION: &str = "second";
pub(crate) const SEC_RESOLUTION: &str = "sec";
pub(crate) const S_RESOLUTION: &str = "s";
pub(crate) const VALID_RESOLUTIONS: [&str; 12] = [
NANOSECOND_RESOLUTION,
NANO_RESOLUTION,
NS_RESOLUTION,
MICROSECOND_RESOLUTION,
MICRO_RESOLUTION,
US_RESOLUTION,
MILLISECOND_RESOLUTION,
MILLI_RESOLUTION,
MS_RESOLUTION,
SECOND_RESOLUTION,
SEC_RESOLUTION,
S_RESOLUTION,
];
impl Timestamp {
pub(crate) fn timestamp_nanos(&self) -> i64 {
match self {
Timestamp::Nanosecond(v) => *v,
Timestamp::Microsecond(v) => *v * 1_000,
Timestamp::Millisecond(v) => *v * 1_000_000,
Timestamp::Second(v) => *v * 1_000_000_000,
}
}
pub(crate) fn timestamp_micros(&self) -> i64 {
match self {
Timestamp::Nanosecond(v) => *v / 1_000,
Timestamp::Microsecond(v) => *v,
Timestamp::Millisecond(v) => *v * 1_000,
Timestamp::Second(v) => *v * 1_000_000,
}
}
pub(crate) fn timestamp_millis(&self) -> i64 {
match self {
Timestamp::Nanosecond(v) => *v / 1_000_000,
Timestamp::Microsecond(v) => *v / 1_000,
Timestamp::Millisecond(v) => *v,
Timestamp::Second(v) => *v * 1_000,
}
}
pub(crate) fn timestamp(&self) -> i64 {
match self {
Timestamp::Nanosecond(v) => *v / 1_000_000_000,
Timestamp::Microsecond(v) => *v / 1_000_000,
Timestamp::Millisecond(v) => *v / 1_000,
Timestamp::Second(v) => *v,
}
}
pub(crate) fn to_unit(&self, unit: &TimeUnit) -> i64 {
match unit {
TimeUnit::Second => self.timestamp(),
TimeUnit::Millisecond => self.timestamp_millis(),
TimeUnit::Microsecond => self.timestamp_micros(),
TimeUnit::Nanosecond => self.timestamp_nanos(),
}
}
pub fn get_unit(&self) -> TimeUnit {
match self {
Timestamp::Nanosecond(_) => TimeUnit::Nanosecond,
Timestamp::Microsecond(_) => TimeUnit::Microsecond,
Timestamp::Millisecond(_) => TimeUnit::Millisecond,
Timestamp::Second(_) => TimeUnit::Second,
}
}
pub fn to_datetime(&self) -> Option<DateTime<Utc>> {
match self {
Timestamp::Nanosecond(v) => Some(DateTime::from_timestamp_nanos(*v)),
Timestamp::Microsecond(v) => DateTime::from_timestamp_micros(*v),
Timestamp::Millisecond(v) => DateTime::from_timestamp_millis(*v),
Timestamp::Second(v) => DateTime::from_timestamp(*v, 0),
}
}
pub fn from_datetime(dt: DateTime<Utc>) -> Option<Self> {
dt.timestamp_nanos_opt().map(Timestamp::Nanosecond)
}
}
impl Default for Timestamp {
fn default() -> Self {
Timestamp::Nanosecond(chrono::Utc::now().timestamp_nanos_opt().unwrap_or_default())
}
}
impl std::fmt::Display for Timestamp {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let (value, resolution) = match self {
Timestamp::Nanosecond(v) => (v, NANOSECOND_RESOLUTION),
Timestamp::Microsecond(v) => (v, MICROSECOND_RESOLUTION),
Timestamp::Millisecond(v) => (v, MILLISECOND_RESOLUTION),
Timestamp::Second(v) => (v, SECOND_RESOLUTION),
};
write!(f, "{}, resolution: {}", value, resolution)
}
}

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(string_from_utf8_lossy_owned)]
mod dispatcher;
pub mod error;
mod etl;
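For context on the feature gate above: `String::from_utf8_lossy_owned` is nightly-only, which is why the crate opts in here. It converts an owned `Vec<u8>` into a `String`, replacing invalid UTF-8 with U+FFFD and avoiding a copy when the bytes are already valid. A minimal sketch:
let s = String::from_utf8_lossy_owned(vec![0x68, 0x69]);
assert_eq!(s, "hi");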
@@ -24,10 +26,8 @@ pub use etl::processor::Processor;
pub use etl::transform::transformer::greptime::{GreptimePipelineParams, SchemaInfo};
pub use etl::transform::transformer::identity_pipeline;
pub use etl::transform::GreptimeTransformer;
pub use etl::value::{Array, Map, Timestamp, Value};
pub use etl::{
json_array_to_map, json_to_map, parse, simd_json_array_to_map, simd_json_to_map, Content,
DispatchedTo, Pipeline, PipelineExecOutput, TransformedOutput, TransformerMode,
parse, Content, DispatchedTo, Pipeline, PipelineExecOutput, TransformedOutput, TransformerMode,
};
pub use manager::{
pipeline_operator, table, util, IdentityTimeIndex, PipelineContext, PipelineDefinition,

View File

@@ -16,18 +16,22 @@ use std::sync::Arc;
use api::v1::value::ValueData;
use api::v1::ColumnDataType;
use chrono::{DateTime, Utc};
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datatypes::timestamp::TimestampNanosecond;
use itertools::Itertools;
use session::context::Channel;
use snafu::ensure;
use snafu::{ensure, OptionExt};
use util::to_pipeline_version;
use vrl::value::Value as VrlValue;
use crate::error::{CastTypeSnafu, InvalidCustomTimeIndexSnafu, PipelineMissingSnafu, Result};
use crate::etl::value::time::{MS_RESOLUTION, NS_RESOLUTION, S_RESOLUTION, US_RESOLUTION};
use crate::error::{
CastTypeSnafu, InvalidCustomTimeIndexSnafu, InvalidTimestampSnafu, PipelineMissingSnafu, Result,
};
use crate::etl::value::{MS_RESOLUTION, NS_RESOLUTION, S_RESOLUTION, US_RESOLUTION};
use crate::table::PipelineTable;
use crate::{GreptimePipelineParams, Pipeline, Value};
use crate::{GreptimePipelineParams, Pipeline};
mod pipeline_cache;
pub mod pipeline_operator;
@@ -232,7 +236,7 @@ impl IdentityTimeIndex {
}
}
pub fn get_column_name(&self) -> &String {
pub fn get_column_name(&self) -> &str {
match self {
IdentityTimeIndex::Epoch(field, _, _) => field,
IdentityTimeIndex::DateStr(field, _, _) => field,
@@ -258,25 +262,25 @@ impl IdentityTimeIndex {
}
}
pub fn get_timestamp(&self, value: Option<&Value>) -> Result<ValueData> {
pub fn get_timestamp_value(&self, value: Option<&VrlValue>) -> Result<ValueData> {
match self {
IdentityTimeIndex::Epoch(_, unit, ignore_errors) => {
let v = match value {
Some(Value::Int32(v)) => *v as i64,
Some(Value::Int64(v)) => *v,
Some(Value::Uint32(v)) => *v as i64,
Some(Value::Uint64(v)) => *v as i64,
Some(Value::String(s)) => match s.parse::<i64>() {
Some(VrlValue::Integer(v)) => *v,
Some(VrlValue::Bytes(s)) => match String::from_utf8_lossy(s).parse::<i64>() {
Ok(v) => v,
Err(_) => {
return if_ignore_errors(
*ignore_errors,
*unit,
format!("failed to convert {} to number", s),
format!(
"failed to convert {} to number",
String::from_utf8_lossy(s)
),
)
}
},
Some(Value::Timestamp(timestamp)) => timestamp.to_unit(unit),
Some(VrlValue::Timestamp(timestamp)) => datetime_utc_to_unit(timestamp, unit)?,
Some(v) => {
return if_ignore_errors(
*ignore_errors,
@@ -292,7 +296,7 @@ impl IdentityTimeIndex {
}
IdentityTimeIndex::DateStr(_, format, ignore_errors) => {
let v = match value {
Some(Value::String(s)) => s,
Some(VrlValue::Bytes(s)) => String::from_utf8_lossy(s),
Some(v) => {
return if_ignore_errors(
*ignore_errors,
@@ -309,7 +313,7 @@ impl IdentityTimeIndex {
}
};
let timestamp = match chrono::DateTime::parse_from_str(v, format) {
let timestamp = match chrono::DateTime::parse_from_str(&v, format) {
Ok(ts) => ts,
Err(_) => {
return if_ignore_errors(
@@ -321,13 +325,31 @@ impl IdentityTimeIndex {
};
Ok(ValueData::TimestampNanosecondValue(
timestamp.timestamp_nanos_opt().unwrap_or_default(),
timestamp
.timestamp_nanos_opt()
.context(InvalidTimestampSnafu {
input: timestamp.to_rfc3339(),
})?,
))
}
}
}
}
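A hedged sketch of the epoch branch (the `Epoch(field, unit, ignore_errors)` shape is taken from the match arms above; the values are illustrative, and it assumes the unit maps to the matching `ValueData` variant):
let idx = IdentityTimeIndex::Epoch("ts".to_string(), TimeUnit::Millisecond, false);
let v = VrlValue::Integer(1_700_000_000_000);
assert_eq!(
    idx.get_timestamp_value(Some(&v)).unwrap(),
    ValueData::TimestampMillisecondValue(1_700_000_000_000),
);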
fn datetime_utc_to_unit(timestamp: &DateTime<Utc>, unit: &TimeUnit) -> Result<i64> {
let ts = match unit {
TimeUnit::Nanosecond => timestamp
.timestamp_nanos_opt()
.context(InvalidTimestampSnafu {
input: timestamp.to_rfc3339(),
})?,
TimeUnit::Microsecond => timestamp.timestamp_micros(),
TimeUnit::Millisecond => timestamp.timestamp_millis(),
TimeUnit::Second => timestamp.timestamp(),
};
Ok(ts)
}
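A quick sketch of the unit conversions above (epoch values are illustrative). Only the nanosecond path can fail, since i64 nanoseconds overflow around the year 2262, which is why the function returns `Result`:
use chrono::{TimeZone, Utc};
let ts = Utc.timestamp_opt(1_700_000_000, 0).unwrap();
assert_eq!(datetime_utc_to_unit(&ts, &TimeUnit::Second).unwrap(), 1_700_000_000);
assert_eq!(
    datetime_utc_to_unit(&ts, &TimeUnit::Millisecond).unwrap(),
    1_700_000_000_000
);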
fn if_ignore_errors(ignore_errors: bool, unit: TimeUnit, msg: String) -> Result<ValueData> {
if ignore_errors {
Ok(time_unit_to_value_data(

View File

@@ -15,12 +15,12 @@
use dyn_fmt::AsStrFormatExt;
use regex::Regex;
use snafu::{ensure, OptionExt};
use vrl::value::Value as VrlValue;
use yaml_rust::Yaml;
use crate::error::{
Error, InvalidTableSuffixTemplateSnafu, RequiredTableSuffixTemplateSnafu, Result,
};
use crate::Value;
const REPLACE_KEY: &str = "{}";
@@ -47,22 +47,16 @@ pub(crate) struct TableSuffixTemplate {
}
impl TableSuffixTemplate {
pub fn apply(&self, val: &Value) -> Option<String> {
pub fn apply(&self, val: &VrlValue) -> Option<String> {
let val = val.as_object()?;
let values = self
.keys
.iter()
.filter_map(|key| {
let v = val.get(key)?;
let v = val.get(key.as_str())?;
match v {
Value::Int8(v) => Some(v.to_string()),
Value::Int16(v) => Some(v.to_string()),
Value::Int32(v) => Some(v.to_string()),
Value::Int64(v) => Some(v.to_string()),
Value::Uint8(v) => Some(v.to_string()),
Value::Uint16(v) => Some(v.to_string()),
Value::Uint32(v) => Some(v.to_string()),
Value::Uint64(v) => Some(v.to_string()),
Value::String(v) => Some(v.clone()),
VrlValue::Integer(v) => Some(v.to_string()),
VrlValue::Bytes(v) => Some(String::from_utf8_lossy_owned(v.to_vec())),
_ => None,
}
})

View File

@@ -13,11 +13,12 @@
// limitations under the License.
use greptime_proto::v1::{ColumnDataType, ColumnSchema, Rows, SemanticType};
use pipeline::{json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext};
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext};
use vrl::value::Value as VrlValue;
/// test util function to parse and execute pipeline
pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
let input_value = serde_json::from_str::<serde_json::Value>(input_str).unwrap();
let input_value = serde_json::from_str::<VrlValue>(input_str).unwrap();
let yaml_content = Content::Yaml(pipeline_yaml);
let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline");
@@ -32,21 +33,19 @@ pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
let mut rows = Vec::new();
match input_value {
serde_json::Value::Array(array) => {
VrlValue::Array(array) => {
for value in array {
let intermediate_status = json_to_map(value).unwrap();
let row = pipeline
.exec_mut(intermediate_status, &pipeline_ctx, &mut schema_info)
.exec_mut(value, &pipeline_ctx, &mut schema_info)
.expect("failed to exec pipeline")
.into_transformed()
.expect("expect transformed result ");
rows.push(row.0);
}
}
serde_json::Value::Object(_) => {
let intermediate_status = json_to_map(input_value).unwrap();
VrlValue::Object(_) => {
let row = pipeline
.exec_mut(intermediate_status, &pipeline_ctx, &mut schema_info)
.exec_mut(input_value, &pipeline_ctx, &mut schema_info)
.expect("failed to exec pipeline")
.into_transformed()
.expect("expect transformed result ");

View File

@@ -16,7 +16,7 @@ mod common;
use greptime_proto::v1::value::ValueData::StringValue;
use greptime_proto::v1::{ColumnDataType, SemanticType};
use pipeline::{json_to_map, setup_pipeline, PipelineContext};
use pipeline::{setup_pipeline, PipelineContext};
fn make_string_column_schema(name: String) -> greptime_proto::v1::ColumnSchema {
common::make_column_schema(name, ColumnDataType::String, SemanticType::Field)
@@ -282,7 +282,7 @@ transform:
session::context::Channel::Unknown,
);
let result = json_to_map(input_value).unwrap();
let result = input_value.into();
let row = pipeline.exec_mut(result, &pipeline_ctx, &mut schema_info);

View File

@@ -20,7 +20,7 @@ use greptime_proto::v1::value::ValueData::{
U32Value, U64Value, U8Value,
};
use greptime_proto::v1::Value as GreptimeValue;
use pipeline::{json_to_map, parse, setup_pipeline, Content, Pipeline, PipelineContext};
use pipeline::{parse, setup_pipeline, Content, Pipeline, PipelineContext};
#[test]
fn test_complex_data() {
@@ -425,7 +425,7 @@ transform:
&pipeline_param,
session::context::Channel::Unknown,
);
let stats = json_to_map(input_value).unwrap();
let stats = input_value.into();
let row = pipeline
.exec_mut(stats, &pipeline_ctx, &mut schema_info)
@@ -500,7 +500,7 @@ transform:
session::context::Channel::Unknown,
);
let status = json_to_map(input_value).unwrap();
let status = input_value.into();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -615,7 +615,7 @@ transform:
session::context::Channel::Unknown,
);
let status = json_to_map(input_value).unwrap();
let status = input_value.into();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -687,7 +687,7 @@ transform:
session::context::Channel::Unknown,
);
let status = json_to_map(input_value).unwrap();
let status = input_value.into();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -733,7 +733,7 @@ transform:
session::context::Channel::Unknown,
);
let status = json_to_map(input_value).unwrap();
let status = input_value.into();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -798,7 +798,7 @@ transform:
session::context::Channel::Unknown,
);
let status = json_to_map(input_value).unwrap();
let status = input_value.into();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -845,7 +845,7 @@ transform:
session::context::Channel::Unknown,
);
let status = json_to_map(input_value).unwrap();
let status = input_value.into();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -913,7 +913,7 @@ transform:
session::context::Channel::Unknown,
);
let status = json_to_map(input_value1).unwrap();
let status = input_value1.into();
let dispatched_to = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -922,7 +922,7 @@ transform:
assert_eq!(dispatched_to.table_suffix, "http");
assert_eq!(dispatched_to.pipeline.unwrap(), "access_log_pipeline");
let status = json_to_map(input_value2).unwrap();
let status = input_value2.into();
let row = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap()
@@ -983,7 +983,7 @@ table_suffix: _${logger}
session::context::Channel::Unknown,
);
let status = json_to_map(input_value).unwrap();
let status = input_value.into();
let exec_re = pipeline
.exec_mut(status, &pipeline_ctx, &mut schema_info)
.unwrap();

View File

@@ -128,6 +128,7 @@ tower-http = { version = "0.6", features = ["full"] }
tracing.workspace = true
urlencoding = "2.1"
uuid.workspace = true
vrl.workspace = true
zstd.workspace = true
[target.'cfg(not(windows))'.dependencies]

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use std::sync::Arc;
use std::time::Instant;
@@ -30,9 +31,10 @@ use pipeline::{
use serde_json::{json, Deserializer, Value};
use session::context::{Channel, QueryContext};
use snafu::{ensure, ResultExt};
use vrl::value::Value as VrlValue;
use crate::error::{
status_code_to_http_status, InvalidElasticsearchInputSnafu, ParseJsonSnafu, PipelineSnafu,
status_code_to_http_status, InvalidElasticsearchInputSnafu, ParseJsonSnafu,
Result as ServersResult,
};
use crate::http::event::{
@@ -287,8 +289,8 @@ fn parse_bulk_request(
msg_field: &Option<String>,
) -> ServersResult<Vec<PipelineIngestRequest>> {
// Read the NDJSON payload and convert it to `Vec<VrlValue>`. Return an error if the input is not valid JSON.
let values: Vec<Value> = Deserializer::from_str(input)
.into_iter::<Value>()
let values: Vec<VrlValue> = Deserializer::from_str(input)
.into_iter::<VrlValue>()
.collect::<Result<_, _>>()
.context(ParseJsonSnafu)?;
@@ -307,12 +309,13 @@ fn parse_bulk_request(
// For the Elasticsearch `_bulk` POST API, each chunk contains two objects:
// 1. The first object is the command; it should be `create` or `index`.
// 2. The second object is the document data.
while let Some(mut cmd) = values.next() {
while let Some(cmd) = values.next() {
// NOTE: Although the native Elasticsearch API supports upsert in the `index` command, we don't support changing any data in the `index` command; it behaves the same as the `create` command.
let index = if let Some(cmd) = cmd.get_mut("create") {
get_index_from_cmd(cmd.take())?
} else if let Some(cmd) = cmd.get_mut("index") {
get_index_from_cmd(cmd.take())?
let mut cmd = cmd.into_object();
let index = if let Some(cmd) = cmd.as_mut().and_then(|c| c.remove("create")) {
get_index_from_cmd(cmd)?
} else if let Some(cmd) = cmd.as_mut().and_then(|c| c.remove("index")) {
get_index_from_cmd(cmd)?
} else {
return InvalidElasticsearchInputSnafu {
reason: format!(
@@ -339,7 +342,6 @@ fn parse_bulk_request(
}
);
let log_value = pipeline::json_to_map(log_value).context(PipelineSnafu)?;
requests.push(PipelineIngestRequest {
table: index.unwrap_or_else(|| index_from_url.as_ref().unwrap().clone()),
values: vec![log_value],
@@ -357,39 +359,50 @@ fn parse_bulk_request(
}
// Get the index from the command. We take the index as the table name in GreptimeDB.
fn get_index_from_cmd(mut v: Value) -> ServersResult<Option<String>> {
if let Some(index) = v.get_mut("_index") {
if let Value::String(index) = index.take() {
Ok(Some(index))
} else {
// If the `_index` exists, it should be a string.
InvalidElasticsearchInputSnafu {
reason: "index is not a string in bulk request".to_string(),
}
.fail()
}
fn get_index_from_cmd(v: VrlValue) -> ServersResult<Option<String>> {
let Some(index) = v.into_object().and_then(|mut m| m.remove("_index")) else {
return Ok(None);
};
if let VrlValue::Bytes(index) = index {
Ok(Some(String::from_utf8_lossy(&index).to_string()))
} else {
Ok(None)
// If the `_index` exists, it should be a string.
InvalidElasticsearchInputSnafu {
reason: "index is not a string in bulk request",
}
.fail()
}
}
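A sketch of both outcomes (the index name is illustrative): a string `_index` is extracted, while a missing `_index` yields `None`.
let cmd: VrlValue = serde_json::json!({"_index": "test"}).into();
assert_eq!(get_index_from_cmd(cmd).unwrap(), Some("test".to_string()));
let cmd: VrlValue = serde_json::json!({}).into();
assert_eq!(get_index_from_cmd(cmd).unwrap(), None);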
// If the msg_field is provided, fetch the value of the field from the document data.
// For example, if the `msg_field` is `message` and the document data is `{"message":"hello"}`, the log value will be built from the `message` field (re-parsed as JSON when possible).
fn get_log_value_from_msg_field(mut v: Value, msg_field: &str) -> Value {
if let Some(message) = v.get_mut(msg_field) {
let message = message.take();
fn get_log_value_from_msg_field(v: VrlValue, msg_field: &str) -> VrlValue {
let VrlValue::Object(mut m) = v else {
return v;
};
if let Some(message) = m.remove(msg_field) {
match message {
Value::String(s) => match serde_json::from_str::<Value>(&s) {
Ok(s) => s,
// If the message is not a valid JSON, return a map with the original message key and value.
Err(_) => json!({msg_field: s}),
},
VrlValue::Bytes(bytes) => {
match serde_json::from_slice::<VrlValue>(&bytes) {
Ok(v) => v,
// If the message is not a valid JSON, return a map with the original message key and value.
Err(_) => {
let map = BTreeMap::from([(
msg_field.to_string().into(),
VrlValue::Bytes(bytes),
)]);
VrlValue::Object(map)
}
}
}
// If the message is not a string, just use the original message as the log value.
_ => message,
}
} else {
// If the msg_field is not found, just use the original document as the log value.
v
VrlValue::Object(m)
}
}
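Behavior sketch (illustrative values): embedded JSON in the message field is re-parsed, while plain text is wrapped back under the original key.
let doc: VrlValue = serde_json::json!({"message": "{\"a\":1}"}).into();
let expected: VrlValue = serde_json::json!({"a": 1}).into();
assert_eq!(get_log_value_from_msg_field(doc, "message"), expected);
let doc: VrlValue = serde_json::json!({"message": "plain text"}).into();
let expected: VrlValue = serde_json::json!({"message": "plain text"}).into();
assert_eq!(get_log_value_from_msg_field(doc, "message"), expected);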
@@ -414,12 +427,14 @@ mod tests {
PipelineIngestRequest {
table: "test".to_string(),
values: vec![
pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap(),
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
],
},
PipelineIngestRequest {
table: "test".to_string(),
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
values: vec![
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
],
},
]),
),
@@ -436,11 +451,15 @@ mod tests {
Ok(vec![
PipelineIngestRequest {
table: "test".to_string(),
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
values: vec![
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
],
},
PipelineIngestRequest {
table: "logs".to_string(),
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
values: vec![
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
],
},
]),
),
@@ -457,11 +476,15 @@ mod tests {
Ok(vec![
PipelineIngestRequest {
table: "test".to_string(),
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
values: vec![
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
],
},
PipelineIngestRequest {
table: "logs".to_string(),
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
values: vec![
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
],
},
]),
),
@@ -477,7 +500,9 @@ mod tests {
Ok(vec![
PipelineIngestRequest {
table: "test".to_string(),
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
values: vec![
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
],
},
]),
),
@@ -494,11 +519,15 @@ mod tests {
Ok(vec![
PipelineIngestRequest {
table: "test".to_string(),
values: vec![pipeline::json_to_map(json!({"foo1": "foo1_value", "bar1": "bar1_value"})).unwrap()],
values: vec![
json!({"foo1": "foo1_value", "bar1": "bar1_value"}).into(),
],
},
PipelineIngestRequest {
table: "test".to_string(),
values: vec![pipeline::json_to_map(json!({"foo2": "foo2_value", "bar2": "bar2_value"})).unwrap()],
values: vec![
json!({"foo2": "foo2_value", "bar2": "bar2_value"}).into(),
],
},
]),
),
@@ -516,13 +545,13 @@ mod tests {
PipelineIngestRequest {
table: "logs-generic-default".to_string(),
values: vec![
pipeline::json_to_map(json!({"message": "172.16.0.1 - - [25/May/2024:20:19:37 +0000] \"GET /contact HTTP/1.1\" 404 162 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1\""})).unwrap(),
json!({"message": "172.16.0.1 - - [25/May/2024:20:19:37 +0000] \"GET /contact HTTP/1.1\" 404 162 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1\""}).into(),
],
},
PipelineIngestRequest {
table: "logs-generic-default".to_string(),
values: vec![
pipeline::json_to_map(json!({"message": "10.0.0.1 - - [25/May/2024:20:18:37 +0000] \"GET /images/logo.png HTTP/1.1\" 304 0 \"-\" \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0\""})).unwrap(),
json!({"message": "10.0.0.1 - - [25/May/2024:20:18:37 +0000] \"GET /images/logo.png HTTP/1.1\" 304 0 \"-\" \"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0\""}).into(),
],
},
]),

View File

@@ -35,14 +35,14 @@ use headers::ContentType;
use lazy_static::lazy_static;
use mime_guess::mime;
use pipeline::util::to_pipeline_version;
use pipeline::{
ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition, Value as PipelineValue,
};
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition};
use serde::{Deserialize, Serialize};
use serde_json::{json, Deserializer, Map, Value as JsonValue};
use session::context::{Channel, QueryContext, QueryContextRef};
use simd_json::Buffers;
use snafu::{ensure, OptionExt, ResultExt};
use strum::{EnumIter, IntoEnumIterator};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
status_code_to_http_status, Error, InvalidParameterSnafu, ParseJsonSnafu, PipelineSnafu, Result,
@@ -117,7 +117,7 @@ pub(crate) struct PipelineIngestRequest {
/// The table where the log data will be written to.
pub table: String,
/// The log data to be ingested.
pub values: Vec<PipelineValue>,
pub values: Vec<VrlValue>,
}
pub struct PipelineContent(String);
@@ -295,18 +295,18 @@ pub async fn delete_pipeline(
/// Transform an NDJSON stream into a single array;
/// always returns an array.
fn transform_ndjson_array_factory(
values: impl IntoIterator<Item = Result<JsonValue, serde_json::Error>>,
values: impl IntoIterator<Item = Result<VrlValue, serde_json::Error>>,
ignore_error: bool,
) -> Result<Vec<JsonValue>> {
) -> Result<Vec<VrlValue>> {
values
.into_iter()
.try_fold(Vec::with_capacity(100), |mut acc_array, item| match item {
Ok(item_value) => {
match item_value {
JsonValue::Array(item_array) => {
VrlValue::Array(item_array) => {
acc_array.extend(item_array);
}
JsonValue::Object(_) => {
VrlValue::Object(_) => {
acc_array.push(item_value);
}
_ => {
@@ -331,7 +331,7 @@ fn transform_ndjson_array_factory(
/// Dryrun pipeline with given data
async fn dryrun_pipeline_inner(
value: Vec<PipelineValue>,
value: Vec<VrlValue>,
pipeline: Arc<pipeline::Pipeline>,
pipeline_handler: PipelineHandlerRef,
query_ctx: &QueryContextRef,
@@ -494,7 +494,7 @@ fn add_step_info_for_pipeline_dryrun_error(step_msg: &str, e: Error) -> Response
/// Parse the data with given content type
/// If the content type is invalid, return error
/// content type is one of application/json, text/plain, application/x-ndjson
fn parse_dryrun_data(data_type: String, data: String) -> Result<Vec<PipelineValue>> {
fn parse_dryrun_data(data_type: String, data: String) -> Result<Vec<VrlValue>> {
if let Ok(content_type) = ContentType::from_str(&data_type) {
extract_pipeline_value_by_content_type(content_type, Bytes::from(data), false)
} else {
@@ -741,17 +741,15 @@ impl<'a> TryFrom<&'a ContentType> for EventPayloadResolver<'a> {
}
impl EventPayloadResolver<'_> {
fn parse_payload(&self, payload: Bytes, ignore_errors: bool) -> Result<Vec<PipelineValue>> {
fn parse_payload(&self, payload: Bytes, ignore_errors: bool) -> Result<Vec<VrlValue>> {
match self.inner {
EventPayloadResolverInner::Json => {
pipeline::json_array_to_map(transform_ndjson_array_factory(
Deserializer::from_slice(&payload).into_iter(),
ignore_errors,
)?)
.context(PipelineSnafu)
}
EventPayloadResolverInner::Json => transform_ndjson_array_factory(
Deserializer::from_slice(&payload).into_iter(),
ignore_errors,
),
EventPayloadResolverInner::Ndjson => {
let mut result = Vec::with_capacity(1000);
let mut buffer = Buffers::new(1000);
for (index, line) in payload.lines().enumerate() {
let mut line = match line {
Ok(line) if !line.is_empty() => line,
@@ -768,8 +766,10 @@ impl EventPayloadResolver<'_> {
// simd_json, per its documentation, only de-escapes strings at the character level,
// like any other JSON parser, so this should be safe here.
if let Ok(v) = simd_json::to_owned_value(unsafe { line.as_bytes_mut() }) {
let v = pipeline::simd_json_to_map(v).context(PipelineSnafu)?;
if let Ok(v) = simd_json::serde::from_slice_with_buffers(
unsafe { line.as_bytes_mut() },
&mut buffer,
) {
result.push(v);
} else if !ignore_errors {
warn!("invalid JSON at index: {}, content: {:?}", index, line);
@@ -787,8 +787,11 @@ impl EventPayloadResolver<'_> {
.filter_map(|line| line.ok().filter(|line| !line.is_empty()))
.map(|line| {
let mut map = BTreeMap::new();
map.insert("message".to_string(), PipelineValue::String(line));
PipelineValue::Map(map.into())
map.insert(
KeyString::from("message"),
VrlValue::Bytes(Bytes::from(line)),
);
VrlValue::Object(map)
})
.collect::<Vec<_>>();
Ok(result)
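The text/plain branch above wraps each non-empty line in a single-key object. A hedged sketch of that shape, using the same `KeyString` and `Bytes` types:

```rust
use std::collections::BTreeMap;

use bytes::Bytes;
use vrl::value::{KeyString, Value as VrlValue};

// Each plain-text line becomes {"message": <line bytes>}.
fn line_to_value(line: String) -> VrlValue {
    let mut map = BTreeMap::new();
    map.insert(KeyString::from("message"), VrlValue::Bytes(Bytes::from(line)));
    VrlValue::Object(map)
}
```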
@@ -801,7 +804,7 @@ fn extract_pipeline_value_by_content_type(
content_type: ContentType,
payload: Bytes,
ignore_errors: bool,
) -> Result<Vec<PipelineValue>> {
) -> Result<Vec<VrlValue>> {
EventPayloadResolver::try_from(&content_type).and_then(|resolver| {
resolver
.parse_payload(payload, ignore_errors)
@@ -899,36 +902,37 @@ pub struct LogState {
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_transform_ndjson() {
let s = "{\"a\": 1}\n{\"b\": 2}";
let a = JsonValue::Array(
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
let a = serde_json::to_string(
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
)
.to_string();
.unwrap();
assert_eq!(a, "[{\"a\":1},{\"b\":2}]");
let s = "{\"a\": 1}";
let a = JsonValue::Array(
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
let a = serde_json::to_string(
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
)
.to_string();
.unwrap();
assert_eq!(a, "[{\"a\":1}]");
let s = "[{\"a\": 1}]";
let a = JsonValue::Array(
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
let a = serde_json::to_string(
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
)
.to_string();
.unwrap();
assert_eq!(a, "[{\"a\":1}]");
let s = "[{\"a\": 1}, {\"b\": 2}]";
let a = JsonValue::Array(
transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
let a = serde_json::to_string(
&transform_ndjson_array_factory(Deserializer::from_str(s).into_iter(), false).unwrap(),
)
.to_string();
.unwrap();
assert_eq!(a, "[{\"a\":1},{\"b\":2}]");
}
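The rewritten assertions lean on `VrlValue` implementing `Serialize`: byte strings serialize as JSON strings, so the flattened NDJSON can be compared textually. A sketch of that round trip:

```rust
use vrl::value::Value as VrlValue;

fn main() {
    let v: VrlValue = serde_json::json!([{"a": 1}, {"b": 2}]).into();
    // Object keys are sorted (BTreeMap), so the output is deterministic.
    assert_eq!(serde_json::to_string(&v).unwrap(), r#"[{"a":1},{"b":2}]"#);
}
```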
@@ -945,21 +949,18 @@ mod tests {
let fail_rest =
extract_pipeline_value_by_content_type(ContentType::json(), payload.clone(), true);
assert!(fail_rest.is_ok());
assert_eq!(
fail_rest.unwrap(),
pipeline::json_array_to_map(vec![json!({"a": 1})]).unwrap()
);
assert_eq!(fail_rest.unwrap(), vec![json!({"a": 1}).into()]);
let fail_only_wrong =
extract_pipeline_value_by_content_type(NDJSON_CONTENT_TYPE.clone(), payload, true);
assert!(fail_only_wrong.is_ok());
let mut map1 = BTreeMap::new();
map1.insert("a".to_string(), PipelineValue::Uint64(1));
let map1 = PipelineValue::Map(map1.into());
map1.insert(KeyString::from("a"), VrlValue::Integer(1));
let map1 = VrlValue::Object(map1);
let mut map2 = BTreeMap::new();
map2.insert("c".to_string(), PipelineValue::Uint64(1));
let map2 = PipelineValue::Map(map2.into());
map2.insert(KeyString::from("c"), VrlValue::Integer(1));
let map2 = VrlValue::Object(map2);
assert_eq!(fail_only_wrong.unwrap(), vec![map1, map2]);
}
}
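The expected values change here because the old `pipeline::Value` distinguished `Uint64`, while vrl folds every integer into a signed `Integer(i64)`. A sketch of the resulting semantics:

```rust
use vrl::value::Value as VrlValue;

fn main() {
    let v: VrlValue = serde_json::json!({"a": 1}).into();
    if let VrlValue::Object(map) = v {
        // 1 deserializes as Integer(1); there is no unsigned variant in vrl.
        assert_eq!(map.get("a"), Some(&VrlValue::Integer(1)));
    }
}
```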

View File

@@ -25,6 +25,7 @@ use axum::extract::State;
use axum::Extension;
use axum_extra::TypedHeader;
use bytes::Bytes;
use chrono::DateTime;
use common_query::prelude::GREPTIME_TIMESTAMP;
use common_query::{Output, OutputData};
use common_telemetry::{error, warn};
@@ -39,6 +40,7 @@ use prost::Message;
use quoted_string::test_utils::TestSpec;
use session::context::{Channel, QueryContext};
use snafu::{ensure, OptionExt, ResultExt};
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
DecodeOtlpRequestSnafu, InvalidLokiLabelsSnafu, InvalidLokiPayloadSnafu, ParseJsonSnafu,
@@ -197,7 +199,7 @@ pub async fn loki_ingest(
}
/// This is the holder of the loki lines parsed from json or protobuf.
/// The generic here is either [serde_json::Value] or [Vec<LabelPairAdapter>].
/// The generic here is either [VrlValue] or [Vec<LabelPairAdapter>].
/// Depending on the target destination, this can be converted to [LokiRawItem] or [LokiPipeline].
pub struct LokiMiddleItem<T> {
pub ts: i64,
@@ -218,7 +220,7 @@ pub struct LokiRawItem {
/// This is the line item prepared for the pipeline engine.
pub struct LokiPipeline {
pub map: pipeline::Value,
pub map: VrlValue,
}
/// This is the flow of the Loki ingestion.
@@ -255,7 +257,7 @@ pub struct LokiPipeline {
/// +------------------+ +---------------------+
fn extract_item<T>(content_type: ContentType, bytes: Bytes) -> Result<Box<dyn Iterator<Item = T>>>
where
LokiMiddleItem<serde_json::Value>: Into<T>,
LokiMiddleItem<VrlValue>: Into<T>,
LokiMiddleItem<Vec<LabelPairAdapter>>: Into<T>,
{
match content_type {
@@ -270,15 +272,14 @@ where
}
struct LokiJsonParser {
pub streams: VecDeque<serde_json::Value>,
pub streams: VecDeque<VrlValue>,
}
impl LokiJsonParser {
pub fn from_bytes(bytes: Bytes) -> Result<Self> {
let payload: serde_json::Value =
serde_json::from_slice(bytes.as_ref()).context(ParseJsonSnafu)?;
let payload: VrlValue = serde_json::from_slice(bytes.as_ref()).context(ParseJsonSnafu)?;
let serde_json::Value::Object(mut map) = payload else {
let VrlValue::Object(mut map) = payload else {
return InvalidLokiPayloadSnafu {
msg: "payload is not an object",
}
@@ -289,7 +290,7 @@ impl LokiJsonParser {
msg: "missing streams",
})?;
let serde_json::Value::Array(streams) = streams else {
let VrlValue::Array(streams) = streams else {
return InvalidLokiPayloadSnafu {
msg: "streams is not an array",
}
@@ -308,7 +309,7 @@ impl Iterator for LokiJsonParser {
fn next(&mut self) -> Option<Self::Item> {
while let Some(stream) = self.streams.pop_front() {
// get lines from the map
let serde_json::Value::Object(mut map) = stream else {
let VrlValue::Object(mut map) = stream else {
warn!("stream is not an object, {:?}", stream);
continue;
};
@@ -316,7 +317,7 @@ impl Iterator for LokiJsonParser {
warn!("missing lines on stream, {:?}", map);
continue;
};
let serde_json::Value::Array(lines) = lines else {
let VrlValue::Array(lines) = lines else {
warn!("lines is not an array, {:?}", lines);
continue;
};
@@ -325,13 +326,15 @@ impl Iterator for LokiJsonParser {
let labels = map
.remove(LABEL_KEY)
.and_then(|m| match m {
serde_json::Value::Object(labels) => Some(labels),
VrlValue::Object(labels) => Some(labels),
_ => None,
})
.map(|m| {
m.into_iter()
.filter_map(|(k, v)| match v {
serde_json::Value::String(v) => Some((k, v)),
VrlValue::Bytes(v) => {
Some((k.into(), String::from_utf8_lossy(&v).to_string()))
}
_ => None,
})
.collect::<BTreeMap<String, String>>()
@@ -347,16 +350,16 @@ impl Iterator for LokiJsonParser {
}
struct JsonStreamItem {
pub lines: VecDeque<serde_json::Value>,
pub lines: VecDeque<VrlValue>,
pub labels: Option<BTreeMap<String, String>>,
}
impl Iterator for JsonStreamItem {
type Item = LokiMiddleItem<serde_json::Value>;
type Item = LokiMiddleItem<VrlValue>;
fn next(&mut self) -> Option<Self::Item> {
while let Some(line) = self.lines.pop_front() {
let serde_json::Value::Array(line) = line else {
let VrlValue::Array(line) = line else {
warn!("line is not an array, {:?}", line);
continue;
};
@@ -364,11 +367,11 @@ impl Iterator for JsonStreamItem {
warn!("line is too short, {:?}", line);
continue;
}
let mut line: VecDeque<serde_json::Value> = line.into();
let mut line: VecDeque<VrlValue> = line.into();
// get ts
let ts = line.pop_front().and_then(|ts| match ts {
serde_json::Value::String(ts) => ts.parse::<i64>().ok(),
VrlValue::Bytes(ts) => String::from_utf8_lossy(&ts).parse::<i64>().ok(),
_ => {
warn!("missing or invalid timestamp, {:?}", ts);
None
@@ -379,7 +382,7 @@ impl Iterator for JsonStreamItem {
};
let line_text = line.pop_front().and_then(|l| match l {
serde_json::Value::String(l) => Some(l),
VrlValue::Bytes(l) => Some(String::from_utf8_lossy(&l).to_string()),
_ => {
warn!("missing or invalid line, {:?}", l);
None
@@ -402,8 +405,8 @@ impl Iterator for JsonStreamItem {
}
}
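Since vrl stores strings as `Bytes`, the nanosecond timestamp (which Loki's JSON protocol ships as a string) is now decoded lossily before parsing. A sketch of that step in isolation:

```rust
use vrl::value::Value as VrlValue;

// Loki lines arrive as ["<ts-nanos-as-string>", "<log line>", ...].
fn parse_ts(v: VrlValue) -> Option<i64> {
    match v {
        VrlValue::Bytes(ts) => String::from_utf8_lossy(&ts).parse::<i64>().ok(),
        _ => None,
    }
}
```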
impl From<LokiMiddleItem<serde_json::Value>> for LokiRawItem {
fn from(val: LokiMiddleItem<serde_json::Value>) -> Self {
impl From<LokiMiddleItem<VrlValue>> for LokiRawItem {
fn from(val: LokiMiddleItem<VrlValue>) -> Self {
let LokiMiddleItem {
ts,
line,
@@ -413,13 +416,16 @@ impl From<LokiMiddleItem<serde_json::Value>> for LokiRawItem {
let structured_metadata = structured_metadata
.and_then(|m| match m {
serde_json::Value::Object(m) => Some(m),
VrlValue::Object(m) => Some(m),
_ => None,
})
.map(|m| {
m.into_iter()
.filter_map(|(k, v)| match v {
serde_json::Value::String(v) => Some((k, Value::String(v.into()))),
VrlValue::Bytes(bytes) => Some((
k.into(),
Value::String(String::from_utf8_lossy(&bytes).to_string().into()),
)),
_ => None,
})
.collect::<BTreeMap<String, Value>>()
@@ -436,8 +442,8 @@ impl From<LokiMiddleItem<serde_json::Value>> for LokiRawItem {
}
}
impl From<LokiMiddleItem<serde_json::Value>> for LokiPipeline {
fn from(value: LokiMiddleItem<serde_json::Value>) -> Self {
impl From<LokiMiddleItem<VrlValue>> for LokiPipeline {
fn from(value: LokiMiddleItem<VrlValue>) -> Self {
let LokiMiddleItem {
ts,
line,
@@ -447,37 +453,33 @@ impl From<LokiMiddleItem<serde_json::Value>> for LokiPipeline {
let mut map = BTreeMap::new();
map.insert(
GREPTIME_TIMESTAMP.to_string(),
pipeline::Value::Timestamp(pipeline::Timestamp::Nanosecond(ts)),
KeyString::from(GREPTIME_TIMESTAMP),
VrlValue::Timestamp(DateTime::from_timestamp_nanos(ts)),
);
map.insert(
LOKI_LINE_COLUMN_NAME.to_string(),
pipeline::Value::String(line),
KeyString::from(LOKI_LINE_COLUMN_NAME),
VrlValue::Bytes(line.into()),
);
if let Some(serde_json::Value::Object(m)) = structured_metadata {
if let Some(VrlValue::Object(m)) = structured_metadata {
for (k, v) in m {
match pipeline::Value::try_from(v) {
Ok(v) => {
map.insert(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, k), v);
}
Err(e) => {
warn!("not a valid value, {:?}", e);
}
}
map.insert(
KeyString::from(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, k)),
v,
);
}
}
if let Some(v) = labels {
v.into_iter().for_each(|(k, v)| {
map.insert(
format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k),
pipeline::Value::String(v),
KeyString::from(format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k)),
VrlValue::Bytes(v.into()),
);
});
}
LokiPipeline {
map: pipeline::Value::Map(pipeline::Map::from(map)),
map: VrlValue::Object(map),
}
}
}
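Timestamps now map onto vrl's native `Timestamp` variant via chrono instead of `pipeline::Timestamp`. A minimal sketch of the two inserts above; the literal keys stand in for the `GREPTIME_TIMESTAMP` and `LOKI_LINE_COLUMN_NAME` constants:

```rust
use std::collections::BTreeMap;

use chrono::DateTime;
use vrl::value::{KeyString, Value as VrlValue};

fn loki_entry(ts_nanos: i64, line: String) -> VrlValue {
    let mut map = BTreeMap::new();
    map.insert(
        KeyString::from("greptime_timestamp"), // stands in for GREPTIME_TIMESTAMP
        VrlValue::Timestamp(DateTime::from_timestamp_nanos(ts_nanos)),
    );
    map.insert(
        KeyString::from("loki_line"), // stands in for LOKI_LINE_COLUMN_NAME
        VrlValue::Bytes(line.into()),
    );
    VrlValue::Object(map)
}
```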
@@ -584,12 +586,12 @@ impl From<LokiMiddleItem<Vec<LabelPairAdapter>>> for LokiPipeline {
let mut map = BTreeMap::new();
map.insert(
GREPTIME_TIMESTAMP.to_string(),
pipeline::Value::Timestamp(pipeline::Timestamp::Nanosecond(ts)),
KeyString::from(GREPTIME_TIMESTAMP),
VrlValue::Timestamp(DateTime::from_timestamp_nanos(ts)),
);
map.insert(
LOKI_LINE_COLUMN_NAME.to_string(),
pipeline::Value::String(line),
KeyString::from(LOKI_LINE_COLUMN_NAME),
VrlValue::Bytes(line.into()),
);
structured_metadata
@@ -597,22 +599,22 @@ impl From<LokiMiddleItem<Vec<LabelPairAdapter>>> for LokiPipeline {
.into_iter()
.for_each(|d| {
map.insert(
format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, d.name),
pipeline::Value::String(d.value),
KeyString::from(format!("{}{}", LOKI_PIPELINE_METADATA_PREFIX, d.name)),
VrlValue::Bytes(d.value.into()),
);
});
if let Some(v) = labels {
v.into_iter().for_each(|(k, v)| {
map.insert(
format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k),
pipeline::Value::String(v),
KeyString::from(format!("{}{}", LOKI_PIPELINE_LABEL_PREFIX, k)),
VrlValue::Bytes(v.into()),
);
});
}
LokiPipeline {
map: pipeline::Value::Map(pipeline::Map::from(map)),
map: VrlValue::Object(map),
}
}
}
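Both `From` impls flatten labels the same way: prefix the key, store the value as bytes. A hedged sketch; the prefix literal is a placeholder for `LOKI_PIPELINE_LABEL_PREFIX`, whose value is not shown in this diff:

```rust
use std::collections::BTreeMap;

use vrl::value::{KeyString, Value as VrlValue};

fn insert_labels(
    map: &mut BTreeMap<KeyString, VrlValue>,
    labels: BTreeMap<String, String>,
) {
    for (k, v) in labels {
        // "loki_label_" is illustrative only; the real prefix is the constant.
        map.insert(
            KeyString::from(format!("loki_label_{}", k)),
            VrlValue::Bytes(v.into()),
        );
    }
}
```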

View File

@@ -23,10 +23,10 @@ use common_error::ext::ErrorExt;
use common_query::Output;
use datafusion_expr::LogicalPlan;
use log_query::LogQuery;
use pipeline::Value;
use query::parser::PromQuery;
use session::context::QueryContextRef;
use sql::statements::statement::Statement;
use vrl::value::Value;
/// SqlQueryInterceptor can track life cycle of a sql query and customize or
/// abort its execution at given point.

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap as StdHashMap;
use std::collections::{BTreeMap, HashMap as StdHashMap};
use api::v1::column_data_type_extension::TypeExt;
use api::v1::value::ValueData;
@@ -20,6 +20,7 @@ use api::v1::{
ColumnDataType, ColumnDataTypeExtension, ColumnOptions, ColumnSchema, JsonTypeExtension, Row,
RowInsertRequest, Rows, SemanticType, Value as GreptimeValue,
};
use bytes::Bytes;
use jsonb::{Number as JsonbNumber, Value as JsonbValue};
use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
use opentelemetry_proto::tonic::common::v1::{any_value, AnyValue, InstrumentationScope, KeyValue};
@@ -27,13 +28,13 @@ use opentelemetry_proto::tonic::logs::v1::{LogRecord, ResourceLogs, ScopeLogs};
use pipeline::{
ContextReq, GreptimePipelineParams, PipelineContext, PipelineWay, SchemaInfo, SelectInfo,
};
use serde_json::{Map, Value};
use session::context::QueryContextRef;
use snafu::{ensure, ResultExt};
use snafu::ensure;
use vrl::prelude::NotNan;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::{
IncompatibleSchemaSnafu, NotSupportedSnafu, PipelineSnafu, Result,
UnsupportedJsonDataTypeForTagSnafu,
IncompatibleSchemaSnafu, NotSupportedSnafu, Result, UnsupportedJsonDataTypeForTagSnafu,
};
use crate::http::event::PipelineIngestRequest;
use crate::otlp::trace::attributes::OtlpAnyValue;
@@ -69,8 +70,7 @@ pub async fn to_grpc_insert_requests(
Ok(ContextReq::default_opt_with_reqs(vec![insert_request]))
}
PipelineWay::Pipeline(pipeline_def) => {
let data = parse_export_logs_service_request(request);
let array = pipeline::json_array_to_map(data).context(PipelineSnafu)?;
let array = parse_export_logs_service_request(request);
let pipeline_ctx =
PipelineContext::new(&pipeline_def, &pipeline_params, query_ctx.channel());
@@ -93,16 +93,16 @@ pub async fn to_grpc_insert_requests(
}
}
fn scope_to_pipeline_value(scope: Option<InstrumentationScope>) -> (Value, Value, Value) {
fn scope_to_pipeline_value(scope: Option<InstrumentationScope>) -> (VrlValue, VrlValue, VrlValue) {
scope
.map(|x| {
(
Value::Object(key_value_to_map(x.attributes)),
Value::String(x.version),
Value::String(x.name),
VrlValue::Object(key_value_to_map(x.attributes)),
VrlValue::Bytes(x.version.into()),
VrlValue::Bytes(x.name.into()),
)
})
.unwrap_or((Value::Null, Value::Null, Value::Null))
.unwrap_or((VrlValue::Null, VrlValue::Null, VrlValue::Null))
}
fn scope_to_jsonb(
@@ -121,53 +121,59 @@ fn scope_to_jsonb(
fn log_to_pipeline_value(
log: LogRecord,
resource_schema_url: Value,
resource_attr: Value,
scope_schema_url: Value,
scope_name: Value,
scope_version: Value,
scope_attrs: Value,
) -> Value {
let log_attrs = Value::Object(key_value_to_map(log.attributes));
let mut map = Map::new();
map.insert("Timestamp".to_string(), Value::from(log.time_unix_nano));
resource_schema_url: VrlValue,
resource_attr: VrlValue,
scope_schema_url: VrlValue,
scope_name: VrlValue,
scope_version: VrlValue,
scope_attrs: VrlValue,
) -> VrlValue {
let log_attrs = VrlValue::Object(key_value_to_map(log.attributes));
let mut map = BTreeMap::new();
map.insert(
"ObservedTimestamp".to_string(),
Value::from(log.observed_time_unix_nano),
"Timestamp".into(),
VrlValue::Integer(log.time_unix_nano as i64),
);
map.insert(
"ObservedTimestamp".into(),
VrlValue::Integer(log.observed_time_unix_nano as i64),
);
// needs to be converted to a string
map.insert(
"TraceId".to_string(),
Value::String(bytes_to_hex_string(&log.trace_id)),
"TraceId".into(),
VrlValue::Bytes(bytes_to_hex_string(&log.trace_id).into()),
);
map.insert(
"SpanId".to_string(),
Value::String(bytes_to_hex_string(&log.span_id)),
"SpanId".into(),
VrlValue::Bytes(bytes_to_hex_string(&log.span_id).into()),
);
map.insert("TraceFlags".to_string(), Value::from(log.flags));
map.insert("SeverityText".to_string(), Value::String(log.severity_text));
map.insert("TraceFlags".into(), VrlValue::Integer(log.flags as i64));
map.insert(
"SeverityNumber".to_string(),
Value::from(log.severity_number),
"SeverityText".into(),
VrlValue::Bytes(log.severity_text.into()),
);
map.insert(
"SeverityNumber".into(),
VrlValue::Integer(log.severity_number as i64),
);
// needs to be converted to a string
map.insert(
"Body".to_string(),
"Body".into(),
log.body
.as_ref()
.map(|x| Value::String(log_body_to_string(x)))
.unwrap_or(Value::Null),
.map(|x| VrlValue::Bytes(log_body_to_string(x).into()))
.unwrap_or(VrlValue::Null),
);
map.insert("ResourceSchemaUrl".to_string(), resource_schema_url);
map.insert("ResourceSchemaUrl".into(), resource_schema_url);
map.insert("ResourceAttributes".to_string(), resource_attr);
map.insert("ScopeSchemaUrl".to_string(), scope_schema_url);
map.insert("ScopeName".to_string(), scope_name);
map.insert("ScopeVersion".to_string(), scope_version);
map.insert("ScopeAttributes".to_string(), scope_attrs);
map.insert("LogAttributes".to_string(), log_attrs);
Value::Object(map)
map.insert("ResourceAttributes".into(), resource_attr);
map.insert("ScopeSchemaUrl".into(), scope_schema_url);
map.insert("ScopeName".into(), scope_name);
map.insert("ScopeVersion".into(), scope_version);
map.insert("ScopeAttributes".into(), scope_attrs);
map.insert("LogAttributes".into(), log_attrs);
VrlValue::Object(map)
}
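One behavioral nuance: OTLP carries nanosecond timestamps as `u64`, and the new code narrows them with `as i64`, which would wrap for values beyond `i64::MAX`. A defensive variant is sketched below for illustration; it is not what the commit ships:

```rust
use vrl::value::Value as VrlValue;

// Saturate instead of wrapping when the u64 doesn't fit in i64.
fn nanos_to_value(nanos: u64) -> VrlValue {
    VrlValue::Integer(i64::try_from(nanos).unwrap_or(i64::MAX))
}
```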
fn build_otlp_logs_identity_schema() -> Vec<ColumnSchema> {
@@ -622,18 +628,18 @@ fn merge_values(
/// transform otlp logs request to pipeline value
/// https://opentelemetry.io/docs/concepts/signals/logs/
fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<Value> {
fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<VrlValue> {
let mut result = Vec::new();
for r in request.resource_logs {
let resource_attr = r
.resource
.map(|x| Value::Object(key_value_to_map(x.attributes)))
.unwrap_or(Value::Null);
let resource_schema_url = Value::String(r.schema_url);
.map(|x| VrlValue::Object(key_value_to_map(x.attributes)))
.unwrap_or(VrlValue::Null);
let resource_schema_url = VrlValue::Bytes(r.schema_url.into());
for scope_logs in r.scope_logs {
let (scope_attrs, scope_version, scope_name) =
scope_to_pipeline_value(scope_logs.scope);
let scope_schema_url = Value::String(scope_logs.schema_url);
let scope_schema_url = VrlValue::Bytes(scope_logs.schema_url.into());
for log in scope_logs.log_records {
let value = log_to_pipeline_value(
log,
@@ -652,43 +658,39 @@ fn parse_export_logs_service_request(request: ExportLogsServiceRequest) -> Vec<V
}
// convert AnyValue to pipeline value
fn any_value_to_pipeline_value(value: any_value::Value) -> Value {
fn any_value_to_vrl_value(value: any_value::Value) -> VrlValue {
match value {
any_value::Value::StringValue(s) => Value::String(s),
any_value::Value::IntValue(i) => Value::from(i),
any_value::Value::DoubleValue(d) => Value::from(d),
any_value::Value::BoolValue(b) => Value::Bool(b),
any_value::Value::ArrayValue(a) => {
let values = a
any_value::Value::StringValue(s) => VrlValue::Bytes(s.into()),
any_value::Value::IntValue(i) => VrlValue::Integer(i),
any_value::Value::DoubleValue(d) => VrlValue::Float(NotNan::new(d).unwrap()),
any_value::Value::BoolValue(b) => VrlValue::Boolean(b),
any_value::Value::ArrayValue(array_value) => {
let values = array_value
.values
.into_iter()
.map(|v| match v.value {
Some(value) => any_value_to_pipeline_value(value),
None => Value::Null,
})
.filter_map(|v| v.value.map(any_value_to_vrl_value))
.collect();
Value::Array(values)
VrlValue::Array(values)
}
any_value::Value::KvlistValue(kv) => {
let value = key_value_to_map(kv.values);
Value::Object(value)
any_value::Value::KvlistValue(key_value_list) => {
VrlValue::Object(key_value_to_map(key_value_list.values))
}
any_value::Value::BytesValue(b) => Value::String(bytes_to_hex_string(&b)),
any_value::Value::BytesValue(items) => VrlValue::Bytes(Bytes::from(items)),
}
}
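Caveat on the `DoubleValue` arm above: `NotNan::new(d).unwrap()` panics if an OTLP attribute carries NaN. A non-panicking alternative (illustrative only, not what this commit does) maps NaN to `Null`:

```rust
use vrl::prelude::NotNan;
use vrl::value::Value as VrlValue;

fn double_to_value(d: f64) -> VrlValue {
    // NotNan::new returns Err for NaN; fall back to Null instead of panicking.
    NotNan::new(d).map(VrlValue::Float).unwrap_or(VrlValue::Null)
}
```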
// convert an OTLP KeyValue vec to a map
fn key_value_to_map(key_values: Vec<KeyValue>) -> Map<String, Value> {
let mut map = Map::new();
fn key_value_to_map(key_values: Vec<KeyValue>) -> BTreeMap<KeyString, VrlValue> {
let mut map = BTreeMap::new();
for kv in key_values {
let value = match kv.value {
Some(value) => match value.value {
Some(value) => any_value_to_pipeline_value(value),
None => Value::Null,
Some(value) => any_value_to_vrl_value(value),
None => VrlValue::Null,
},
None => Value::Null,
None => VrlValue::Null,
};
map.insert(kv.key.clone(), value);
map.insert(kv.key.into(), value);
}
map
}

View File

@@ -20,12 +20,13 @@ use api::greptime_proto;
use api::v1::{ColumnDataType, ColumnSchema, RowInsertRequest, Rows, SemanticType};
use common_time::timestamp::TimeUnit;
use pipeline::{
unwrap_or_continue_if_err, ContextReq, DispatchedTo, Pipeline, PipelineContext,
PipelineDefinition, PipelineExecOutput, SchemaInfo, TransformedOutput, TransformerMode, Value,
GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
identity_pipeline, unwrap_or_continue_if_err, ContextReq, DispatchedTo, Pipeline,
PipelineContext, PipelineDefinition, PipelineExecOutput, SchemaInfo, TransformedOutput,
TransformerMode, GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME,
};
use session::context::{Channel, QueryContextRef};
use snafu::ResultExt;
use vrl::value::Value as VrlValue;
use crate::error::{CatalogSnafu, PipelineSnafu, Result};
use crate::http::event::PipelineIngestRequest;
@@ -93,7 +94,7 @@ async fn run_identity_pipeline(
.await
.context(CatalogSnafu)?
};
pipeline::identity_pipeline(data_array, table, pipeline_ctx)
identity_pipeline(data_array, table, pipeline_ctx)
.map(|opt_map| ContextReq::from_opt_map(opt_map, table_name))
.context(PipelineSnafu)
}
@@ -117,7 +118,7 @@ async fn run_custom_pipeline(
} = pipeline_req;
let arr_len = pipeline_maps.len();
let mut transformed_map = HashMap::new();
let mut dispatched: BTreeMap<DispatchedTo, Vec<Value>> = BTreeMap::new();
let mut dispatched: BTreeMap<DispatchedTo, Vec<VrlValue>> = BTreeMap::new();
let mut schema_info = match pipeline.transformer() {
TransformerMode::GreptimeTransformer(greptime_transformer) => {

View File

@@ -20,12 +20,15 @@ use std::slice;
use api::prom_store::remote::Sample;
use bytes::{Buf, Bytes};
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition, Value};
use common_telemetry::warn;
use pipeline::{ContextReq, GreptimePipelineParams, PipelineContext, PipelineDefinition};
use prost::encoding::message::merge;
use prost::encoding::{decode_key, decode_varint, WireType};
use prost::DecodeError;
use session::context::QueryContextRef;
use snafu::OptionExt;
use vrl::prelude::NotNan;
use vrl::value::{KeyString, Value as VrlValue};
use crate::error::InternalSnafu;
use crate::http::event::PipelineIngestRequest;
@@ -342,7 +345,7 @@ impl PromWriteRequest {
/// let's keep it that way for now.
pub struct PromSeriesProcessor {
pub(crate) use_pipeline: bool,
pub(crate) table_values: BTreeMap<String, Vec<Value>>,
pub(crate) table_values: BTreeMap<String, Vec<VrlValue>>,
// optional fields for pipeline
pub(crate) pipeline_handler: Option<PipelineHandlerRef>,
@@ -379,29 +382,33 @@ impl PromSeriesProcessor {
series: &mut PromTimeSeries,
prom_validation_mode: PromValidationMode,
) -> Result<(), DecodeError> {
let mut vec_pipeline_map: Vec<Value> = Vec::new();
let mut vec_pipeline_map = Vec::new();
let mut pipeline_map = BTreeMap::new();
for l in series.labels.iter() {
let name = prom_validation_mode.decode_string(&l.name)?;
let value = prom_validation_mode.decode_string(&l.value)?;
pipeline_map.insert(name, Value::String(value));
pipeline_map.insert(KeyString::from(name), VrlValue::Bytes(value.into()));
}
let one_sample = series.samples.len() == 1;
for s in series.samples.iter() {
// skip NaN value
if s.value.is_nan() {
let Ok(value) = NotNan::new(s.value) else {
warn!("Invalid float value: {}", s.value);
continue;
}
};
let timestamp = s.timestamp;
pipeline_map.insert(GREPTIME_TIMESTAMP.to_string(), Value::Int64(timestamp));
pipeline_map.insert(GREPTIME_VALUE.to_string(), Value::Float64(s.value));
pipeline_map.insert(
KeyString::from(GREPTIME_TIMESTAMP),
VrlValue::Integer(timestamp),
);
pipeline_map.insert(KeyString::from(GREPTIME_VALUE), VrlValue::Float(value));
if one_sample {
vec_pipeline_map.push(Value::Map(pipeline_map.into()));
vec_pipeline_map.push(VrlValue::Object(pipeline_map));
break;
} else {
vec_pipeline_map.push(Value::Map(pipeline_map.clone().into()));
vec_pipeline_map.push(VrlValue::Object(pipeline_map.clone()));
}
}
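Here `NotNan::new` doubles as the NaN filter that the old `is_nan()` check provided, and the shared label map is cloned per sample when a series has more than one. A sketch of the per-sample assembly under those assumptions; the literal keys stand in for `GREPTIME_TIMESTAMP` and `GREPTIME_VALUE`:

```rust
use std::collections::BTreeMap;

use vrl::prelude::NotNan;
use vrl::value::{KeyString, Value as VrlValue};

fn sample_to_value(
    labels: &BTreeMap<KeyString, VrlValue>,
    ts_millis: i64,
    value: f64,
) -> Option<VrlValue> {
    let value = NotNan::new(value).ok()?; // NaN samples are skipped
    let mut map = labels.clone();
    map.insert(KeyString::from("greptime_timestamp"), VrlValue::Integer(ts_millis));
    map.insert(KeyString::from("greptime_value"), VrlValue::Float(value));
    Some(VrlValue::Object(map))
}
```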