Skip to main content

pipeline/etl/processor/
vrl_processor.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::cell::RefCell;
16use std::collections::BTreeMap;
17
18use chrono_tz::Tz;
19use once_cell::sync::Lazy;
20use snafu::{OptionExt, ensure};
21use vrl::compiler::runtime::Runtime;
22use vrl::compiler::{Program, TargetValue, compile};
23use vrl::diagnostic::Formatter;
24use vrl::prelude::TimeZone;
25use vrl::value::{Kind, Secrets, Value as VrlValue};
26
27use crate::error::{
28    CompileVrlSnafu, Error, ExecuteVrlSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu,
29    VrlReturnValueSnafu,
30};
31use crate::etl::processor::yaml_string;
32
33pub(crate) const PROCESSOR_VRL: &str = "vrl";
34const SOURCE: &str = "source";
35
36static UTC_TIMEZONE: Lazy<TimeZone> = Lazy::new(|| TimeZone::Named(Tz::UTC));
37
38thread_local! {
39    static VRL_RUNTIME: RefCell<Runtime> = RefCell::new(Runtime::default());
40}
41
42#[derive(Debug)]
43pub struct VrlProcessor {
44    source: String,
45    program: Program,
46}
47
48impl VrlProcessor {
49    pub fn new(source: String) -> Result<Self> {
50        let fns = vrl::stdlib::all();
51
52        let compile_result = compile(&source, &fns).map_err(|e| {
53            CompileVrlSnafu {
54                msg: Formatter::new(&source, e).to_string(),
55            }
56            .build()
57        })?;
58
59        let program = compile_result.program;
60
61        // check if the return value is have regex
62        let result_def = program.final_type_info().result;
63        let kind = result_def.kind();
64        // Check if the return type could possibly be an object or array.
65        // We use contains_* methods since VRL type inference may return
66        // a Kind that represents multiple possible types.
67        ensure!(
68            kind.contains_object() || kind.contains_array(),
69            VrlReturnValueSnafu {
70                result_kind: kind.clone(),
71            }
72        );
73        check_regex_output(kind)?;
74
75        Ok(Self { source, program })
76    }
77
78    pub fn resolve(&self, value: VrlValue) -> Result<VrlValue> {
79        let mut target = TargetValue {
80            value,
81            metadata: VrlValue::Object(BTreeMap::new()),
82            secrets: Secrets::default(),
83        };
84
85        let re = VRL_RUNTIME
86            .with(|runtime| {
87                let mut runtime = runtime.borrow_mut();
88                runtime.clear();
89                let result = runtime.resolve(&mut target, &self.program, &UTC_TIMEZONE);
90                runtime.clear();
91                result
92            })
93            .map_err(|e| {
94                ExecuteVrlSnafu {
95                    msg: e.get_expression_error().to_string(),
96                }
97                .build()
98            })?;
99
100        Ok(re)
101    }
102}
103
104impl TryFrom<&yaml_rust::yaml::Hash> for VrlProcessor {
105    type Error = Error;
106
107    fn try_from(value: &yaml_rust::yaml::Hash) -> Result<Self> {
108        let mut source = String::new();
109        for (k, v) in value.iter() {
110            let key = k
111                .as_str()
112                .with_context(|| KeyMustBeStringSnafu { k: k.clone() })?;
113            if key == SOURCE {
114                source = yaml_string(v, SOURCE)?;
115            }
116        }
117        let processor = VrlProcessor::new(source)?;
118        Ok(processor)
119    }
120}
121
122impl crate::etl::processor::Processor for VrlProcessor {
123    fn kind(&self) -> &str {
124        PROCESSOR_VRL
125    }
126
127    fn ignore_missing(&self) -> bool {
128        true
129    }
130
131    fn exec_mut(&self, val: VrlValue) -> Result<VrlValue> {
132        self.resolve(val)
133    }
134}
135
136fn check_regex_output(output_kind: &Kind) -> Result<()> {
137    if output_kind.is_regex() {
138        return VrlRegexValueSnafu.fail();
139    }
140
141    if let Some(arr) = output_kind.as_array() {
142        let k = arr.known();
143        for v in k.values() {
144            check_regex_output(v)?
145        }
146    }
147
148    if let Some(obj) = output_kind.as_object() {
149        let k = obj.known();
150        for v in k.values() {
151            check_regex_output(v)?
152        }
153    }
154
155    Ok(())
156}
157
#[cfg(test)]
mod tests {

    use vrl::prelude::Bytes;
    use vrl::value::KeyString;

    use super::*;

    /// Compiles a small program and checks the shape of the resolved value.
    #[test]
    fn test_vrl() {
        let source = r#"
.name.a = .user_info.name
.name.b = .user_info.name
del(.user_info)
.timestamp = now()
.
"#;

        let processor =
            VrlProcessor::new(source.to_string()).expect("the VRL source should compile");

        let user_info = BTreeMap::from([(
            KeyString::from("name"),
            VrlValue::Bytes(Bytes::from("certain_name")),
        )]);
        let input = BTreeMap::from([(KeyString::from("user_info"), VrlValue::Object(user_info))]);

        let output = processor
            .resolve(VrlValue::Object(input))
            .expect("the program should resolve");

        assert!(matches!(output, VrlValue::Object(_)));
        let fields = output.as_object().unwrap();

        let name = fields
            .get("name")
            .expect("`name` should be present")
            .as_object()
            .unwrap();
        for key in ["a", "b"] {
            assert!(matches!(name.get(key).unwrap(), VrlValue::Bytes(x) if x == "certain_name"));
        }

        let timestamp = fields
            .get("timestamp")
            .expect("`timestamp` should be present");
        assert!(matches!(timestamp, VrlValue::Timestamp(_)));
    }

    /// Builds the processor from a YAML pipeline definition and checks that
    /// the block-scalar source is taken over verbatim.
    #[test]
    fn test_yaml_to_vrl() {
        let yaml = r#"
processors:
  - vrl:
      source: |
        .name.a = .user_info.name
        .name.b = .user_info.name
        del(.user_info)
        .timestamp = now()
        .
"#;
        let docs = yaml_rust::YamlLoader::load_from_str(yaml).unwrap();
        let vrl_processor_yaml = docs[0]["processors"][0]["vrl"].as_hash().unwrap();

        let vrl = VrlProcessor::try_from(vrl_processor_yaml)
            .expect("the YAML definition should yield a processor");

        assert_eq!(
            vrl.source,
            ".name.a = .user_info.name\n.name.b = .user_info.name\ndel(.user_info)\n.timestamp = now()\n.\n"
        );
    }

    /// A program whose result still holds a regex field must be rejected.
    #[test]
    fn test_regex() {
        let source = r#"
.re = r'(?i)^Hello, World!$'
del(.re)
.re = r'(?i)^Hello, World!$'
.
"#;

        assert!(VrlProcessor::new(source.to_string()).is_err());
    }
}