pipeline/etl/processor/
vrl_processor.rs1use std::cell::RefCell;
16use std::collections::BTreeMap;
17
18use chrono_tz::Tz;
19use once_cell::sync::Lazy;
20use snafu::{OptionExt, ensure};
21use vrl::compiler::runtime::Runtime;
22use vrl::compiler::{Program, TargetValue, compile};
23use vrl::diagnostic::Formatter;
24use vrl::prelude::TimeZone;
25use vrl::value::{Kind, Secrets, Value as VrlValue};
26
27use crate::error::{
28 CompileVrlSnafu, Error, ExecuteVrlSnafu, KeyMustBeStringSnafu, Result, VrlRegexValueSnafu,
29 VrlReturnValueSnafu,
30};
31use crate::etl::processor::yaml_string;
32
33pub(crate) const PROCESSOR_VRL: &str = "vrl";
34const SOURCE: &str = "source";
35
36static UTC_TIMEZONE: Lazy<TimeZone> = Lazy::new(|| TimeZone::Named(Tz::UTC));
37
38thread_local! {
39 static VRL_RUNTIME: RefCell<Runtime> = RefCell::new(Runtime::default());
40}
41
42#[derive(Debug)]
43pub struct VrlProcessor {
44 source: String,
45 program: Program,
46}
47
48impl VrlProcessor {
49 pub fn new(source: String) -> Result<Self> {
50 let fns = vrl::stdlib::all();
51
52 let compile_result = compile(&source, &fns).map_err(|e| {
53 CompileVrlSnafu {
54 msg: Formatter::new(&source, e).to_string(),
55 }
56 .build()
57 })?;
58
59 let program = compile_result.program;
60
61 let result_def = program.final_type_info().result;
63 let kind = result_def.kind();
64 ensure!(
68 kind.contains_object() || kind.contains_array(),
69 VrlReturnValueSnafu {
70 result_kind: kind.clone(),
71 }
72 );
73 check_regex_output(kind)?;
74
75 Ok(Self { source, program })
76 }
77
78 pub fn resolve(&self, value: VrlValue) -> Result<VrlValue> {
79 let mut target = TargetValue {
80 value,
81 metadata: VrlValue::Object(BTreeMap::new()),
82 secrets: Secrets::default(),
83 };
84
85 let re = VRL_RUNTIME
86 .with(|runtime| {
87 let mut runtime = runtime.borrow_mut();
88 runtime.clear();
89 let result = runtime.resolve(&mut target, &self.program, &UTC_TIMEZONE);
90 runtime.clear();
91 result
92 })
93 .map_err(|e| {
94 ExecuteVrlSnafu {
95 msg: e.get_expression_error().to_string(),
96 }
97 .build()
98 })?;
99
100 Ok(re)
101 }
102}
103
104impl TryFrom<&yaml_rust::yaml::Hash> for VrlProcessor {
105 type Error = Error;
106
107 fn try_from(value: &yaml_rust::yaml::Hash) -> Result<Self> {
108 let mut source = String::new();
109 for (k, v) in value.iter() {
110 let key = k
111 .as_str()
112 .with_context(|| KeyMustBeStringSnafu { k: k.clone() })?;
113 if key == SOURCE {
114 source = yaml_string(v, SOURCE)?;
115 }
116 }
117 let processor = VrlProcessor::new(source)?;
118 Ok(processor)
119 }
120}
121
122impl crate::etl::processor::Processor for VrlProcessor {
123 fn kind(&self) -> &str {
124 PROCESSOR_VRL
125 }
126
127 fn ignore_missing(&self) -> bool {
128 true
129 }
130
131 fn exec_mut(&self, val: VrlValue) -> Result<VrlValue> {
132 self.resolve(val)
133 }
134}
135
136fn check_regex_output(output_kind: &Kind) -> Result<()> {
137 if output_kind.is_regex() {
138 return VrlRegexValueSnafu.fail();
139 }
140
141 if let Some(arr) = output_kind.as_array() {
142 let k = arr.known();
143 for v in k.values() {
144 check_regex_output(v)?
145 }
146 }
147
148 if let Some(obj) = output_kind.as_object() {
149 let k = obj.known();
150 for v in k.values() {
151 check_regex_output(v)?
152 }
153 }
154
155 Ok(())
156}
157
#[cfg(test)]
mod tests {
    use vrl::prelude::Bytes;
    use vrl::value::KeyString;

    use super::*;

    /// Compiles a small program and checks that it copies, deletes and stamps
    /// fields on an object input.
    #[test]
    fn test_vrl() {
        let source = r#"
.name.a = .user_info.name
.name.b = .user_info.name
del(.user_info)
.timestamp = now()
.
"#;

        // `expect` keeps the underlying error in the failure output, which
        // `assert!(x.is_ok())` would discard.
        let processor =
            VrlProcessor::new(source.to_string()).expect("VRL program should compile");

        let mut user_info = BTreeMap::new();
        user_info.insert(
            KeyString::from("name"),
            VrlValue::Bytes(Bytes::from("certain_name")),
        );

        let mut input = BTreeMap::new();
        input.insert(KeyString::from("user_info"), VrlValue::Object(user_info));

        let output = processor
            .resolve(VrlValue::Object(input))
            .expect("VRL program should resolve");

        let fields = output.as_object().expect("result should be an object");

        let name = fields
            .get("name")
            .expect("`name` should be present")
            .as_object()
            .expect("`name` should be an object");
        assert!(matches!(
            name.get("a").expect("`name.a` should be present"),
            VrlValue::Bytes(x) if x == "certain_name"
        ));
        assert!(matches!(
            name.get("b").expect("`name.b` should be present"),
            VrlValue::Bytes(x) if x == "certain_name"
        ));

        let timestamp = fields
            .get("timestamp")
            .expect("`timestamp` should be present");
        assert!(matches!(timestamp, VrlValue::Timestamp(_)));
    }

    /// Builds the processor from a YAML pipeline snippet and checks that the
    /// block scalar under `source` is preserved verbatim.
    #[test]
    fn test_yaml_to_vrl() {
        let yaml = r#"
processors:
  - vrl:
      source: |
        .name.a = .user_info.name
        .name.b = .user_info.name
        del(.user_info)
        .timestamp = now()
        .
"#;
        let docs = yaml_rust::YamlLoader::load_from_str(yaml).expect("YAML should parse");
        let vrl_processor_yaml = docs
            .first()
            .and_then(|x| x.as_hash())
            .and_then(|x| x.get(&yaml_rust::Yaml::String("processors".to_string())))
            .and_then(|x| x.as_vec())
            .and_then(|x| x.first())
            .and_then(|x| x.as_hash())
            .and_then(|x| x.get(&yaml_rust::Yaml::String("vrl".to_string())))
            .and_then(|x| x.as_hash())
            .expect("YAML should contain a `vrl` processor hash");

        let vrl = VrlProcessor::try_from(vrl_processor_yaml)
            .expect("processor should build from YAML");

        assert_eq!(
            vrl.source,
            ".name.a = .user_info.name\n.name.b = .user_info.name\ndel(.user_info)\n.timestamp = now()\n.\n"
        );
    }

    /// A program whose result object has a regex-typed field must be rejected
    /// at construction time.
    #[test]
    fn test_regex() {
        let source = r#"
.re = r'(?i)^Hello, World!$'
del(.re)
.re = r'(?i)^Hello, World!$'
.
"#;

        let v = VrlProcessor::new(source.to_string());
        assert!(v.is_err());
    }
}