feat: impl proc macro range_fn and some aggr_over_time functions (#1072)

* impl range_fn proc macro

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl some aggr_over_time fn

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl present_over_time and absent_over_time

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* accomplish planner, and correct type cast

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* document the macro

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix styles

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update irate/idelta test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add test cases

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2023-03-07 23:39:45 +08:00
committed by GitHub
parent 819b60ca13
commit 3a527c0fd5
10 changed files with 688 additions and 36 deletions

View File

@@ -10,6 +10,7 @@ proc-macro = true
[dependencies]
quote = "1.0"
syn = "1.0"
proc-macro2 = "1.0"
[dev-dependencies]
arc-swap = "1.0"

View File

@@ -12,8 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod range_fn;
use proc_macro::TokenStream;
use quote::{quote, quote_spanned};
use range_fn::process_range_fn;
use syn::parse::Parser;
use syn::spanned::Spanned;
use syn::{parse_macro_input, DeriveInput, ItemStruct};
@@ -83,3 +86,31 @@ pub fn as_aggr_func_creator(_args: TokenStream, input: TokenStream) -> TokenStre
}
.into()
}
/// Attribute macro to convert an arithimetic function to a range function. The annotated function
/// should accept servaral arrays as input and return a single value as output. This procedure
/// macro can works on any number of input parameters. Return type can be either primitive type
/// or wrapped in `Option`.
///
/// # Example
/// Take `count_over_time()` in PromQL as an example:
/// ```rust, ignore
/// /// The count of all values in the specified interval.
/// #[range_fn(
/// name = "CountOverTime",
/// ret = "Float64Array",
/// display_name = "prom_count_over_time"
/// )]
/// pub fn count_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> f64 {
/// values.len() as f64
/// }
/// ```
///
/// # Arguments
/// - `name`: The name of the generated [ScalarUDF] struct.
/// - `ret`: The return type of the generated UDF function.
/// - `display_name`: The display name of the generated UDF function.
#[proc_macro_attribute]
pub fn range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
process_range_fn(args, input)
}

View File

@@ -0,0 +1,230 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::punctuated::Punctuated;
use syn::spanned::Spanned;
use syn::token::Comma;
use syn::{
parse_macro_input, Attribute, AttributeArgs, FnArg, Ident, ItemFn, Meta, MetaNameValue,
NestedMeta, Signature, Type, TypeReference, Visibility,
};
/// Internal util macro to early return on error.
macro_rules! ok {
($item:expr) => {
match $item {
Ok(item) => item,
Err(e) => return e.into_compile_error().into(),
}
};
}
pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
// extract arg map
let arg_pairs = parse_macro_input!(args as AttributeArgs);
let arg_span = arg_pairs[0].span();
let arg_map = ok!(extract_arg_map(arg_pairs));
// decompose the fn block
let compute_fn = parse_macro_input!(input as ItemFn);
let ItemFn {
attrs,
vis,
sig,
block,
} = compute_fn;
// extract fn arg list
let Signature {
inputs,
ident: fn_name,
..
} = &sig;
let arg_types = ok!(extract_input_types(inputs));
// build the struct and its impl block
let struct_code = build_struct(
attrs,
vis,
ok!(get_ident(&arg_map, "name", arg_span)),
ok!(get_ident(&arg_map, "display_name", arg_span)),
);
let calc_fn_code = build_calc_fn(
ok!(get_ident(&arg_map, "name", arg_span)),
arg_types,
fn_name.clone(),
ok!(get_ident(&arg_map, "ret", arg_span)),
);
// preserve this fn, but remove its `pub` modifier
let input_fn_code: TokenStream = quote! {
#sig { #block }
}
.into();
let mut result = TokenStream::new();
result.extend(struct_code);
result.extend(calc_fn_code);
result.extend(input_fn_code);
result
}
/// Extract a String <-> Ident map from the attribute args.
fn extract_arg_map(args: Vec<NestedMeta>) -> Result<HashMap<String, Ident>, syn::Error> {
args.into_iter()
.map(|meta| {
if let NestedMeta::Meta(Meta::NameValue(MetaNameValue { path, lit, .. })) = meta {
let name = path.get_ident().unwrap().to_string();
let ident = match lit {
syn::Lit::Str(lit_str) => lit_str.parse::<Ident>(),
_ => Err(syn::Error::new(
lit.span(),
"Unexpected attribute format. Expected `name = \"value\"`",
)),
}?;
Ok((name, ident))
} else {
Err(syn::Error::new(
meta.span(),
"Unexpected attribute format. Expected `name = \"value\"`",
))
}
})
.collect::<Result<HashMap<String, Ident>, syn::Error>>()
}
/// Helper function to get an Ident from the previous arg map.
fn get_ident(map: &HashMap<String, Ident>, key: &str, span: Span) -> Result<Ident, syn::Error> {
map.get(key)
.cloned()
.ok_or_else(|| syn::Error::new(span, format!("Expect attribute {key} but not found")))
}
/// Extract the argument list from the annotated function.
fn extract_input_types(inputs: &Punctuated<FnArg, Comma>) -> Result<Vec<Type>, syn::Error> {
inputs
.iter()
.map(|arg| match arg {
FnArg::Receiver(receiver) => Err(syn::Error::new(receiver.span(), "expected bool")),
FnArg::Typed(pat_type) => Ok(*pat_type.ty.clone()),
})
.collect()
}
fn build_struct(
attrs: Vec<Attribute>,
vis: Visibility,
name: Ident,
display_name_ident: Ident,
) -> TokenStream {
let display_name = display_name_ident.to_string();
quote! {
#(#attrs)*
#[derive(Debug)]
#vis struct #name {}
impl #name {
pub const fn name() -> &'static str {
#display_name
}
pub fn scalar_udf() -> ScalarUDF {
ScalarUDF {
name: Self::name().to_string(),
signature: Signature::new(
TypeSignature::Exact(Self::input_type()),
Volatility::Immutable,
),
return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))),
fun: Arc::new(Self::calc),
}
}
// TODO(ruihang): this should be parameterized
// time index column and value column
fn input_type() -> Vec<DataType> {
vec![
RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
RangeArray::convert_data_type(DataType::Float64),
]
}
// TODO(ruihang): this should be parameterized
fn return_type() -> DataType {
DataType::Float64
}
}
}
.into()
}
fn build_calc_fn(
name: Ident,
param_types: Vec<Type>,
fn_name: Ident,
ret_type: Ident,
) -> TokenStream {
let param_names = param_types
.iter()
.enumerate()
.map(|(i, ty)| Ident::new(&format!("param_{}", i), ty.span()))
.collect::<Vec<_>>();
let unref_param_types = param_types
.iter()
.map(|ty| {
if let Type::Reference(TypeReference { elem, .. }) = ty {
elem.as_ref().clone()
} else {
ty.clone()
}
})
.collect::<Vec<_>>();
let num_params = param_types.len();
let param_numbers = (0..num_params).collect::<Vec<_>>();
let range_array_names = param_names
.iter()
.map(|name| Ident::new(&format!("{}_range_array", name), name.span()))
.collect::<Vec<_>>();
let first_range_array_name = range_array_names.first().unwrap().clone();
quote! {
impl #name {
fn calc(input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
assert_eq!(input.len(), #num_params);
#( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.data().clone().into())?; )*
// TODO(ruihang): add ensure!()
let mut result_array = Vec::new();
for index in 0..#first_range_array_name.len(){
#( let #param_names = #range_array_names.get(index).unwrap().as_any().downcast_ref::<#unref_param_types>().unwrap().clone(); )*
// TODO(ruihang): add ensure!() to check length
let result = #fn_name(#( &#param_names, )*);
result_array.push(result);
}
let result = ColumnarValue::Array(Arc::new(#ret_type::from_iter(result_array)));
Ok(result)
}
}
}
.into()
}