Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-27 08:29:59 +00:00
feat(flow): add eval_batch for ScalarExpr (#4551)
* refactor: better perf flow
* feat(WIP): batching proc
* feat: UnaryFunc::eval_batch untested
* feat: BinaryFunc::eval_batch untested
* feat: VariadicFunc::eval_batch untested
* feat: literal eval_batch
* refactor: move DfScalarFunc to separate file
* chore: remove unused imports
* feat: eval_batch df func & if-then
* chore: remove unused file
* refactor: use Batch type
* chore: remove unused
* chore: remove a done TODO
* refactor: per review
* chore: import
* refactor: eval_batch if then
* chore: typo
Cargo.lock (generated): 1 line changed

@@ -3798,6 +3798,7 @@ name = "flow"
version = "0.9.1"
dependencies = [
 "api",
 "arrow",
 "arrow-schema",
 "async-recursion",
 "async-trait",

@@ -9,6 +9,7 @@ workspace = true

[dependencies]
api.workspace = true
arrow.workspace = true
arrow-schema.workspace = true
async-recursion = "1.0"
async-trait.workspace = true

@@ -16,32 +16,21 @@
//!
//! And the [`Context`] is the environment for the render process; it contains all the necessary information for rendering.

use std::cell::RefCell;
use std::collections::{BTreeMap, VecDeque};
use std::ops::Range;
use std::rc::Rc;
use std::collections::BTreeMap;

use datatypes::data_type::ConcreteDataType;
use datatypes::value::{ListValue, Value};
use hydroflow::futures::SinkExt;
use hydroflow::lattices::cc_traits::Get;
use hydroflow::scheduled::graph::Hydroflow;
use hydroflow::scheduled::graph_ext::GraphExt;
use hydroflow::scheduled::port::{PortCtx, SEND};
use itertools::Itertools;
use snafu::{ensure, OptionExt, ResultExt};
use snafu::OptionExt;

use super::state::Scheduler;
use crate::compute::state::DataflowState;
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
use crate::error::{Error, EvalSnafu, InvalidQuerySnafu, NotImplementedSnafu, PlanSnafu};
use crate::expr::error::{DataTypeSnafu, InternalSnafu};
use crate::expr::{
    self, EvalError, GlobalId, LocalId, MapFilterProject, MfpPlan, SafeMfpPlan, ScalarExpr,
};
use crate::plan::{AccumulablePlan, KeyValPlan, Plan, ReducePlan, TypedPlan};
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter, Arrangement};
use crate::compute::types::{Collection, CollectionBundle, ErrCollector, Toff};
use crate::error::{Error, InvalidQuerySnafu, NotImplementedSnafu};
use crate::expr::{self, GlobalId, LocalId};
use crate::plan::{Plan, TypedPlan};
use crate::repr::{self, DiffRow};

mod map;
mod reduce;

@@ -218,20 +207,17 @@ mod test {
    use std::cell::RefCell;
    use std::rc::Rc;

    use common_time::DateTime;
    use datatypes::data_type::ConcreteDataType;
    use hydroflow::scheduled::graph::Hydroflow;
    use hydroflow::scheduled::graph_ext::GraphExt;
    use hydroflow::scheduled::handoff::VecHandoff;
    use pretty_assertions::{assert_eq, assert_ne};
    use pretty_assertions::assert_eq;

    use super::*;
    use crate::expr::BinaryFunc;
    use crate::repr::Row;

    pub fn run_and_check(
        state: &mut DataflowState,
        df: &mut Hydroflow,
        time_range: Range<i64>,
        time_range: std::ops::Range<i64>,
        expected: BTreeMap<i64, Vec<DiffRow>>,
        output: Rc<RefCell<Vec<DiffRow>>>,
    ) {

@@ -24,7 +24,7 @@ use crate::compute::state::Scheduler;
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
use crate::error::{Error, PlanSnafu};
use crate::expr::{EvalError, MapFilterProject, MfpPlan, ScalarExpr};
use crate::plan::{Plan, TypedPlan};
use crate::plan::TypedPlan;
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
use crate::utils::ArrangeHandler;

@@ -206,8 +206,6 @@ fn eval_mfp_core(

#[cfg(test)]
mod test {
    use std::cell::RefCell;
    use std::rc::Rc;

    use datatypes::data_type::ConcreteDataType;
    use hydroflow::scheduled::graph::Hydroflow;
@@ -216,6 +214,7 @@ mod test {
    use crate::compute::render::test::{get_output_handle, harness_test_ctx, run_and_check};
    use crate::compute::state::DataflowState;
    use crate::expr::{self, BinaryFunc, GlobalId};
    use crate::plan::Plan;
    use crate::repr::{ColumnType, RelationType};

    /// test if temporal filter works properly

@@ -18,17 +18,15 @@ use std::ops::Range;
use datatypes::data_type::ConcreteDataType;
use datatypes::value::{ListValue, Value};
use hydroflow::scheduled::graph_ext::GraphExt;
use hydroflow::scheduled::port::{PortCtx, SEND};
use itertools::Itertools;
use snafu::{ensure, OptionExt, ResultExt};

use crate::compute::render::{Context, SubgraphArg};
use crate::compute::state::Scheduler;
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
use crate::error::{Error, PlanSnafu};
use crate::expr::error::{DataAlreadyExpiredSnafu, DataTypeSnafu, InternalSnafu};
use crate::expr::{AggregateExpr, EvalError, ScalarExpr};
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, Plan, ReducePlan, TypedPlan};
use crate::expr::{EvalError, ScalarExpr};
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan, TypedPlan};
use crate::repr::{self, DiffRow, KeyValDiffRow, RelationType, Row};
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter, KeyExpiryManager};

@@ -790,8 +788,6 @@ fn from_val_to_slice_idx(
// TODO(discord9): add tests for accum ser/de
#[cfg(test)]
mod test {
    use std::cell::RefCell;
    use std::rc::Rc;

    use common_time::{DateTime, Interval, Timestamp};
    use datatypes::data_type::{ConcreteDataType, ConcreteDataType as CDT};
@@ -800,7 +796,10 @@ mod test {
    use super::*;
    use crate::compute::render::test::{get_output_handle, harness_test_ctx, run_and_check};
    use crate::compute::state::DataflowState;
    use crate::expr::{self, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, UnaryFunc};
    use crate::expr::{
        self, AggregateExpr, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, UnaryFunc,
    };
    use crate::plan::Plan;
    use crate::repr::{ColumnType, RelationType};

    /// SELECT sum(number) FROM numbers_with_ts GROUP BY tumble(ts, '1 second', '2021-07-01 00:00:00')

@@ -16,7 +16,7 @@

use std::collections::{BTreeMap, VecDeque};

use common_telemetry::{debug, info};
use common_telemetry::debug;
use hydroflow::scheduled::graph_ext::GraphExt;
use itertools::Itertools;
use snafu::OptionExt;
@@ -27,7 +27,7 @@ use crate::compute::render::Context;
use crate::compute::types::{Arranged, Collection, CollectionBundle, Toff};
use crate::error::{Error, PlanSnafu};
use crate::expr::error::InternalSnafu;
use crate::expr::{EvalError, GlobalId};
use crate::expr::EvalError;
use crate::repr::{DiffRow, Row, BROADCAST_CAP};

#[allow(clippy::mutable_key_type)]

@@ -13,7 +13,7 @@
// limitations under the License.

use std::cell::RefCell;
use std::collections::{BTreeMap, BTreeSet, VecDeque};
use std::collections::{BTreeMap, VecDeque};
use std::rc::Rc;

use hydroflow::scheduled::graph::Hydroflow;
@@ -22,12 +22,11 @@ use hydroflow::scheduled::handoff::TeeingHandoff;
use hydroflow::scheduled::port::RecvPort;
use hydroflow::scheduled::SubgraphId;
use itertools::Itertools;
use tokio::sync::{Mutex, RwLock};
use tokio::sync::Mutex;

use crate::compute::render::Context;
use crate::expr::{EvalError, ScalarExpr};
use crate::repr::DiffRow;
use crate::utils::{ArrangeHandler, Arrangement};
use crate::utils::ArrangeHandler;

pub type Toff<T = DiffRow> = TeeingHandoff<T>;

@@ -14,6 +14,7 @@

//! For declaring expressions in dataflow, including map, reduce, id, and join (TODO!) etc.

mod df_func;
pub(crate) mod error;
mod func;
mod id;
@@ -22,9 +23,92 @@ mod relation;
mod scalar;
mod signature;

pub(crate) use error::{EvalError, InvalidArgumentSnafu, OptimizeSnafu};
use datatypes::prelude::DataType;
use datatypes::vectors::VectorRef;
pub(crate) use df_func::{DfScalarFunction, RawDfScalarFn};
pub(crate) use error::{EvalError, InvalidArgumentSnafu};
pub(crate) use func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
pub(crate) use id::{GlobalId, Id, LocalId};
use itertools::Itertools;
pub(crate) use linear::{MapFilterProject, MfpPlan, SafeMfpPlan};
pub(crate) use relation::{AggregateExpr, AggregateFunc};
pub(crate) use scalar::{DfScalarFunction, RawDfScalarFn, ScalarExpr, TypedExpr};
pub(crate) use scalar::{ScalarExpr, TypedExpr};
use snafu::{ensure, ResultExt};

use crate::expr::error::DataTypeSnafu;

/// A batch of vectors that all have the same length but no schema; only useful in dataflow
pub struct Batch {
    batch: Vec<VectorRef>,
    row_count: usize,
}

impl Batch {
    pub fn new(batch: Vec<VectorRef>, row_count: usize) -> Self {
        Self { batch, row_count }
    }

    pub fn batch(&self) -> &[VectorRef] {
        &self.batch
    }

    pub fn row_count(&self) -> usize {
        self.row_count
    }

    /// Slices the `Batch`, returning a new `Batch`.
    ///
    /// # Panics
    /// This function panics if `offset + length > self.row_count()`.
    pub fn slice(&self, offset: usize, length: usize) -> Batch {
        let batch = self
            .batch()
            .iter()
            .map(|v| v.slice(offset, length))
            .collect_vec();
        Batch::new(batch, length)
    }

    /// Appends another batch to `self`
    pub fn append_batch(&mut self, other: Batch) -> Result<(), EvalError> {
        ensure!(
            self.batch.len() == other.batch.len(),
            InvalidArgumentSnafu {
                reason: format!(
                    "Expect two batches to have the same number of columns, found {} and {} columns",
                    self.batch.len(),
                    other.batch.len()
                )
            }
        );

        let batch_builders = self
            .batch
            .iter()
            .map(|v| {
                v.data_type()
                    .create_mutable_vector(self.row_count() + other.row_count())
            })
            .collect_vec();

        let mut result = vec![];
        let zelf_row_count = self.row_count();
        let other_row_count = other.row_count();
        for (idx, mut builder) in batch_builders.into_iter().enumerate() {
            builder
                .extend_slice_of(self.batch()[idx].as_ref(), 0, zelf_row_count)
                .context(DataTypeSnafu {
                    msg: "Failed to extend vector",
                })?;
            builder
                .extend_slice_of(other.batch()[idx].as_ref(), 0, other_row_count)
                .context(DataTypeSnafu {
                    msg: "Failed to extend vector",
                })?;
            result.push(builder.to_vector());
        }
        self.batch = result;
        self.row_count = zelf_row_count + other_row_count;
        Ok(())
    }
}

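A minimal usage sketch of the new Batch type (editor's illustration, not part of the diff; it mirrors the Int32Vector-based construction used in this change's tests):

use datatypes::vectors::Int32Vector;

// Build a one-column batch of three rows, as the tests in this change do.
let raw = vec![Some(1), Some(2), Some(3)];
let len = raw.len();
let mut batch = Batch::new(vec![Int32Vector::from(raw).slice(0, len)], len);

// `slice` keeps every column but narrows the row range.
let tail = batch.slice(1, 2);
assert_eq!(tail.row_count(), 2);

// `append_batch` requires matching column counts and rebuilds each column.
batch.append_batch(tail).unwrap();
assert_eq!(batch.row_count(), 5);
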
src/flow/src/expr/df_func.rs (new file, 293 lines)
@@ -0,0 +1,293 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Port DataFusion scalar functions to our scalar functions for use in dataflow

use std::sync::Arc;

use arrow::array::RecordBatchOptions;
use bytes::BytesMut;
use common_error::ext::BoxedError;
use common_recordbatch::DfRecordBatch;
use common_telemetry::debug;
use datafusion_physical_expr::PhysicalExpr;
use datatypes::data_type::DataType;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use prost::Message;
use snafu::{IntoError, ResultExt};
use substrait::error::{DecodeRelSnafu, EncodeRelSnafu};
use substrait::substrait_proto_df::proto::expression::ScalarFunction;

use crate::error::Error;
use crate::expr::error::{
    ArrowSnafu, DatafusionSnafu as EvalDatafusionSnafu, EvalError, ExternalSnafu,
    InvalidArgumentSnafu,
};
use crate::expr::{Batch, ScalarExpr};
use crate::repr::RelationDesc;
use crate::transform::{from_scalar_fn_to_df_fn_impl, FunctionExtensions};

/// A way to represent a scalar function that is implemented in Datafusion
#[derive(Debug, Clone)]
pub struct DfScalarFunction {
    /// The raw bytes encoded datafusion scalar function
    pub(crate) raw_fn: RawDfScalarFn,
    // TODO(discord9): directly from datafusion expr
    /// The implementation of the function
    pub(crate) fn_impl: Arc<dyn PhysicalExpr>,
    /// The input schema of the function
    pub(crate) df_schema: Arc<datafusion_common::DFSchema>,
}

impl DfScalarFunction {
    pub fn new(raw_fn: RawDfScalarFn, fn_impl: Arc<dyn PhysicalExpr>) -> Result<Self, Error> {
        Ok(Self {
            df_schema: Arc::new(raw_fn.input_schema.to_df_schema()?),
            raw_fn,
            fn_impl,
        })
    }

    pub async fn try_from_raw_fn(raw_fn: RawDfScalarFn) -> Result<Self, Error> {
        Ok(Self {
            fn_impl: raw_fn.get_fn_impl().await?,
            df_schema: Arc::new(raw_fn.input_schema.to_df_schema()?),
            raw_fn,
        })
    }

    /// Evaluate a batch of expressions using input values
    pub fn eval_batch(&self, batch: &Batch, exprs: &[ScalarExpr]) -> Result<VectorRef, EvalError> {
        let row_count = batch.row_count();
        let batch: Vec<_> = exprs
            .iter()
            .map(|expr| expr.eval_batch(batch))
            .collect::<Result<_, _>>()?;

        let schema = self.df_schema.inner().clone();

        let arrays = batch
            .iter()
            .map(|array| array.to_arrow_array())
            .collect::<Vec<_>>();
        let rb = DfRecordBatch::try_new_with_options(
            schema,
            arrays,
            &RecordBatchOptions::new().with_row_count(Some(row_count)),
        )
        .map_err(|err| {
            ArrowSnafu {
                context:
                    "Failed to create RecordBatch from values when eval_batch datafusion scalar function",
            }
            .into_error(err)
        })?;

        let len = rb.num_rows();

        let res = self.fn_impl.evaluate(&rb).map_err(|err| {
            EvalDatafusionSnafu {
                raw: err,
                context: "Failed to evaluate datafusion scalar function",
            }
            .build()
        })?;
        let res = common_query::columnar_value::ColumnarValue::try_from(&res)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        let res_vec = res
            .try_into_vector(len)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;

        Ok(res_vec)
    }

    /// eval a list of expressions using input values
    fn eval_args(values: &[Value], exprs: &[ScalarExpr]) -> Result<Vec<Value>, EvalError> {
        exprs
            .iter()
            .map(|expr| expr.eval(values))
            .collect::<Result<_, _>>()
    }

    // TODO(discord9): add RecordBatch support
    pub fn eval(&self, values: &[Value], exprs: &[ScalarExpr]) -> Result<Value, EvalError> {
        // first eval exprs to construct values to feed to datafusion
        let values: Vec<_> = Self::eval_args(values, exprs)?;
        if values.is_empty() {
            return InvalidArgumentSnafu {
                reason: "values is empty".to_string(),
            }
            .fail();
        }
        // TODO(discord9): make cols all array length of one
        let mut cols = vec![];
        for (idx, typ) in self
            .raw_fn
            .input_schema
            .typ()
            .column_types
            .iter()
            .enumerate()
        {
            let typ = typ.scalar_type();
            let mut array = typ.create_mutable_vector(1);
            array.push_value_ref(values[idx].as_value_ref());
            cols.push(array.to_vector().to_arrow_array());
        }
        let schema = self.df_schema.inner().clone();
        let rb = DfRecordBatch::try_new_with_options(
            schema,
            cols,
            &RecordBatchOptions::new().with_row_count(Some(1)),
        )
        .map_err(|err| {
            ArrowSnafu {
                context:
                    "Failed to create RecordBatch from values when eval datafusion scalar function",
            }
            .into_error(err)
        })?;

        let res = self.fn_impl.evaluate(&rb).map_err(|err| {
            EvalDatafusionSnafu {
                raw: err,
                context: "Failed to evaluate datafusion scalar function",
            }
            .build()
        })?;
        let res = common_query::columnar_value::ColumnarValue::try_from(&res)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        let res_vec = res
            .try_into_vector(1)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        let res_val = res_vec
            .try_get(0)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        Ok(res_val)
    }
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RawDfScalarFn {
    /// The raw bytes encoded datafusion scalar function
    pub(crate) f: bytes::BytesMut,
    /// The input schema of the function
    pub(crate) input_schema: RelationDesc,
    /// Extension contains mapping from function reference to function name
    pub(crate) extensions: FunctionExtensions,
}

impl RawDfScalarFn {
    pub fn from_proto(
        f: &substrait::substrait_proto_df::proto::expression::ScalarFunction,
        input_schema: RelationDesc,
        extensions: FunctionExtensions,
    ) -> Result<Self, Error> {
        let mut buf = BytesMut::new();
        f.encode(&mut buf)
            .context(EncodeRelSnafu)
            .map_err(BoxedError::new)
            .context(crate::error::ExternalSnafu)?;
        Ok(Self {
            f: buf,
            input_schema,
            extensions,
        })
    }
    async fn get_fn_impl(&self) -> Result<Arc<dyn PhysicalExpr>, Error> {
        let f = ScalarFunction::decode(&mut self.f.as_ref())
            .context(DecodeRelSnafu)
            .map_err(BoxedError::new)
            .context(crate::error::ExternalSnafu)?;
        debug!("Decoded scalar function: {:?}", f);

        let input_schema = &self.input_schema;
        let extensions = &self.extensions;

        from_scalar_fn_to_df_fn_impl(&f, input_schema, extensions).await
    }
}

impl std::cmp::PartialEq for DfScalarFunction {
    fn eq(&self, other: &Self) -> bool {
        self.raw_fn.eq(&other.raw_fn)
    }
}

// can't derive Eq because `Arc<dyn PhysicalExpr>` doesn't implement it, so implement it manually
impl std::cmp::Eq for DfScalarFunction {}

impl std::cmp::PartialOrd for DfScalarFunction {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl std::cmp::Ord for DfScalarFunction {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.raw_fn.cmp(&other.raw_fn)
    }
}
impl std::hash::Hash for DfScalarFunction {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.raw_fn.hash(state);
    }
}

#[cfg(test)]
mod test {
    use datatypes::prelude::ConcreteDataType;
    use substrait::substrait_proto_df::proto::expression::literal::LiteralType;
    use substrait::substrait_proto_df::proto::expression::{Literal, RexType};
    use substrait::substrait_proto_df::proto::function_argument::ArgType;
    use substrait::substrait_proto_df::proto::{Expression, FunctionArgument};

    use super::*;
    use crate::repr::{ColumnType, RelationType};

    #[tokio::test]
    async fn test_df_scalar_function() {
        let raw_scalar_func = ScalarFunction {
            function_reference: 0,
            arguments: vec![FunctionArgument {
                arg_type: Some(ArgType::Value(Expression {
                    rex_type: Some(RexType::Literal(Literal {
                        nullable: false,
                        type_variation_reference: 0,
                        literal_type: Some(LiteralType::I64(-1)),
                    })),
                })),
            }],
            output_type: None,
            ..Default::default()
        };
        let input_schema = RelationDesc::try_new(
            RelationType::new(vec![ColumnType::new_nullable(
                ConcreteDataType::null_datatype(),
            )]),
            vec!["null_column".to_string()],
        )
        .unwrap();
        let extensions = FunctionExtensions::from_iter(vec![(0, "abs")]);
        let raw_fn = RawDfScalarFn::from_proto(&raw_scalar_func, input_schema, extensions).unwrap();
        let df_func = DfScalarFunction::try_from_raw_fn(raw_fn).await.unwrap();
        assert_eq!(
            df_func
                .eval(&[Value::Null], &[ScalarExpr::Column(0)])
                .unwrap(),
            Value::Int64(1)
        );
    }
}

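The batched path evaluates the DataFusion function once per column batch instead of once per row. A sketch of how the test above might exercise eval_batch (editor's illustration, not part of the diff; NullVector comes from the datatypes crate, and the two-row output assumes abs(-1) is computed per row exactly as in the row-wise test):

use datatypes::vectors::NullVector;

// Two rows of the null input column; one vectorized call replaces two `eval` calls.
let batch = Batch::new(vec![NullVector::new(2).slice(0, 2)], 2);
let out = df_func
    .eval_batch(&batch, &[ScalarExpr::Column(0)])
    .unwrap();
assert_eq!(out.len(), 2);
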
@@ -14,17 +14,12 @@

//! Error handling for expression evaluation.

use std::any::Any;

use arrow_schema::ArrowError;
use common_error::ext::BoxedError;
use common_macro::stack_trace_debug;
use common_telemetry::common_error::ext::ErrorExt;
use common_telemetry::common_error::status_code::StatusCode;
use datafusion_common::DataFusionError;
use datatypes::data_type::ConcreteDataType;
use serde::{Deserialize, Serialize};
use snafu::{Location, ResultExt, Snafu};
use snafu::{Location, Snafu};

fn is_send_sync() {
    fn check<T: Send + Sync>() {}
@@ -113,6 +108,7 @@ pub enum EvalError {

    #[snafu(display("Arrow error: {raw:?}, context: {context}"))]
    Arrow {
        #[snafu(source)]
        raw: ArrowError,
        context: String,
        #[snafu(implicit)]

@@ -15,17 +15,20 @@
//! This module contains the definition of functions that can be used in expressions.

use std::collections::HashMap;
use std::sync::OnceLock;
use std::sync::{Arc, OnceLock};

use arrow::array::{ArrayRef, BooleanArray};
use common_error::ext::BoxedError;
use common_telemetry::debug;
use common_time::timestamp::TimeUnit;
use common_time::{DateTime, Timestamp};
use datafusion_expr::Operator;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::DataType;
use datatypes::types::cast;
use datatypes::types::cast::CastOption;
use datatypes::value::Value;
use datatypes::vectors::{
    BooleanVector, DateTimeVector, Helper, TimestampMillisecondVector, VectorRef,
};
use serde::{Deserialize, Serialize};
use smallvec::smallvec;
use snafu::{ensure, OptionExt, ResultExt};
@@ -34,12 +37,12 @@ use substrait::df_logical_plan::consumer::name_to_op;

use crate::error::{Error, ExternalSnafu, InvalidQuerySnafu, PlanSnafu};
use crate::expr::error::{
    CastValueSnafu, DivisionByZeroSnafu, EvalError, InternalSnafu, OverflowSnafu,
    ArrowSnafu, CastValueSnafu, DataTypeSnafu, DivisionByZeroSnafu, EvalError, OverflowSnafu,
    TryFromValueSnafu, TypeMismatchSnafu,
};
use crate::expr::signature::{GenericFn, Signature};
use crate::expr::{InvalidArgumentSnafu, ScalarExpr, TypedExpr};
use crate::repr::{self, value_to_internal_ts, Row};
use crate::expr::{Batch, InvalidArgumentSnafu, ScalarExpr, TypedExpr};
use crate::repr::{self, value_to_internal_ts};

/// UnmaterializableFunc is a function that can't be evaluated independently,
/// and requires special handling
@@ -221,6 +224,129 @@ impl UnaryFunc {
        }
    }

    pub fn eval_batch(&self, batch: &Batch, expr: &ScalarExpr) -> Result<VectorRef, EvalError> {
        let arg_col = expr.eval_batch(batch)?;
        match self {
            Self::Not => {
                let arrow_array = arg_col.to_arrow_array();
                let bool_array = arrow_array
                    .as_any()
                    .downcast_ref::<BooleanArray>()
                    .context({
                        TypeMismatchSnafu {
                            expected: ConcreteDataType::boolean_datatype(),
                            actual: arg_col.data_type(),
                        }
                    })?;
                let ret = arrow::compute::not(bool_array).context(ArrowSnafu { context: "not" })?;
                let ret = BooleanVector::from(ret);
                Ok(Arc::new(ret))
            }
            Self::IsNull => {
                let arrow_array = arg_col.to_arrow_array();
                let ret = arrow::compute::is_null(&arrow_array)
                    .context(ArrowSnafu { context: "is_null" })?;
                let ret = BooleanVector::from(ret);
                Ok(Arc::new(ret))
            }
            Self::IsTrue | Self::IsFalse => {
                let arrow_array = arg_col.to_arrow_array();
                let bool_array = arrow_array
                    .as_any()
                    .downcast_ref::<BooleanArray>()
                    .context({
                        TypeMismatchSnafu {
                            expected: ConcreteDataType::boolean_datatype(),
                            actual: arg_col.data_type(),
                        }
                    })?;

                if matches!(self, Self::IsTrue) {
                    Ok(Arc::new(BooleanVector::from(bool_array.clone())))
                } else {
                    let ret =
                        arrow::compute::not(bool_array).context(ArrowSnafu { context: "not" })?;
                    Ok(Arc::new(BooleanVector::from(ret)))
                }
            }
            Self::StepTimestamp => {
                let datetime_array = get_datetime_array(&arg_col)?;
                let date_array_ref = datetime_array
                    .as_any()
                    .downcast_ref::<arrow::array::Date64Array>()
                    .context({
                        TypeMismatchSnafu {
                            expected: ConcreteDataType::datetime_datatype(),
                            actual: ConcreteDataType::from_arrow_type(datetime_array.data_type()),
                        }
                    })?;

                let ret = arrow::compute::unary(date_array_ref, |arr| arr + 1);
                let ret = DateTimeVector::from(ret);
                Ok(Arc::new(ret))
            }
            Self::Cast(to) => {
                let arrow_array = arg_col.to_arrow_array();
                let ret = arrow::compute::cast(&arrow_array, &to.as_arrow_type())
                    .context(ArrowSnafu { context: "cast" })?;
                let vector = Helper::try_into_vector(ret).context(DataTypeSnafu {
                    msg: "Fail to convert to Vector",
                })?;
                Ok(vector)
            }
            Self::TumbleWindowFloor {
                window_size,
                start_time,
            } => {
                let datetime_array = get_datetime_array(&arg_col)?;
                let date_array_ref = datetime_array
                    .as_any()
                    .downcast_ref::<arrow::array::Date64Array>()
                    .context({
                        TypeMismatchSnafu {
                            expected: ConcreteDataType::datetime_datatype(),
                            actual: ConcreteDataType::from_arrow_type(datetime_array.data_type()),
                        }
                    })?;

                let start_time = start_time.map(|t| t.val());
                let window_size = (window_size.to_nanosecond() / 1_000_000) as repr::Duration; // nanosecond to millisecond

                let ret = arrow::compute::unary(date_array_ref, |ts| {
                    get_window_start(ts, window_size, start_time)
                });

                let ret = TimestampMillisecondVector::from(ret);
                Ok(Arc::new(ret))
            }
            Self::TumbleWindowCeiling {
                window_size,
                start_time,
            } => {
                let datetime_array = get_datetime_array(&arg_col)?;
                let date_array_ref = datetime_array
                    .as_any()
                    .downcast_ref::<arrow::array::Date64Array>()
                    .context({
                        TypeMismatchSnafu {
                            expected: ConcreteDataType::datetime_datatype(),
                            actual: ConcreteDataType::from_arrow_type(datetime_array.data_type()),
                        }
                    })?;

                let start_time = start_time.map(|t| t.val());
                let window_size = (window_size.to_nanosecond() / 1_000_000) as repr::Duration; // nanosecond to millisecond

                let ret = arrow::compute::unary(date_array_ref, |ts| {
                    get_window_start(ts, window_size, start_time) + window_size
                });

                let ret = TimestampMillisecondVector::from(ret);
                Ok(Arc::new(ret))
            }
        }
    }

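Where the row-wise eval below is invoked once per value, eval_batch issues a single Arrow kernel call per column. A minimal sketch of the Not case (editor's illustration, not part of the diff; the types are already imported in this file):

let raw = vec![Some(true), Some(false), None];
let len = raw.len();
let batch = Batch::new(vec![BooleanVector::from(raw).slice(0, len)], len);
// Negate the whole column with one arrow::compute::not call.
let negated = UnaryFunc::Not
    .eval_batch(&batch, &ScalarExpr::Column(0))
    .unwrap();
assert_eq!(negated.len(), len);
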
    /// Evaluate the function with given values and expression
    ///
    /// # Arguments
@@ -314,6 +440,23 @@ impl UnaryFunc {
    }
}

fn get_datetime_array(vector: &VectorRef) -> Result<arrow::array::ArrayRef, EvalError> {
    let arrow_array = vector.to_arrow_array();
    let datetime_array =
        if *arrow_array.data_type() == ConcreteDataType::datetime_datatype().as_arrow_type() {
            arrow_array
        } else {
            arrow::compute::cast(
                &arrow_array,
                &ConcreteDataType::datetime_datatype().as_arrow_type(),
            )
            .context(ArrowSnafu {
                context: "Trying to cast to datetime in StepTimestamp",
            })?
        };
    Ok(datetime_array)
}

fn get_window_start(
    ts: repr::Timestamp,
    window_size: repr::Duration,
@@ -692,6 +835,98 @@ impl BinaryFunc {
        Ok((spec_fn, signature))
    }

    pub fn eval_batch(
        &self,
        batch: &Batch,
        expr1: &ScalarExpr,
        expr2: &ScalarExpr,
    ) -> Result<VectorRef, EvalError> {
        let left = expr1.eval_batch(batch)?;
        let left = left.to_arrow_array();
        let right = expr2.eval_batch(batch)?;
        let right = right.to_arrow_array();

        let arrow_array: ArrayRef = match self {
            Self::Eq => Arc::new(
                arrow::compute::kernels::cmp::eq(&left, &right)
                    .context(ArrowSnafu { context: "eq" })?,
            ),
            Self::NotEq => Arc::new(
                arrow::compute::kernels::cmp::neq(&left, &right)
                    .context(ArrowSnafu { context: "neq" })?,
            ),
            Self::Lt => Arc::new(
                arrow::compute::kernels::cmp::lt(&left, &right)
                    .context(ArrowSnafu { context: "lt" })?,
            ),
            Self::Lte => Arc::new(
                arrow::compute::kernels::cmp::lt_eq(&left, &right)
                    .context(ArrowSnafu { context: "lte" })?,
            ),
            Self::Gt => Arc::new(
                arrow::compute::kernels::cmp::gt(&left, &right)
                    .context(ArrowSnafu { context: "gt" })?,
            ),
            Self::Gte => Arc::new(
                arrow::compute::kernels::cmp::gt_eq(&left, &right)
                    .context(ArrowSnafu { context: "gte" })?,
            ),

            Self::AddInt16
            | Self::AddInt32
            | Self::AddInt64
            | Self::AddUInt16
            | Self::AddUInt32
            | Self::AddUInt64
            | Self::AddFloat32
            | Self::AddFloat64 => arrow::compute::kernels::numeric::add(&left, &right)
                .context(ArrowSnafu { context: "add" })?,

            Self::SubInt16
            | Self::SubInt32
            | Self::SubInt64
            | Self::SubUInt16
            | Self::SubUInt32
            | Self::SubUInt64
            | Self::SubFloat32
            | Self::SubFloat64 => arrow::compute::kernels::numeric::sub(&left, &right)
                .context(ArrowSnafu { context: "sub" })?,

            Self::MulInt16
            | Self::MulInt32
            | Self::MulInt64
            | Self::MulUInt16
            | Self::MulUInt32
            | Self::MulUInt64
            | Self::MulFloat32
            | Self::MulFloat64 => arrow::compute::kernels::numeric::mul(&left, &right)
                .context(ArrowSnafu { context: "mul" })?,

            Self::DivInt16
            | Self::DivInt32
            | Self::DivInt64
            | Self::DivUInt16
            | Self::DivUInt32
            | Self::DivUInt64
            | Self::DivFloat32
            | Self::DivFloat64 => arrow::compute::kernels::numeric::div(&left, &right)
                .context(ArrowSnafu { context: "div" })?,

            Self::ModInt16
            | Self::ModInt32
            | Self::ModInt64
            | Self::ModUInt16
            | Self::ModUInt32
            | Self::ModUInt64 => arrow::compute::kernels::numeric::rem(&left, &right)
                .context(ArrowSnafu { context: "rem" })?,
        };

        let vector = Helper::try_into_vector(arrow_array).context(DataTypeSnafu {
            msg: "Fail to convert to Vector",
        })?;
        Ok(vector)
    }

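Both operands are evaluated to full columns first, then a single comparison or arithmetic kernel runs over the pair. A sketch (editor's illustration, not part of the diff; Int32Vector would need importing here, and ScalarExpr::literal is used the same way in this change's tests):

let raw = vec![Some(0), Some(1), Some(2)];
let len = raw.len();
let batch = Batch::new(vec![Int32Vector::from(raw).slice(0, len)], len);
let lhs = ScalarExpr::Column(0);
let rhs = ScalarExpr::literal(Value::from(1), ConcreteDataType::int32_datatype());
// One arrow add kernel for the whole column: [1, 2, 3].
let sums = BinaryFunc::AddInt32.eval_batch(&batch, &lhs, &rhs).unwrap();
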
    /// Evaluate the function with given values and expression
    ///
    /// # Arguments
@@ -824,6 +1059,51 @@ impl VariadicFunc {
    }
}

    pub fn eval_batch(&self, batch: &Batch, exprs: &[ScalarExpr]) -> Result<VectorRef, EvalError> {
        ensure!(
            !exprs.is_empty(),
            InvalidArgumentSnafu {
                reason: format!("Variadic function {:?} requires at least 1 argument", self)
            }
        );
        let args = exprs
            .iter()
            .map(|expr| expr.eval_batch(batch).map(|v| v.to_arrow_array()))
            .collect::<Result<Vec<_>, _>>()?;
        let mut iter = args.into_iter();

        let first = iter.next().unwrap();
        let mut left = first
            .as_any()
            .downcast_ref::<BooleanArray>()
            .context({
                TypeMismatchSnafu {
                    expected: ConcreteDataType::boolean_datatype(),
                    actual: ConcreteDataType::from_arrow_type(first.data_type()),
                }
            })?
            .clone();

        for right in iter {
            let right = right.as_any().downcast_ref::<BooleanArray>().context({
                TypeMismatchSnafu {
                    expected: ConcreteDataType::boolean_datatype(),
                    actual: ConcreteDataType::from_arrow_type(right.data_type()),
                }
            })?;
            left = match self {
                Self::And => {
                    arrow::compute::and(&left, right).context(ArrowSnafu { context: "and" })?
                }
                Self::Or => {
                    arrow::compute::or(&left, right).context(ArrowSnafu { context: "or" })?
                }
            }
        }

        Ok(Arc::new(BooleanVector::from(left)))
    }

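The loop above left-folds the boolean columns pairwise, so And over columns a, b, c evaluates as and(and(a, b), c). A sketch (editor's illustration, not part of the diff):

let a = BooleanVector::from(vec![Some(true), Some(false)]).slice(0, 2);
let b = BooleanVector::from(vec![Some(true), Some(true)]).slice(0, 2);
let batch = Batch::new(vec![a, b], 2);
let exprs = vec![ScalarExpr::Column(0), ScalarExpr::Column(1)];
// Single pass over both columns: [true, false].
let conj = VariadicFunc::And.eval_batch(&batch, &exprs).unwrap();
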
    /// Evaluate the function with given values and expressions
    pub fn eval(&self, values: &[Value], exprs: &[ScalarExpr]) -> Result<Value, EvalError> {
        match self {

@@ -14,17 +14,15 @@

//! Define MapFilterProject, a compound operator that can be applied row-by-row.

use std::collections::{BTreeMap, BTreeSet, VecDeque};
use std::collections::{BTreeMap, BTreeSet};

use common_telemetry::debug;
use datatypes::value::Value;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt};
use snafu::ensure;

use crate::error::{Error, InvalidQuerySnafu};
use crate::expr::error::{EvalError, InternalSnafu};
use crate::expr::{Id, InvalidArgumentSnafu, LocalId, ScalarExpr};
use crate::expr::{InvalidArgumentSnafu, ScalarExpr};
use crate::repr::{self, value_to_internal_ts, Diff, Row};

/// A compound operator that can be applied row-by-row.
@@ -738,7 +736,6 @@ impl MfpPlan {
#[cfg(test)]
mod test {
    use datatypes::data_type::ConcreteDataType;
    use itertools::Itertools;

    use super::*;
    use crate::expr::{BinaryFunc, UnaryFunc, UnmaterializableFunc};

@@ -15,7 +15,6 @@
//! Describes an aggregation function and its input expression.

pub(crate) use func::AggregateFunc;
use serde::{Deserialize, Serialize};

use crate::expr::ScalarExpr;

@@ -24,11 +24,9 @@ use std::any::type_name;
use std::fmt::Display;

use common_decimal::Decimal128;
use common_time::{Date, DateTime};
use datatypes::data_type::ConcreteDataType;
use datatypes::value::{OrderedF32, OrderedF64, OrderedFloat, Value};
use enum_dispatch::enum_dispatch;
use hydroflow::futures::stream::Concat;
use serde::{Deserialize, Serialize};
use snafu::ensure;

@@ -761,7 +759,10 @@ fn ty_eq_without_precision(left: ConcreteDataType, right: ConcreteDataType) -> b
#[allow(clippy::too_many_lines)]
#[cfg(test)]
mod test {
    use common_time::DateTime;

    use super::*;

    #[test]
    fn test_accum() {
        let testcases = vec![

@@ -16,16 +16,15 @@ use std::collections::HashMap;
use std::str::FromStr;
use std::sync::OnceLock;

use common_time::{Date, DateTime};
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{OrderedF32, OrderedF64, Value};
use datatypes::value::Value;
use serde::{Deserialize, Serialize};
use smallvec::smallvec;
use snafu::{IntoError, OptionExt, ResultExt};
use snafu::{IntoError, OptionExt};
use strum::{EnumIter, IntoEnumIterator};

use crate::error::{DatafusionSnafu, Error, InvalidQuerySnafu};
use crate::expr::error::{EvalError, TryFromValueSnafu, TypeMismatchSnafu};
use crate::expr::error::EvalError;
use crate::expr::relation::accum::{Accum, Accumulator};
use crate::expr::signature::{GenericFn, Signature};
use crate::repr::Diff;

@@ -15,34 +15,22 @@
//! Scalar expressions.

use std::collections::{BTreeMap, BTreeSet};
use std::sync::{Arc, Mutex};

use bytes::BytesMut;
use common_error::ext::BoxedError;
use common_recordbatch::DfRecordBatch;
use common_telemetry::debug;
use datafusion_physical_expr::PhysicalExpr;
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::value::Value;
use datatypes::{arrow_array, value};
use prost::Message;
use serde::{Deserialize, Serialize};
use snafu::{ensure, ResultExt};
use substrait::error::{DecodeRelSnafu, EncodeRelSnafu};
use substrait::substrait_proto_df::proto::expression::{RexType, ScalarFunction};
use substrait::substrait_proto_df::proto::Expression;
use datatypes::vectors::{BooleanVector, Helper, NullVector, Vector, VectorRef};
use snafu::{ensure, OptionExt, ResultExt};

use crate::error::{
    DatafusionSnafu, Error, InvalidQuerySnafu, UnexpectedSnafu, UnsupportedTemporalFilterSnafu,
};
use crate::expr::error::{
    ArrowSnafu, DatafusionSnafu as EvalDatafusionSnafu, EvalError, ExternalSnafu,
    InvalidArgumentSnafu, OptimizeSnafu,
    DataTypeSnafu, EvalError, InternalSnafu, InvalidArgumentSnafu, OptimizeSnafu, TypeMismatchSnafu,
};
use crate::expr::func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
use crate::repr::{ColumnType, RelationDesc, RelationType};
use crate::transform::{from_scalar_fn_to_df_fn_impl, FunctionExtensions};
use crate::expr::{Batch, DfScalarFunction};
use crate::repr::{ColumnType, RelationType};

/// A scalar expression with a known type.
#[derive(Ord, PartialOrd, Clone, Debug, Eq, PartialEq, Hash)]
pub struct TypedExpr {
@@ -174,163 +162,6 @@ pub enum ScalarExpr {
    },
}

/// A way to represent a scalar function that is implemented in Datafusion
#[derive(Debug, Clone)]
pub struct DfScalarFunction {
    raw_fn: RawDfScalarFn,
    // TODO(discord9): directly from datafusion expr
    fn_impl: Arc<dyn PhysicalExpr>,
    df_schema: Arc<datafusion_common::DFSchema>,
}

impl DfScalarFunction {
    pub fn new(raw_fn: RawDfScalarFn, fn_impl: Arc<dyn PhysicalExpr>) -> Result<Self, Error> {
        Ok(Self {
            df_schema: Arc::new(raw_fn.input_schema.to_df_schema()?),
            raw_fn,
            fn_impl,
        })
    }

    pub async fn try_from_raw_fn(raw_fn: RawDfScalarFn) -> Result<Self, Error> {
        Ok(Self {
            fn_impl: raw_fn.get_fn_impl().await?,
            df_schema: Arc::new(raw_fn.input_schema.to_df_schema()?),
            raw_fn,
        })
    }

    /// eval a list of expressions using input values
    fn eval_args(values: &[Value], exprs: &[ScalarExpr]) -> Result<Vec<Value>, EvalError> {
        exprs
            .iter()
            .map(|expr| expr.eval(values))
            .collect::<Result<_, _>>()
    }

    // TODO(discord9): add RecordBatch support
    pub fn eval(&self, values: &[Value], exprs: &[ScalarExpr]) -> Result<Value, EvalError> {
        // first eval exprs to construct values to feed to datafusion
        let values: Vec<_> = Self::eval_args(values, exprs)?;
        if values.is_empty() {
            return InvalidArgumentSnafu {
                reason: "values is empty".to_string(),
            }
            .fail();
        }
        // TODO(discord9): make cols all array length of one
        let mut cols = vec![];
        for (idx, typ) in self
            .raw_fn
            .input_schema
            .typ()
            .column_types
            .iter()
            .enumerate()
        {
            let typ = typ.scalar_type();
            let mut array = typ.create_mutable_vector(1);
            array.push_value_ref(values[idx].as_value_ref());
            cols.push(array.to_vector().to_arrow_array());
        }
        let schema = self.df_schema.inner().clone();
        let rb = DfRecordBatch::try_new(schema, cols).map_err(|err| {
            ArrowSnafu {
                raw: err,
                context:
                    "Failed to create RecordBatch from values when eval datafusion scalar function",
            }
            .build()
        })?;

        let res = self.fn_impl.evaluate(&rb).map_err(|err| {
            EvalDatafusionSnafu {
                raw: err,
                context: "Failed to evaluate datafusion scalar function",
            }
            .build()
        })?;
        let res = common_query::columnar_value::ColumnarValue::try_from(&res)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        let res_vec = res
            .try_into_vector(1)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        let res_val = res_vec
            .try_get(0)
            .map_err(BoxedError::new)
            .context(ExternalSnafu)?;
        Ok(res_val)
    }
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RawDfScalarFn {
    /// The raw bytes encoded datafusion scalar function
    pub(crate) f: bytes::BytesMut,
    /// The input schema of the function
    pub(crate) input_schema: RelationDesc,
    /// Extension contains mapping from function reference to function name
    pub(crate) extensions: FunctionExtensions,
}

impl RawDfScalarFn {
    pub fn from_proto(
        f: &substrait::substrait_proto_df::proto::expression::ScalarFunction,
        input_schema: RelationDesc,
        extensions: FunctionExtensions,
    ) -> Result<Self, Error> {
        let mut buf = BytesMut::new();
        f.encode(&mut buf)
            .context(EncodeRelSnafu)
            .map_err(BoxedError::new)
            .context(crate::error::ExternalSnafu)?;
        Ok(Self {
            f: buf,
            input_schema,
            extensions,
        })
    }
    async fn get_fn_impl(&self) -> Result<Arc<dyn PhysicalExpr>, Error> {
        let f = ScalarFunction::decode(&mut self.f.as_ref())
            .context(DecodeRelSnafu)
            .map_err(BoxedError::new)
            .context(crate::error::ExternalSnafu)?;
        debug!("Decoded scalar function: {:?}", f);

        let input_schema = &self.input_schema;
        let extensions = &self.extensions;

        from_scalar_fn_to_df_fn_impl(&f, input_schema, extensions).await
    }
}

impl std::cmp::PartialEq for DfScalarFunction {
    fn eq(&self, other: &Self) -> bool {
        self.raw_fn.eq(&other.raw_fn)
    }
}

// can't derive Eq because `Arc<dyn PhysicalExpr>` doesn't implement it, so implement it manually
impl std::cmp::Eq for DfScalarFunction {}

impl std::cmp::PartialOrd for DfScalarFunction {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
impl std::cmp::Ord for DfScalarFunction {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.raw_fn.cmp(&other.raw_fn)
    }
}
impl std::hash::Hash for DfScalarFunction {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.raw_fn.hash(state);
    }
}

impl ScalarExpr {
    pub fn with_type(self, typ: ColumnType) -> TypedExpr {
        TypedExpr::new(self, typ)
@@ -428,6 +259,177 @@ impl ScalarExpr {
    }
}

    pub fn eval_batch(&self, batch: &Batch) -> Result<VectorRef, EvalError> {
        match self {
            ScalarExpr::Column(i) => Ok(batch.batch()[*i].clone()),
            ScalarExpr::Literal(val, dt) => Ok(Helper::try_from_scalar_value(
                val.try_to_scalar_value(dt).context(DataTypeSnafu {
                    msg: "Failed to convert literal to scalar value",
                })?,
                batch.row_count(),
            )
            .context(DataTypeSnafu {
                msg: "Failed to convert scalar value to vector ref when parsing literal",
            })?),
            ScalarExpr::CallUnmaterializable(_) => OptimizeSnafu {
                reason: "Can't eval unmaterializable function",
            }
            .fail()?,
            ScalarExpr::CallUnary { func, expr } => func.eval_batch(batch, expr),
            ScalarExpr::CallBinary { func, expr1, expr2 } => func.eval_batch(batch, expr1, expr2),
            ScalarExpr::CallVariadic { func, exprs } => func.eval_batch(batch, exprs),
            ScalarExpr::CallDf {
                df_scalar_fn,
                exprs,
            } => df_scalar_fn.eval_batch(batch, exprs),
            ScalarExpr::If { cond, then, els } => Self::eval_if_then(batch, cond, then, els),
        }
    }

    fn eval_if_then(
        batch: &Batch,
        cond: &ScalarExpr,
        then: &ScalarExpr,
        els: &ScalarExpr,
    ) -> Result<VectorRef, EvalError> {
        let conds = cond.eval_batch(batch)?;
        let bool_conds = conds
            .as_any()
            .downcast_ref::<BooleanVector>()
            .context({
                TypeMismatchSnafu {
                    expected: ConcreteDataType::boolean_datatype(),
                    actual: conds.data_type(),
                }
            })?
            .as_boolean_array();

        let mut then_input_batch = None;
        let mut else_input_batch = None;
        let mut null_input_batch = None;

        // instructions for how to reassemble the result vector:
        // iterate over (type of vec, offset, length) and append to the resulting vec
        let mut assembly_idx = vec![];

        // append a batch, returning the appended batch's slice as (offset, length)
        fn append_batch(
            batch: &mut Option<Batch>,
            to_be_append: Batch,
        ) -> Result<(usize, usize), EvalError> {
            let len = to_be_append.row_count();
            if let Some(batch) = batch {
                let offset = batch.row_count();
                batch.append_batch(to_be_append)?;
                Ok((offset, len))
            } else {
                *batch = Some(to_be_append);
                Ok((0, len))
            }
        }

        let mut prev_cond: Option<Option<bool>> = None;
        let mut prev_start_idx: Option<usize> = None;
        // first, route runs of rows with different cond values into different batches
        for (idx, cond) in bool_conds.iter().enumerate() {
            // if this row belongs to the same run and is not the last one, continue
            if prev_cond == Some(cond) {
                continue;
            } else if let Some(prev_cond_idx) = prev_start_idx {
                let prev_cond = prev_cond.unwrap();

                // put a slice into the corresponding batch
                let slice_offset = prev_cond_idx;
                let slice_length = idx - prev_cond_idx;
                let to_be_append = batch.slice(slice_offset, slice_length);

                let to_put_back = match prev_cond {
                    Some(true) => (
                        Some(true),
                        append_batch(&mut then_input_batch, to_be_append)?,
                    ),
                    Some(false) => (
                        Some(false),
                        append_batch(&mut else_input_batch, to_be_append)?,
                    ),
                    None => (None, append_batch(&mut null_input_batch, to_be_append)?),
                };
                assembly_idx.push(to_put_back);
            }
            prev_cond = Some(cond);
            prev_start_idx = Some(idx);
        }

        // deal with the empty-batch and last-run cases
        if let Some(slice_offset) = prev_start_idx {
            let prev_cond = prev_cond.unwrap();
            let slice_length = bool_conds.len() - slice_offset;
            let to_be_append = batch.slice(slice_offset, slice_length);
            let to_put_back = match prev_cond {
                Some(true) => (
                    Some(true),
                    append_batch(&mut then_input_batch, to_be_append)?,
                ),
                Some(false) => (
                    Some(false),
                    append_batch(&mut else_input_batch, to_be_append)?,
                ),
                None => (None, append_batch(&mut null_input_batch, to_be_append)?),
            };
            assembly_idx.push(to_put_back);
        }

        let then_output_vec = then_input_batch
            .map(|batch| then.eval_batch(&batch))
            .transpose()?;
        let else_output_vec = else_input_batch
            .map(|batch| els.eval_batch(&batch))
            .transpose()?;
        let null_output_vec = null_input_batch
            .map(|null| NullVector::new(null.row_count()).slice(0, null.row_count()));

        let dt = then_output_vec
            .as_ref()
            .map(|v| v.data_type())
            .or(else_output_vec.as_ref().map(|v| v.data_type()))
            .unwrap_or(ConcreteDataType::null_datatype());
        let mut builder = dt.create_mutable_vector(conds.len());
        for (cond, (offset, length)) in assembly_idx {
            let slice = match cond {
                Some(true) => then_output_vec.as_ref(),
                Some(false) => else_output_vec.as_ref(),
                None => null_output_vec.as_ref(),
            }
            .context(InternalSnafu {
                reason: "Expect corresponding output vector to exist",
            })?;
            // TODO(discord9): seems `extend_slice_of` doesn't support NullVector or ConstantVector
            // consider adding it maybe?
            if slice.data_type().is_null() {
                builder.push_nulls(length);
            } else if slice.is_const() {
                let arr = slice.slice(offset, length).to_arrow_array();
                let vector = Helper::try_into_vector(arr).context(DataTypeSnafu {
                    msg: "Failed to convert arrow array to vector",
                })?;
                builder
                    .extend_slice_of(vector.as_ref(), 0, vector.len())
                    .context(DataTypeSnafu {
                        msg: "Failed to build result vector for if-then expression",
                    })?;
            } else {
                builder
                    .extend_slice_of(slice.as_ref(), offset, length)
                    .context(DataTypeSnafu {
                        msg: "Failed to build result vector for if-then expression",
                    })?;
            }
        }
        let result_vec = builder.to_vector();

        Ok(result_vec)
    }

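A worked trace of the routing above (editor's illustration): for cond = [true, true, false, null], the scan emits the runs (true, offset 0, len 2), (false, offset 2, len 1), and (null, offset 3, len 1); rows 0-1 land in then_input_batch, row 2 in else_input_batch, and row 3 in null_input_batch. The `then` and `els` expressions are then each evaluated once over their concatenated inputs, and the builder re-interleaves the three outputs in original row order using the recorded (offset, length) pairs, so each branch expression runs at most once per eval_if_then call regardless of how often the condition alternates.
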
    /// Eval this expression with the given values.
    pub fn eval(&self, values: &[Value]) -> Result<Value, EvalError> {
        match self {

@@ -747,18 +749,11 @@ impl ScalarExpr {

#[cfg(test)]
mod test {
    use datatypes::arrow::array::Scalar;
    use query::parser::QueryLanguageParser;
    use query::QueryEngine;
    use session::context::QueryContext;
    use substrait::extension_serializer;
    use substrait::substrait_proto_df::proto::expression::literal::LiteralType;
    use substrait::substrait_proto_df::proto::expression::Literal;
    use substrait::substrait_proto_df::proto::function_argument::ArgType;
    use substrait::substrait_proto_df::proto::r#type::Kind;
    use substrait::substrait_proto_df::proto::{r#type, FunctionArgument, Type};
    use datatypes::vectors::Int32Vector;
    use pretty_assertions::assert_eq;

    use super::*;

    #[test]
    fn test_extract_bound() {
        let test_list: [(ScalarExpr, Result<_, EvalError>); 5] = [

@@ -849,37 +844,68 @@ mod test {
        assert!(matches!(res, Err(Error::InvalidQuery { .. })));
    }

    #[tokio::test]
    async fn test_df_scalar_function() {
        let raw_scalar_func = ScalarFunction {
            function_reference: 0,
            arguments: vec![FunctionArgument {
                arg_type: Some(ArgType::Value(Expression {
                    rex_type: Some(RexType::Literal(Literal {
                        nullable: false,
                        type_variation_reference: 0,
                        literal_type: Some(LiteralType::I64(-1)),
                    })),
                })),
            }],
            output_type: None,
            ..Default::default()
        };
        let input_schema = RelationDesc::try_new(
            RelationType::new(vec![ColumnType::new_nullable(
                ConcreteDataType::null_datatype(),
            )]),
            vec!["null_column".to_string()],
        )
        .unwrap();
        let extensions = FunctionExtensions::from_iter(vec![(0, "abs")]);
        let raw_fn = RawDfScalarFn::from_proto(&raw_scalar_func, input_schema, extensions).unwrap();
        let df_func = DfScalarFunction::try_from_raw_fn(raw_fn).await.unwrap();
        assert_eq!(
            df_func
                .eval(&[Value::Null], &[ScalarExpr::Column(0)])
                .unwrap(),
            Value::Int64(1)
        );
    #[test]
    fn test_eval_batch() {
        // TODO(discord9): add more tests
        {
            let expr = ScalarExpr::If {
                cond: Box::new(ScalarExpr::Column(0).call_binary(
                    ScalarExpr::literal(Value::from(0), ConcreteDataType::int32_datatype()),
                    BinaryFunc::Eq,
                )),
                then: Box::new(ScalarExpr::literal(
                    Value::from(42),
                    ConcreteDataType::int32_datatype(),
                )),
                els: Box::new(ScalarExpr::literal(
                    Value::from(37),
                    ConcreteDataType::int32_datatype(),
                )),
            };
            let raw = vec![
                None,
                Some(0),
                Some(1),
                None,
                None,
                Some(0),
                Some(0),
                Some(1),
                Some(1),
            ];
            let raw_len = raw.len();
            let vectors = vec![Int32Vector::from(raw).slice(0, raw_len)];

            let batch = Batch::new(vectors, raw_len);
            let expected = Int32Vector::from(vec![
                None,
                Some(42),
                Some(37),
                None,
                None,
                Some(42),
                Some(42),
                Some(37),
                Some(37),
            ])
            .slice(0, raw_len);
            assert_eq!(expr.eval_batch(&batch).unwrap(), expected);

            let raw = vec![Some(0)];
            let raw_len = raw.len();
            let vectors = vec![Int32Vector::from(raw).slice(0, raw_len)];

            let batch = Batch::new(vectors, raw_len);
            let expected = Int32Vector::from(vec![Some(42)]).slice(0, raw_len);
            assert_eq!(expr.eval_batch(&batch).unwrap(), expected);

            let raw: Vec<Option<i32>> = vec![];
            let raw_len = raw.len();
            let vectors = vec![Int32Vector::from(raw).slice(0, raw_len)];

            let batch = Batch::new(vectors, raw_len);
            let expected = NullVector::new(raw_len).slice(0, raw_len);
            assert_eq!(expr.eval_batch(&batch).unwrap(), expected);
        }
    }
}

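One invariant worth noting about the test above: for any expression and batch, eval_batch should agree with row-by-row eval. A property-style check (editor's sketch; check_consistency is a hypothetical helper, not part of this change, and it assumes the datatypes Vector trait's get(index) -> Value accessor):

fn check_consistency(expr: &ScalarExpr, batch: &Batch) {
    let vectorized = expr.eval_batch(batch).unwrap();
    for row in 0..batch.row_count() {
        // Rebuild the row-wise input from the batch's columns.
        let values: Vec<Value> = batch.batch().iter().map(|col| col.get(row)).collect();
        assert_eq!(vectorized.get(row), expr.eval(&values).unwrap());
    }
}
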
@@ -19,24 +19,21 @@ use std::sync::Arc;

use api::v1::meta::{HeartbeatRequest, Peer};
use common_error::ext::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::{
    HandlerGroupExecutor, HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutorRef,
    HeartbeatResponseHandlerContext, HeartbeatResponseHandlerExecutorRef,
};
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage};
use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message;
use common_telemetry::{debug, error, info, warn};
use greptime_proto::v1::meta::NodeInfo;
use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient, MetaClientBuilder};
use meta_client::{MetaClientOptions, MetaClientType};
use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient};
use servers::addrs;
use servers::heartbeat_options::HeartbeatOptions;
use snafu::ResultExt;
use tokio::sync::mpsc;
use tokio::time::{Duration, Instant};
use tokio::time::Duration;

use crate::error::{ExternalSnafu, MetaClientInitSnafu};
use crate::error::ExternalSnafu;
use crate::{Error, FlownodeOptions};

/// The flownode heartbeat task, which sends `[HeartbeatRequest]` to Metasrv periodically in the background.

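// Aside: a heartbeat task of this shape typically drives a `tokio` interval
// loop and sends one request per tick. A minimal sketch under simplified,
// hypothetical types (`HeartbeatRequest` and `send_heartbeat` here are
// stand-ins, not the crate's actual API):
use tokio::time::{self, Duration};

#[derive(Debug)]
struct HeartbeatRequest {
    peer_addr: String,
}

async fn send_heartbeat(req: HeartbeatRequest) {
    // The real task writes to a gRPC stream towards Metasrv; here we just log.
    println!("sending {req:?}");
}

#[tokio::main]
async fn main() {
    let mut ticker = time::interval(Duration::from_secs(5));
    for _ in 0..3 {
        ticker.tick().await;
        send_heartbeat(HeartbeatRequest {
            peer_addr: "127.0.0.1:3004".to_string(),
        })
        .await;
    }
}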
@@ -19,7 +19,6 @@
#![feature(let_chains)]
#![feature(duration_abs_diff)]
#![allow(dead_code)]
#![allow(unused_imports)]
#![warn(clippy::missing_docs_in_private_items)]
#![warn(clippy::too_many_lines)]
// allow unused for now because it should be used later

@@ -20,17 +20,11 @@ mod reduce;

use std::collections::BTreeSet;

use datatypes::arrow::ipc::Map;
use serde::{Deserialize, Serialize};

use crate::error::Error;
use crate::expr::{
    AggregateExpr, EvalError, GlobalId, Id, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr,
    TypedExpr,
};
use crate::expr::{GlobalId, Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr};
use crate::plan::join::JoinPlan;
pub(crate) use crate::plan::reduce::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan};
use crate::repr::{ColumnType, DiffRow, RelationDesc, RelationType};
use crate::repr::{DiffRow, RelationDesc};

/// A plan for a dataflow component, with a type to indicate the output type of the relation.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

use crate::expr::ScalarExpr;
use crate::plan::SafeMfpPlan;

@@ -12,9 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

use crate::expr::{AggregateExpr, Id, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr};
use crate::expr::{AggregateExpr, SafeMfpPlan};

/// Describes how to extract a key-value pair from a `Row`
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]

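// Aside: key/value extraction boils down to projecting two column subsets out
// of each row. A simplified sketch (a hypothetical helper, not the crate's
// `KeyValPlan`, which evaluates `SafeMfpPlan`s rather than plain index
// projections):
fn extract_kv(row: &[i64], key_cols: &[usize], val_cols: &[usize]) -> (Vec<i64>, Vec<i64>) {
    let key = key_cols.iter().map(|&i| row[i]).collect();
    let val = val_cols.iter().map(|&i| row[i]).collect();
    (key, val)
}

fn main() {
    // Group by column 0, aggregate over columns 1 and 2.
    let row = [7, 8, 9];
    assert_eq!(extract_kv(&row, &[0], &[1, 2]), (vec![7], vec![8, 9]));
}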
@@ -17,14 +17,10 @@

mod relation;

use std::borrow::Borrow;
use std::slice::SliceIndex;

use api::helper::{pb_value_to_value_ref, value_to_grpc_value};
use api::v1::Row as ProtoRow;
use datatypes::data_type::ConcreteDataType;
use datatypes::types::cast;
use datatypes::types::cast::CastOption;
use datatypes::value::Value;
use itertools::Itertools;
pub(crate) use relation::{ColumnType, Key, RelationDesc, RelationType};

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::{BTreeMap, HashMap};

use datafusion_common::DFSchema;
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
@@ -22,7 +20,7 @@ use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};

use crate::error::{DatafusionSnafu, InternalSnafu, InvalidQuerySnafu, Result, UnexpectedSnafu};
use crate::expr::{MapFilterProject, SafeMfpPlan, ScalarExpr};
use crate::expr::{SafeMfpPlan, ScalarExpr};

/// A set of column indices that are "keys" for the collection.
#[derive(Default, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize, Hash)]

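// Aside: the "key" property means rows that agree on the key columns must be
// the same row. A small self-contained check of that invariant (illustrative
// only, not the crate's representation):
use std::collections::HashSet;

/// Returns true if no two rows share the same values in the key columns.
fn is_unique_key(rows: &[Vec<i64>], key: &[usize]) -> bool {
    let mut seen = HashSet::new();
    rows.iter().all(|row| {
        let k: Vec<i64> = key.iter().map(|&i| row[i]).collect();
        seen.insert(k)
    })
}

fn main() {
    let rows = vec![vec![1, 10], vec![2, 10]];
    assert!(is_unique_key(&rows, &[0])); // column 0 uniquely identifies rows
    assert!(!is_unique_key(&rows, &[1])); // column 1 does not
}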
@@ -20,35 +20,27 @@ use std::sync::Arc;
use api::v1::{RowDeleteRequests, RowInsertRequests};
use cache::{TABLE_FLOWNODE_SET_CACHE_NAME, TABLE_ROUTE_CACHE_NAME};
use catalog::CatalogManagerRef;
use client::client_manager::NodeClients;
use common_base::Plugins;
use common_error::ext::BoxedError;
use common_grpc::channel_manager::ChannelConfig;
use common_meta::cache::{
    LayeredCacheRegistry, LayeredCacheRegistryRef, TableFlownodeSetCacheRef, TableRouteCacheRef,
};
use common_meta::ddl::{table_meta, ProcedureExecutorRef};
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_meta::cache::{LayeredCacheRegistryRef, TableFlownodeSetCacheRef, TableRouteCacheRef};
use common_meta::ddl::ProcedureExecutorRef;
use common_meta::key::flow::FlowMetadataManagerRef;
use common_meta::key::TableMetadataManagerRef;
use common_meta::kv_backend::KvBackendRef;
use common_meta::node_manager::{self, Flownode, NodeManagerRef};
use common_meta::node_manager::{Flownode, NodeManagerRef};
use common_query::Output;
use common_telemetry::tracing::info;
use futures::{FutureExt, StreamExt, TryStreamExt};
use futures::{FutureExt, TryStreamExt};
use greptime_proto::v1::flow::{flow_server, FlowRequest, FlowResponse, InsertRequests};
use itertools::Itertools;
use meta_client::client::MetaClient;
use operator::delete::Deleter;
use operator::insert::Inserter;
use operator::statement::StatementExecutor;
use partition::manager::PartitionRuleManager;
use query::{QueryEngine, QueryEngineFactory};
use serde::de::Unexpected;
use servers::error::{AlreadyStartedSnafu, StartGrpcSnafu, TcpBindSnafu, TcpIncomingSnafu};
use servers::heartbeat_options::HeartbeatOptions;
use servers::server::Server;
use session::context::{QueryContext, QueryContextBuilder, QueryContextRef};
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::{ensure, OptionExt, ResultExt};
use tokio::net::TcpListener;
use tokio::sync::{broadcast, oneshot, Mutex};

@@ -16,37 +16,25 @@
use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;

use bytes::buf::IntoIter;
use common_error::ext::BoxedError;
use common_telemetry::info;
use datafusion::optimizer::simplify_expressions::SimplifyExpressions;
use datafusion::optimizer::{OptimizerContext, OptimizerRule};
use datatypes::data_type::ConcreteDataType as CDT;
use literal::{from_substrait_literal, from_substrait_type};
use prost::Message;
use query::parser::QueryLanguageParser;
use query::plan::LogicalPlan;
use query::query_engine::DefaultSerializer;
use query::QueryEngine;
use serde::{Deserialize, Serialize};
use session::context::QueryContext;
use snafu::{OptionExt, ResultExt};
use snafu::ResultExt;
/// Note: here we use the `substrait_proto_df` crate from the `substrait` module and
/// rename it to `substrait_proto`.
use substrait::{
    substrait_proto_df as substrait_proto, DFLogicalSubstraitConvertor, SubstraitPlan,
};
use substrait::{substrait_proto_df as substrait_proto, DFLogicalSubstraitConvertor};
use substrait_proto::proto::extensions::simple_extension_declaration::MappingType;
use substrait_proto::proto::extensions::SimpleExtensionDeclaration;

use crate::adapter::FlownodeContext;
use crate::error::{
    DatafusionSnafu, Error, ExternalSnafu, InvalidQueryProstSnafu, NotImplementedSnafu,
    TableNotFoundSnafu, UnexpectedSnafu,
};
use crate::expr::GlobalId;
use crate::error::{DatafusionSnafu, Error, ExternalSnafu, NotImplementedSnafu, UnexpectedSnafu};
use crate::plan::TypedPlan;
use crate::repr::RelationType;
/// A simple macro to generate a "not implemented" error.
macro_rules! not_impl_err {
    ($($arg:tt)*) => {
@@ -202,7 +190,7 @@ mod test {

use catalog::RegisterTableRequest;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, NUMBERS_TABLE_ID};
use common_time::{Date, DateTime};
use common_time::DateTime;
use datatypes::prelude::*;
use datatypes::schema::Schema;
use datatypes::vectors::VectorRef;
@@ -219,7 +207,8 @@ mod test {

use super::*;
use crate::adapter::node_context::IdToNameMap;
use crate::repr::ColumnType;
use crate::expr::GlobalId;
use crate::repr::{ColumnType, RelationType};

pub fn create_test_ctx() -> FlownodeContext {
    let mut schemas = HashMap::new();

@@ -12,49 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::{BTreeMap, HashMap};
use std::collections::BTreeMap;

use common_decimal::Decimal128;
use common_time::{Date, Timestamp};
use datatypes::arrow::compute::kernels::window;
use datatypes::arrow::ipc::Binary;
use datatypes::data_type::{ConcreteDataType as CDT, DataType};
use datatypes::data_type::DataType;
use datatypes::value::Value;
use hydroflow::futures::future::Map;
use itertools::Itertools;
use snafu::{OptionExt, ResultExt};
use substrait::variation_const::{
    DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
    TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF,
    TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF,
    UNSIGNED_INTEGER_TYPE_VARIATION_REF,
};
use snafu::OptionExt;
use substrait_proto::proto::aggregate_function::AggregationInvocation;
use substrait_proto::proto::aggregate_rel::{Grouping, Measure};
use substrait_proto::proto::expression::field_reference::ReferenceType::DirectReference;
use substrait_proto::proto::expression::literal::LiteralType;
use substrait_proto::proto::expression::reference_segment::ReferenceType::StructField;
use substrait_proto::proto::expression::{
    IfThen, Literal, MaskExpression, RexType, ScalarFunction,
};
use substrait_proto::proto::extensions::simple_extension_declaration::MappingType;
use substrait_proto::proto::extensions::SimpleExtensionDeclaration;
use substrait_proto::proto::function_argument::ArgType;
use substrait_proto::proto::r#type::Kind;
use substrait_proto::proto::read_rel::ReadType;
use substrait_proto::proto::rel::RelType;
use substrait_proto::proto::{self, plan_rel, Expression, Plan as SubPlan, Rel};
use substrait_proto::proto::{self};

use crate::error::{
    DatatypesSnafu, Error, EvalSnafu, InvalidQuerySnafu, NotImplementedSnafu, PlanSnafu,
    TableNotFoundSnafu,
};
use crate::error::{Error, NotImplementedSnafu, PlanSnafu};
use crate::expr::{
    AggregateExpr, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, SafeMfpPlan, ScalarExpr,
    TypedExpr, UnaryFunc, UnmaterializableFunc, VariadicFunc,
    AggregateExpr, AggregateFunc, BinaryFunc, MapFilterProject, ScalarExpr, TypedExpr, UnaryFunc,
};
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, Plan, ReducePlan, TypedPlan};
use crate::repr::{self, ColumnType, RelationDesc, RelationType};
use crate::repr::{ColumnType, RelationDesc, RelationType};
use crate::transform::{substrait_proto, FlownodeContext, FunctionExtensions};

impl TypedExpr {
@@ -472,13 +446,14 @@ mod test {
use bytes::BytesMut;
use common_time::{DateTime, Interval};
use datatypes::prelude::ConcreteDataType;
use pretty_assertions::{assert_eq, assert_ne};
use pretty_assertions::assert_eq;

use super::*;
use crate::expr::{DfScalarFunction, RawDfScalarFn};
use crate::expr::{DfScalarFunction, GlobalId, RawDfScalarFn};
use crate::plan::{Plan, TypedPlan};
use crate::repr::{self, ColumnType, RelationType};
use crate::repr::{ColumnType, RelationType};
use crate::transform::test::{create_test_ctx, create_test_query_engine, sql_to_substrait};
use crate::transform::CDT;
/// TODO(discord9): add more illegal SQL tests
#[tokio::test]
async fn test_missing_key_check() {

@@ -34,8 +34,7 @@ use substrait::variation_const::{
};
use substrait_proto::proto::expression::literal::LiteralType;
use substrait_proto::proto::expression::Literal;
use substrait_proto::proto::r#type::{self, parameter, Kind, Parameter};
use substrait_proto::proto::Type;
use substrait_proto::proto::r#type::Kind;

use crate::error::{Error, NotImplementedSnafu, PlanSnafu, UnexpectedSnafu};
use crate::transform::substrait_proto;

@@ -22,11 +22,9 @@ use substrait_proto::proto::read_rel::ReadType;
use substrait_proto::proto::rel::RelType;
use substrait_proto::proto::{plan_rel, Plan as SubPlan, ProjectRel, Rel};

use crate::error::{
    Error, InternalSnafu, InvalidQuerySnafu, NotImplementedSnafu, PlanSnafu, UnexpectedSnafu,
};
use crate::error::{Error, InvalidQuerySnafu, NotImplementedSnafu, PlanSnafu, UnexpectedSnafu};
use crate::expr::{MapFilterProject, ScalarExpr, TypedExpr, UnaryFunc};
use crate::plan::{KeyValPlan, Plan, ReducePlan, TypedPlan};
use crate::plan::{KeyValPlan, Plan, TypedPlan};
use crate::repr::{self, RelationDesc, RelationType};
use crate::transform::{substrait_proto, FlownodeContext, FunctionExtensions};

@@ -350,7 +348,7 @@ mod test {
use super::*;
use crate::expr::{GlobalId, ScalarExpr};
use crate::plan::{Plan, TypedPlan};
use crate::repr::{self, ColumnType, RelationType};
use crate::repr::{ColumnType, RelationType};
use crate::transform::test::{create_test_ctx, create_test_query_engine, sql_to_substrait};
use crate::transform::CDT;

@@ -19,14 +19,11 @@ use std::ops::Bound;
use std::sync::Arc;

use common_telemetry::debug;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use smallvec::{smallvec, SmallVec};
use tokio::sync::{Mutex, RwLock};
use tokio::sync::RwLock;

use crate::expr::error::InternalSnafu;
use crate::expr::{EvalError, ScalarExpr};
use crate::repr::{value_to_internal_ts, Diff, DiffRow, Duration, KeyValDiffRow, Row, Timestamp};
use crate::repr::{value_to_internal_ts, DiffRow, Duration, KeyValDiffRow, Row, Timestamp};

/// A batch of updates, arranged by key
pub type Batch = BTreeMap<Row, SmallVec<[DiffRow; 2]>>;
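// Aside: updates for the same key share one map entry. A self-contained sketch
// with simplified stand-ins for `Row`/`DiffRow` (and `Vec` in place of
// `SmallVec`; the names and shapes here are assumptions, not the crate's types):
use std::collections::BTreeMap;

type Row = Vec<i64>;
type Timestamp = i64;
type Diff = i64;
type DiffRow = (Row, Timestamp, Diff);
type Batch = BTreeMap<Row, Vec<DiffRow>>;

fn main() {
    let mut batch = Batch::new();
    // An insert (+1) at t=0 and a retraction (-1) at t=1 for the same key.
    batch.entry(vec![1]).or_default().push((vec![1, 10], 0, 1));
    batch.entry(vec![1]).or_default().push((vec![1, 10], 1, -1));
    assert_eq!(batch[&vec![1]].len(), 2);
}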
@@ -585,6 +582,7 @@ mod test {
use std::borrow::Borrow;

use datatypes::value::Value;
use itertools::Itertools;

use super::*;