diff --git a/src/script/src/python.rs b/src/script/src/python.rs index 877d93257a..f7b1ed77c0 100644 --- a/src/script/src/python.rs +++ b/src/script/src/python.rs @@ -16,6 +16,7 @@ mod engine; pub mod error; +pub(crate) mod metric; pub(crate) mod utils; pub use self::engine::{PyEngine, PyScript}; diff --git a/src/script/src/python/engine.rs b/src/script/src/python/engine.rs index 73bdb7a575..1c497e9c55 100644 --- a/src/script/src/python/engine.rs +++ b/src/script/src/python/engine.rs @@ -203,19 +203,19 @@ impl CoprStream { ) -> Result { let mut schema = vec![]; for (ty, name) in copr.return_types.iter().zip(&copr.deco_args.ret_names) { - let ty = ty.clone().ok_or( + let ty = ty.clone().ok_or_else(|| { PyRuntimeSnafu { msg: "return type not annotated, can't generate schema", } - .build(), - )?; + .build() + })?; let is_nullable = ty.is_nullable; - let ty = ty.datatype.ok_or( + let ty = ty.datatype.ok_or_else(|| { PyRuntimeSnafu { msg: "return type not annotated, can't generate schema", } - .build(), - )?; + .build() + })?; let col_schema = ColumnSchema::new(name, ty, is_nullable); schema.push(col_schema); } diff --git a/src/script/src/python/ffi_types/copr.rs b/src/script/src/python/ffi_types/copr.rs index c62108175c..ce9fc85ad9 100644 --- a/src/script/src/python/ffi_types/copr.rs +++ b/src/script/src/python/ffi_types/copr.rs @@ -20,7 +20,6 @@ use std::result::Result as StdResult; use std::sync::{Arc, Weak}; use common_recordbatch::{RecordBatch, RecordBatches}; -use datatypes::arrow::array::Array; use datatypes::arrow::compute; use datatypes::data_type::{ConcreteDataType, DataType}; use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; @@ -243,15 +242,14 @@ pub(crate) fn check_args_anno_real_type( ); for (idx, arg) in args.iter().enumerate() { let anno_ty = copr.arg_types[idx].clone(); - let real_ty = arg.to_arrow_array().data_type().clone(); - let real_ty = ConcreteDataType::from_arrow_type(&real_ty); + let real_ty = arg.data_type(); let arg_name = arg_names[idx].clone(); - let col_idx = rb.schema.column_index_by_name(&arg_name).ok_or( + let col_idx = rb.schema.column_index_by_name(&arg_name).ok_or_else(|| { OtherSnafu { reason: format!("Can't find column by name {arg_name}"), } - .build(), - )?; + .build() + })?; let is_nullable: bool = rb.schema.column_schemas()[col_idx].is_nullable(); ensure!( anno_ty diff --git a/src/script/src/python/ffi_types/vector.rs b/src/script/src/python/ffi_types/vector.rs index fa1e0e4a4c..f3e0f8aefc 100644 --- a/src/script/src/python/ffi_types/vector.rs +++ b/src/script/src/python/ffi_types/vector.rs @@ -25,7 +25,7 @@ use datatypes::arrow::compute::kernels::{arithmetic, comparison}; use datatypes::arrow::datatypes::DataType as ArrowDataType; use datatypes::arrow::error::Result as ArrowResult; use datatypes::data_type::DataType; -use datatypes::prelude::Value; +use datatypes::prelude::{ConcreteDataType, Value}; use datatypes::value::{self, OrderedFloat}; use datatypes::vectors::{Helper, NullVector, VectorRef}; #[cfg(feature = "pyo3_backend")] @@ -136,6 +136,16 @@ impl AsRef for PyVector { } impl PyVector { + #[inline] + pub(crate) fn data_type(&self) -> ConcreteDataType { + self.vector.data_type() + } + + #[inline] + pub(crate) fn arrow_data_type(&self) -> ArrowDataType { + self.vector.data_type().as_arrow_type() + } + pub(crate) fn vector_and(left: &Self, right: &Self) -> Result { let left = left.to_arrow_array(); let right = right.to_arrow_array(); diff --git a/src/script/src/python/metric.rs b/src/script/src/python/metric.rs new file mode 100644 index 0000000000..16b6de288f --- /dev/null +++ b/src/script/src/python/metric.rs @@ -0,0 +1,24 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Script engine metrics +pub static METRIC_RSPY_INIT_ELAPSED: &str = "script.rspy.init_elapsed"; +pub static METRIC_RSPY_EXEC_ELAPSED: &str = "script.rspy.exec_elapsed"; +pub static METRIC_RSPY_EXEC_TOTAL_ELAPSED: &str = "script.rspy.exec_total_elapsed"; +#[cfg(feature = "pyo3_backend")] +pub static METRIC_PYO3_EXEC_ELAPSED: &str = "script.pyo3.exec_elapsed"; +#[cfg(feature = "pyo3_backend")] +pub static METRIC_PYO3_INIT_ELAPSED: &str = "script.pyo3.init_elapsed"; +#[cfg(feature = "pyo3_backend")] +pub static METRIC_PYO3_EXEC_TOTAL_ELAPSED: &str = "script.pyo3.exec_total_elapsed"; diff --git a/src/script/src/python/pyo3/builtins.rs b/src/script/src/python/pyo3/builtins.rs index 5d1e22a2f0..0c5225bd5b 100644 --- a/src/script/src/python/pyo3/builtins.rs +++ b/src/script/src/python/pyo3/builtins.rs @@ -131,7 +131,7 @@ fn dataframe(py: Python) -> PyResult { let globals = get_globals(py)?; let df = globals .get_item("__dataframe__") - .ok_or(PyKeyError::new_err("No __dataframe__ variable is found"))? + .ok_or_else(|| PyKeyError::new_err("No __dataframe__ variable is found"))? .extract::()?; Ok(df) } @@ -142,7 +142,7 @@ pub(crate) fn query_engine(py: Python) -> PyResult { let globals = get_globals(py)?; let query = globals .get_item("__query__") - .ok_or(PyKeyError::new_err("No __query__ variable is found"))? + .ok_or_else(|| PyKeyError::new_err("No __query__ variable is found"))? .extract::()?; Ok(query) } @@ -310,7 +310,7 @@ macro_rules! bind_aggr_expr { Arc::new(expressions::Column::new(stringify!($EXPR), $idx)) as _, )* stringify!($AGGR_FUNC), - $ARG_TY.to_arrow_array().data_type().to_owned()), + $ARG_TY.arrow_data_type().to_owned()), &[$($ARG.to_arrow_array()),*] ) } @@ -326,7 +326,7 @@ fn approx_distinct(py: Python<'_>, v0: &PyVector) -> PyResult { expressions::ApproxDistinct::new( Arc::new(expressions::Column::new("expr0", 0)) as _, "ApproxDistinct", - v0.to_arrow_array().data_type().to_owned(), + v0.arrow_data_type().to_owned(), ), &[v0.to_arrow_array()], ); @@ -349,7 +349,7 @@ fn approx_percentile_cont(py: Python<'_>, values: &PyVector, percent: f64) -> Py Arc::new(percent) as _, ], "ApproxPercentileCont", - (values.to_arrow_array().data_type()).to_owned(), + values.arrow_data_type().to_owned(), ) .map_err(|e| PyValueError::new_err(format!("{e:?}")))?, &[values.to_arrow_array()], diff --git a/src/script/src/python/pyo3/copr_impl.rs b/src/script/src/python/pyo3/copr_impl.rs index 25c8aa4f4c..87a2b6a0bc 100644 --- a/src/script/src/python/pyo3/copr_impl.rs +++ b/src/script/src/python/pyo3/copr_impl.rs @@ -15,6 +15,7 @@ use std::collections::HashMap; use common_recordbatch::RecordBatch; +use common_telemetry::timer; use datatypes::vectors::{Helper, VectorRef}; use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::types::{PyDict, PyList, PyModule, PyTuple}; @@ -24,6 +25,7 @@ use snafu::{ensure, Backtrace, GenerateImplicitData, ResultExt}; use crate::python::error::{self, NewRecordBatchSnafu, OtherSnafu, Result}; use crate::python::ffi_types::copr::PyQueryEngine; use crate::python::ffi_types::{check_args_anno_real_type, select_from_rb, Coprocessor, PyVector}; +use crate::python::metric; use crate::python::pyo3::dataframe_impl::PyDataFrame; use crate::python::pyo3::utils::{init_cpython_interpreter, pyo3_obj_try_to_typed_val}; @@ -62,6 +64,7 @@ pub(crate) fn pyo3_exec_parsed( rb: &Option, params: &HashMap, ) -> Result { + let _t = timer!(metric::METRIC_PYO3_EXEC_TOTAL_ELAPSED); // i.e params or use `vector(..)` to construct a PyVector let arg_names = &copr.deco_args.arg_names.clone().unwrap_or(vec![]); let args: Vec = if let Some(rb) = rb { @@ -74,6 +77,8 @@ pub(crate) fn pyo3_exec_parsed( // Just in case cpython is not inited init_cpython_interpreter().unwrap(); Python::with_gil(|py| -> Result<_> { + let _t = timer!(metric::METRIC_PYO3_EXEC_ELAPSED); + let mut cols = (|| -> PyResult<_> { let dummy_decorator = " # Postponed evaluation of annotations(PEP 563) so annotation can be set freely @@ -135,7 +140,7 @@ coprocessor = copr // could generate a call in python code and use Python::run to run it, just like in RustPython // Expect either: a PyVector Or a List/Tuple of PyVector py.run(&script, Some(globals), Some(locals))?; - let result = locals.get_item("_return_from_coprocessor").ok_or(PyValueError::new_err("Can't find return value of coprocessor function"))?; + let result = locals.get_item("_return_from_coprocessor").ok_or_else(|| PyValueError::new_err("Can't find return value of coprocessor function"))?; let col_len = rb.as_ref().map(|rb| rb.num_rows()).unwrap_or(1); py_any_to_vec(result, col_len) diff --git a/src/script/src/python/pyo3/utils.rs b/src/script/src/python/pyo3/utils.rs index cdb272193e..1a0c12e86d 100644 --- a/src/script/src/python/pyo3/utils.rs +++ b/src/script/src/python/pyo3/utils.rs @@ -15,7 +15,7 @@ use std::sync::Mutex; use arrow::pyarrow::PyArrowException; -use common_telemetry::info; +use common_telemetry::{info, timer}; use datafusion_common::ScalarValue; use datafusion_expr::ColumnarValue; use datatypes::arrow::datatypes::DataType as ArrowDataType; @@ -29,6 +29,7 @@ use pyo3::types::{PyBool, PyFloat, PyInt, PyList, PyTuple}; use crate::python::ffi_types::utils::{collect_diff_types_string, new_item_field}; use crate::python::ffi_types::PyVector; +use crate::python::metric; use crate::python::pyo3::builtins::greptime_builtins; /// prevent race condition of init cpython @@ -39,6 +40,7 @@ pub(crate) fn to_py_err(err: impl ToString) -> PyErr { /// init cpython interpreter with `greptime` builtins, if already inited, do nothing pub(crate) fn init_cpython_interpreter() -> PyResult<()> { + let _t = timer!(metric::METRIC_PYO3_INIT_ELAPSED); let mut start = START_PYO3.lock().unwrap(); if !*start { pyo3::append_to_inittab!(greptime_builtins); diff --git a/src/script/src/python/rspython/builtins.rs b/src/script/src/python/rspython/builtins.rs index c2d80f1471..bc0c5ba155 100644 --- a/src/script/src/python/rspython/builtins.rs +++ b/src/script/src/python/rspython/builtins.rs @@ -610,7 +610,7 @@ pub(crate) mod greptime_builtin { ApproxDistinct, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -621,7 +621,7 @@ pub(crate) mod greptime_builtin { Median, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -635,7 +635,7 @@ pub(crate) mod greptime_builtin { ApproxMedian, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -656,7 +656,7 @@ pub(crate) mod greptime_builtin { Arc::new(percent) as _, ], "ApproxPercentileCont", - (values.to_arrow_array().data_type()).clone(), + values.arrow_data_type(), ) .map_err(|err| from_df_err(err, vm))?, &[values.to_arrow_array()], @@ -671,7 +671,7 @@ pub(crate) mod greptime_builtin { ArrayAgg, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -683,7 +683,7 @@ pub(crate) mod greptime_builtin { Avg, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -698,7 +698,7 @@ pub(crate) mod greptime_builtin { Correlation, vm, &[arg0.to_arrow_array(), arg1.to_arrow_array()], - arg0.to_arrow_array().data_type(), + arg0.arrow_data_type(), expr0, expr1 ); @@ -710,7 +710,7 @@ pub(crate) mod greptime_builtin { Count, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -725,7 +725,7 @@ pub(crate) mod greptime_builtin { Covariance, vm, &[arg0.to_arrow_array(), arg1.to_arrow_array()], - arg0.to_arrow_array().data_type(), + arg0.arrow_data_type(), expr0, expr1 ); @@ -741,7 +741,7 @@ pub(crate) mod greptime_builtin { CovariancePop, vm, &[arg0.to_arrow_array(), arg1.to_arrow_array()], - arg0.to_arrow_array().data_type(), + arg0.arrow_data_type(), expr0, expr1 ); @@ -753,7 +753,7 @@ pub(crate) mod greptime_builtin { Max, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -764,7 +764,7 @@ pub(crate) mod greptime_builtin { Min, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -775,7 +775,7 @@ pub(crate) mod greptime_builtin { Stddev, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -786,7 +786,7 @@ pub(crate) mod greptime_builtin { StddevPop, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -797,7 +797,7 @@ pub(crate) mod greptime_builtin { Sum, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -808,7 +808,7 @@ pub(crate) mod greptime_builtin { Variance, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } @@ -819,7 +819,7 @@ pub(crate) mod greptime_builtin { VariancePop, vm, &[values.to_arrow_array()], - values.to_arrow_array().data_type(), + values.arrow_data_type(), expr0 ); } diff --git a/src/script/src/python/rspython/builtins/test.rs b/src/script/src/python/rspython/builtins/test.rs index e655aa1807..953b9fe0d5 100644 --- a/src/script/src/python/rspython/builtins/test.rs +++ b/src/script/src/python/rspython/builtins/test.rs @@ -239,7 +239,7 @@ impl PyValue { let vec_f64 = vec_f64 .as_any() .downcast_ref::() - .ok_or(format!("Can't cast {vec_f64:#?} to Float64Array!"))?; + .ok_or_else(|| format!("Can't cast {vec_f64:#?} to Float64Array!"))?; let ret = vec_f64.into_iter().collect::>(); if ret.iter().all(|x| x.is_some()) { Ok(Self::FloatVec( @@ -255,14 +255,14 @@ impl PyValue { let vec_i64 = vec_int .as_any() .downcast_ref::() - .ok_or(format!("Can't cast {vec_int:#?} to Int64Array!"))?; + .ok_or_else(|| format!("Can't cast {vec_int:#?} to Int64Array!"))?; let ret: Vec = vec_i64 .into_iter() .enumerate() .map(|(idx, v)| { - v.ok_or(format!( - "No null element expected, found one in {idx} position" - )) + v.ok_or_else(|| { + format!("No null element expected, found one in {idx} position") + }) }) .collect::>()?; Ok(Self::IntVec(ret)) diff --git a/src/script/src/python/rspython/copr_impl.rs b/src/script/src/python/rspython/copr_impl.rs index fdfbc8fcca..b042445592 100644 --- a/src/script/src/python/rspython/copr_impl.rs +++ b/src/script/src/python/rspython/copr_impl.rs @@ -18,7 +18,7 @@ use std::result::Result as StdResult; use std::sync::Arc; use common_recordbatch::RecordBatch; -use common_telemetry::info; +use common_telemetry::{info, timer}; use datatypes::vectors::VectorRef; use rustpython_vm::builtins::{PyBaseExceptionRef, PyDict, PyStr, PyTuple}; use rustpython_vm::class::PyClassImpl; @@ -30,6 +30,7 @@ use snafu::{OptionExt, ResultExt}; use crate::python::error::{ensure, ret_other_error_with, NewRecordBatchSnafu, OtherSnafu, Result}; use crate::python::ffi_types::copr::PyQueryEngine; use crate::python::ffi_types::{check_args_anno_real_type, select_from_rb, Coprocessor, PyVector}; +use crate::python::metric; use crate::python::rspython::builtins::init_greptime_builtins; use crate::python::rspython::dataframe_impl::data_frame::set_dataframe_in_scope; use crate::python::rspython::dataframe_impl::init_data_frame; @@ -43,6 +44,7 @@ pub(crate) fn rspy_exec_parsed( rb: &Option, params: &HashMap, ) -> Result { + let _t = timer!(metric::METRIC_RSPY_EXEC_TOTAL_ELAPSED); // 3. get args from `rb`, and cast them into PyVector let args: Vec = if let Some(rb) = rb { let arg_names = copr.deco_args.arg_names.clone().unwrap_or(vec![]); @@ -99,6 +101,8 @@ pub(crate) fn exec_with_cached_vm( vm: &Arc, ) -> Result { vm.enter(|vm| -> Result { + let _t = timer!(metric::METRIC_RSPY_EXEC_ELAPSED); + // set arguments with given name and values let scope = vm.new_scope_with_builtins(); if let Some(rb) = rb { @@ -184,6 +188,7 @@ fn try_into_columns( /// init interpreter with type PyVector and Module: greptime pub(crate) fn init_interpreter() -> Arc { + let _t = timer!(metric::METRIC_RSPY_INIT_ELAPSED); INTERPRETER.with(|i| { i.borrow_mut() .get_or_insert_with(|| {