From 22ae983280a4ea1f6b3a3ba0539466058018fbbc Mon Sep 17 00:00:00 2001 From: Yingwen Date: Fri, 18 Nov 2022 18:38:07 +0800 Subject: [PATCH] refactor: Use re-exported arrow mod from datatypes crate (#571) --- Cargo.lock | 6 ---- src/common/function/Cargo.toml | 14 ---------- .../function/src/scalars/numpy/interp.rs | 6 ++-- .../src/scalars/timestamp/from_unixtime.rs | 6 ++-- src/common/grpc/Cargo.toml | 14 ---------- src/common/grpc/src/physical/plan.rs | 8 +++--- src/common/grpc/src/select.rs | 15 +++++----- src/common/query/Cargo.toml | 4 --- src/common/query/src/columnar_value.rs | 2 +- src/common/query/src/error.rs | 4 +-- src/common/query/src/function.rs | 2 +- .../query/src/logical_plan/accumulator.rs | 4 +-- src/common/query/src/logical_plan/mod.rs | 4 +-- src/common/query/src/logical_plan/udaf.rs | 2 +- src/common/query/src/physical_plan.rs | 2 +- src/common/query/src/signature.rs | 4 +-- src/datanode/Cargo.toml | 28 ------------------- src/datanode/src/tests/instance_test.rs | 2 +- src/datatypes/src/arrow_array.rs | 8 ++++-- src/datatypes/src/vectors.rs | 10 ------- src/datatypes/src/vectors/operations.rs | 6 ++-- src/frontend/Cargo.toml | 14 ---------- src/query/Cargo.toml | 14 ---------- src/query/src/datafusion.rs | 2 +- src/query/src/datafusion/planner.rs | 2 +- src/query/src/optimizer.rs | 10 ++++--- src/query/src/sql.rs | 2 +- src/query/tests/query_engine_test.rs | 2 +- src/script/src/python/builtins/mod.rs | 2 +- src/script/src/python/builtins/test.rs | 9 +++--- src/script/src/python/coprocessor.rs | 4 +-- 31 files changed, 57 insertions(+), 155 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 653e1d757a..84bcfa1de3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1158,7 +1158,6 @@ name = "common-function" version = "0.1.0" dependencies = [ "arc-swap", - "arrow2", "chrono-tz", "common-error", "common-function-macro", @@ -1195,7 +1194,6 @@ name = "common-grpc" version = "0.1.0" dependencies = [ "api", - "arrow2", "async-trait", "common-base", "common-error", @@ -1233,7 +1231,6 @@ dependencies = [ name = "common-query" version = "0.1.0" dependencies = [ - "arrow2", "async-trait", "common-base", "common-error", @@ -1732,7 +1729,6 @@ name = "datanode" version = "0.1.0" dependencies = [ "api", - "arrow2", "async-trait", "axum 0.6.0-rc.2", "axum-macros", @@ -2113,7 +2109,6 @@ name = "frontend" version = "0.1.0" dependencies = [ "api", - "arrow2", "async-stream", "async-trait", "catalog", @@ -4355,7 +4350,6 @@ version = "0.1.0" dependencies = [ "approx_eq", "arc-swap", - "arrow2", "async-trait", "catalog", "common-catalog", diff --git a/src/common/function/Cargo.toml b/src/common/function/Cargo.toml index 214fa0ade9..e8f1bb9581 100644 --- a/src/common/function/Cargo.toml +++ b/src/common/function/Cargo.toml @@ -21,20 +21,6 @@ paste = "1.0" snafu = { version = "0.7", features = ["backtraces"] } statrs = "0.15" -[dependencies.arrow] -features = [ - "io_csv", - "io_json", - "io_parquet", - "io_parquet_compression", - "io_ipc", - "ahash", - "compute", - "serde_types", -] -package = "arrow2" -version = "0.10" - [dev-dependencies] ron = "0.7" serde = { version = "1.0", features = ["derive"] } diff --git a/src/common/function/src/scalars/numpy/interp.rs b/src/common/function/src/scalars/numpy/interp.rs index f2c3b28bb3..68981c2556 100644 --- a/src/common/function/src/scalars/numpy/interp.rs +++ b/src/common/function/src/scalars/numpy/interp.rs @@ -14,9 +14,9 @@ use std::sync::Arc; -use arrow::array::PrimitiveArray; -use arrow::compute::cast::primitive_to_primitive; -use arrow::datatypes::DataType::Float64; +use datatypes::arrow::array::PrimitiveArray; +use datatypes::arrow::compute::cast::primitive_to_primitive; +use datatypes::arrow::datatypes::DataType::Float64; use datatypes::data_type::DataType; use datatypes::prelude::ScalarVector; use datatypes::type_id::LogicalTypeId; diff --git a/src/common/function/src/scalars/timestamp/from_unixtime.rs b/src/common/function/src/scalars/timestamp/from_unixtime.rs index f480547436..4462672c8c 100644 --- a/src/common/function/src/scalars/timestamp/from_unixtime.rs +++ b/src/common/function/src/scalars/timestamp/from_unixtime.rs @@ -17,11 +17,11 @@ use std::fmt; use std::sync::Arc; -use arrow::compute::arithmetics; -use arrow::datatypes::DataType as ArrowDatatype; -use arrow::scalar::PrimitiveScalar; use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu}; use common_query::prelude::{Signature, Volatility}; +use datatypes::arrow::compute::arithmetics; +use datatypes::arrow::datatypes::DataType as ArrowDatatype; +use datatypes::arrow::scalar::PrimitiveScalar; use datatypes::prelude::ConcreteDataType; use datatypes::vectors::{TimestampVector, VectorRef}; use snafu::ResultExt; diff --git a/src/common/grpc/Cargo.toml b/src/common/grpc/Cargo.toml index eef5357a3f..77d4920c4a 100644 --- a/src/common/grpc/Cargo.toml +++ b/src/common/grpc/Cargo.toml @@ -22,20 +22,6 @@ tokio = { version = "1.0", features = ["full"] } tonic = "0.8" tower = "0.4" -[dependencies.arrow] -package = "arrow2" -version = "0.10" -features = [ - "io_csv", - "io_json", - "io_parquet", - "io_parquet_compression", - "io_ipc", - "ahash", - "compute", - "serde_types", -] - [dev-dependencies] criterion = "0.4" rand = "0.8" diff --git a/src/common/grpc/src/physical/plan.rs b/src/common/grpc/src/physical/plan.rs index 44a8d79ec6..019b11ac2d 100644 --- a/src/common/grpc/src/physical/plan.rs +++ b/src/common/grpc/src/physical/plan.rs @@ -18,8 +18,6 @@ use std::sync::Arc; use api::v1::codec::physical_plan_node::PhysicalPlanType; use api::v1::codec::{MockInputExecNode, PhysicalPlanNode, ProjectionExecNode}; -use arrow::array::{PrimitiveArray, Utf8Array}; -use arrow::datatypes::{DataType, Field, Schema}; use async_trait::async_trait; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::field_util::SchemaExt; @@ -29,6 +27,8 @@ use datafusion::physical_plan::{ ExecutionPlan, PhysicalExpr, SendableRecordBatchStream, Statistics, }; use datafusion::record_batch::RecordBatch; +use datatypes::arrow::array::{PrimitiveArray, Utf8Array}; +use datatypes::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use snafu::{OptionExt, ResultExt}; use crate::error::{ @@ -162,11 +162,11 @@ impl ExecutionPlan for MockExecution { self } - fn schema(&self) -> arrow::datatypes::SchemaRef { + fn schema(&self) -> SchemaRef { let field1 = Field::new("id", DataType::UInt32, false); let field2 = Field::new("name", DataType::Utf8, false); let field3 = Field::new("age", DataType::UInt32, false); - Arc::new(arrow::datatypes::Schema::new(vec![field1, field2, field3])) + Arc::new(Schema::new(vec![field1, field2, field3])) } fn output_partitioning(&self) -> datafusion::physical_plan::Partitioning { diff --git a/src/common/grpc/src/select.rs b/src/common/grpc/src/select.rs index 58a3f10bfa..0801370dbd 100644 --- a/src/common/grpc/src/select.rs +++ b/src/common/grpc/src/select.rs @@ -19,12 +19,12 @@ use api::result::{build_err_result, ObjectResultBuilder}; use api::v1::codec::SelectResult; use api::v1::column::{SemanticType, Values}; use api::v1::{Column, ObjectResult}; -use arrow::array::{Array, BooleanArray, PrimitiveArray}; use common_base::BitVec; use common_error::prelude::ErrorExt; use common_error::status_code::StatusCode; use common_query::Output; use common_recordbatch::{util, RecordBatches, SendableRecordBatchStream}; +use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray}; use datatypes::arrow_array::{BinaryArray, StringArray}; use datatypes::schema::SchemaRef; use snafu::{OptionExt, ResultExt}; @@ -136,7 +136,8 @@ pub fn null_mask(arrays: &Vec>, row_count: usize) -> Vec { } macro_rules! convert_arrow_array_to_grpc_vals { - ($data_type: expr, $arrays: ident, $(($Type: pat, $CastType: ty, $field: ident, $MapFunction: expr)), +) => { + ($data_type: expr, $arrays: ident, $(($Type: pat, $CastType: ty, $field: ident, $MapFunction: expr)), +) => {{ + use datatypes::arrow::datatypes::{DataType, TimeUnit}; match $data_type { $( $Type => { @@ -155,7 +156,7 @@ macro_rules! convert_arrow_array_to_grpc_vals { )+ _ => unimplemented!(), } - }; + }}; } pub fn values(arrays: &[Arc]) -> Result { @@ -164,7 +165,6 @@ pub fn values(arrays: &[Arc]) -> Result { } let data_type = arrays[0].data_type(); - use arrow::datatypes::DataType; convert_arrow_array_to_grpc_vals!( data_type, arrays, @@ -192,7 +192,7 @@ pub fn values(arrays: &[Arc]) -> Result { (DataType::Date32, PrimitiveArray, date_values, |x| {*x as i32}), (DataType::Date64, PrimitiveArray, datetime_values,|x| {*x as i64}), - (DataType::Timestamp(arrow::datatypes::TimeUnit::Millisecond, _), PrimitiveArray, ts_millis_values, |x| {*x}) + (DataType::Timestamp(TimeUnit::Millisecond, _), PrimitiveArray, ts_millis_values, |x| {*x}) ) } @@ -200,11 +200,10 @@ pub fn values(arrays: &[Arc]) -> Result { mod tests { use std::sync::Arc; - use arrow::array::{Array, BooleanArray, PrimitiveArray}; - use arrow::datatypes::{DataType, Field}; use common_recordbatch::{RecordBatch, RecordBatches}; use datafusion::field_util::SchemaExt; - use datatypes::arrow::datatypes::Schema as ArrowSchema; + use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray}; + use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use datatypes::arrow_array::StringArray; use datatypes::schema::Schema; use datatypes::vectors::{UInt32Vector, VectorRef}; diff --git a/src/common/query/Cargo.toml b/src/common/query/Cargo.toml index a5bf8960f5..7b9f87617b 100644 --- a/src/common/query/Cargo.toml +++ b/src/common/query/Cargo.toml @@ -18,10 +18,6 @@ datatypes = { path = "../../datatypes" } snafu = { version = "0.7", features = ["backtraces"] } statrs = "0.15" -[dependencies.arrow] -package = "arrow2" -version = "0.10" - [dev-dependencies] common-base = { path = "../base" } tokio = { version = "1.0", features = ["full"] } diff --git a/src/common/query/src/columnar_value.rs b/src/common/query/src/columnar_value.rs index 849b8b24b7..12f3815bd0 100644 --- a/src/common/query/src/columnar_value.rs +++ b/src/common/query/src/columnar_value.rs @@ -80,7 +80,7 @@ impl From for DfColumnarValue { mod tests { use std::sync::Arc; - use arrow::datatypes::DataType as ArrowDataType; + use datatypes::arrow::datatypes::DataType as ArrowDataType; use datatypes::vectors::BooleanVector; use super::*; diff --git a/src/common/query/src/error.rs b/src/common/query/src/error.rs index 90a127a64c..82b0c04d66 100644 --- a/src/common/query/src/error.rs +++ b/src/common/query/src/error.rs @@ -14,9 +14,9 @@ use std::any::Any; -use arrow::datatypes::DataType as ArrowDatatype; use common_error::prelude::*; use datafusion_common::DataFusionError; +use datatypes::arrow::datatypes::DataType as ArrowDatatype; use datatypes::error::Error as DataTypeError; use datatypes::prelude::ConcreteDataType; use statrs::StatsError; @@ -180,7 +180,7 @@ impl From for Error { #[cfg(test)] mod tests { - use arrow::error::ArrowError; + use datatypes::arrow::error::ArrowError; use snafu::GenerateImplicitData; use super::*; diff --git a/src/common/query/src/function.rs b/src/common/query/src/function.rs index 635dc20cd9..01bfffce7f 100644 --- a/src/common/query/src/function.rs +++ b/src/common/query/src/function.rs @@ -14,8 +14,8 @@ use std::sync::Arc; -use arrow::datatypes::DataType as ArrowDataType; use datafusion_expr::ReturnTypeFunction as DfReturnTypeFunction; +use datatypes::arrow::datatypes::DataType as ArrowDataType; use datatypes::prelude::{ConcreteDataType, DataType}; use datatypes::vectors::VectorRef; use snafu::ResultExt; diff --git a/src/common/query/src/logical_plan/accumulator.rs b/src/common/query/src/logical_plan/accumulator.rs index 379a582dc1..717214f3ff 100644 --- a/src/common/query/src/logical_plan/accumulator.rs +++ b/src/common/query/src/logical_plan/accumulator.rs @@ -17,10 +17,10 @@ use std::fmt::Debug; use std::sync::Arc; -use arrow::array::ArrayRef; use common_time::timestamp::TimeUnit; use datafusion_common::Result as DfResult; use datafusion_expr::Accumulator as DfAccumulator; +use datatypes::arrow::array::ArrayRef; use datatypes::prelude::*; use datatypes::value::ListValue; use datatypes::vectors::{Helper as VectorHelper, VectorRef}; @@ -266,9 +266,9 @@ fn try_convert_list_value(list: ListValue) -> Result { #[cfg(test)] mod tests { - use arrow::datatypes::DataType; use common_base::bytes::{Bytes, StringBytes}; use datafusion_common::ScalarValue; + use datatypes::arrow::datatypes::DataType; use datatypes::value::{ListValue, OrderedFloat}; use super::*; diff --git a/src/common/query/src/logical_plan/mod.rs b/src/common/query/src/logical_plan/mod.rs index 89b0e95ccd..5f57cd96aa 100644 --- a/src/common/query/src/logical_plan/mod.rs +++ b/src/common/query/src/logical_plan/mod.rs @@ -72,12 +72,12 @@ pub fn create_aggregate_function( mod tests { use std::sync::Arc; - use arrow::array::BooleanArray; - use arrow::datatypes::DataType; use datafusion_expr::{ ColumnarValue as DfColumnarValue, ScalarUDF as DfScalarUDF, TypeSignature as DfTypeSignature, }; + use datatypes::arrow::array::BooleanArray; + use datatypes::arrow::datatypes::DataType; use datatypes::prelude::*; use datatypes::vectors::{BooleanVector, VectorRef}; diff --git a/src/common/query/src/logical_plan/udaf.rs b/src/common/query/src/logical_plan/udaf.rs index 9550946ef9..6fb4a2f68a 100644 --- a/src/common/query/src/logical_plan/udaf.rs +++ b/src/common/query/src/logical_plan/udaf.rs @@ -19,11 +19,11 @@ use std::fmt::{self, Debug, Formatter}; use std::sync::Arc; -use arrow::datatypes::DataType as ArrowDataType; use datafusion_expr::{ AccumulatorFunctionImplementation as DfAccumulatorFunctionImplementation, AggregateUDF as DfAggregateUdf, StateTypeFunction as DfStateTypeFunction, }; +use datatypes::arrow::datatypes::DataType as ArrowDataType; use datatypes::prelude::*; use crate::function::{ diff --git a/src/common/query/src/physical_plan.rs b/src/common/query/src/physical_plan.rs index 710554a957..fa566b634f 100644 --- a/src/common/query/src/physical_plan.rs +++ b/src/common/query/src/physical_plan.rs @@ -199,7 +199,6 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter { #[cfg(test)] mod test { - use arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use common_recordbatch::{RecordBatch, RecordBatches}; use datafusion::arrow_print; use datafusion::datasource::TableProvider as DfTableProvider; @@ -209,6 +208,7 @@ mod test { use datafusion::prelude::ExecutionContext; use datafusion_common::field_util::SchemaExt; use datafusion_expr::Expr; + use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use datatypes::schema::Schema; use datatypes::vectors::Int32Vector; diff --git a/src/common/query/src/signature.rs b/src/common/query/src/signature.rs index 3f2eae8822..332ddbd83a 100644 --- a/src/common/query/src/signature.rs +++ b/src/common/query/src/signature.rs @@ -15,9 +15,9 @@ //! Signature module contains foundational types that are used to represent signatures, types, //! and return types of functions. //! Copied and modified from datafusion. -use arrow::datatypes::DataType as ArrowDataType; pub use datafusion::physical_plan::functions::Volatility; use datafusion_expr::{Signature as DfSignature, TypeSignature as DfTypeSignature}; +use datatypes::arrow::datatypes::DataType as ArrowDataType; use datatypes::data_type::DataType; use datatypes::prelude::ConcreteDataType; @@ -143,7 +143,7 @@ impl From for DfSignature { #[cfg(test)] mod tests { - use arrow::datatypes::DataType; + use datatypes::arrow::datatypes::DataType; use super::*; diff --git a/src/datanode/Cargo.toml b/src/datanode/Cargo.toml index 4b95e2a18b..5538239b11 100644 --- a/src/datanode/Cargo.toml +++ b/src/datanode/Cargo.toml @@ -54,20 +54,6 @@ tower = { version = "0.4", features = ["full"] } tower-http = { version = "0.3", features = ["full"] } frontend = { path = "../frontend" } -[dependencies.arrow] -package = "arrow2" -version = "0.10" -features = [ - "io_csv", - "io_json", - "io_parquet", - "io_parquet_compression", - "io_ipc", - "ahash", - "compute", - "serde_types", -] - [dev-dependencies] axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" } client = { path = "../client" } @@ -77,17 +63,3 @@ datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = ] } datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" } tempdir = "0.3" - -[dev-dependencies.arrow] -package = "arrow2" -version = "0.10" -features = [ - "io_csv", - "io_json", - "io_parquet", - "io_parquet_compression", - "io_ipc", - "ahash", - "compute", - "serde_types", -] diff --git a/src/datanode/src/tests/instance_test.rs b/src/datanode/src/tests/instance_test.rs index c8c7631f87..6914058ffb 100644 --- a/src/datanode/src/tests/instance_test.rs +++ b/src/datanode/src/tests/instance_test.rs @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -use arrow::array::{Int64Array, UInt64Array, Utf8Array}; use common_query::Output; use common_recordbatch::util; use datafusion::arrow_print; use datafusion_common::record_batch::RecordBatch as DfRecordBatch; +use datatypes::arrow::array::{Int64Array, UInt64Array, Utf8Array}; use datatypes::arrow_array::StringArray; use datatypes::prelude::ConcreteDataType; diff --git a/src/datatypes/src/arrow_array.rs b/src/datatypes/src/arrow_array.rs index 1c46932ab8..ca2cb6cc48 100644 --- a/src/datatypes/src/arrow_array.rs +++ b/src/datatypes/src/arrow_array.rs @@ -100,7 +100,9 @@ pub fn arrow_array_get(array: &dyn Array, idx: usize) -> Result { #[cfg(test)] mod test { use arrow::array::{ - Int64Array as ArrowI64Array, MutableListArray, MutablePrimitiveArray, TryExtend, *, + BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, + MutableListArray, MutablePrimitiveArray, TryExtend, UInt16Array, UInt32Array, UInt64Array, + UInt8Array, }; use arrow::buffer::Buffer; use arrow::datatypes::{DataType, TimeUnit as ArrowTimeUnit}; @@ -126,7 +128,7 @@ mod test { assert_eq!(Value::Int32(2), arrow_array_get(&array1, 1).unwrap()); let array1 = UInt32Array::from_vec(vec![1, 2, 3, 4]); assert_eq!(Value::UInt32(2), arrow_array_get(&array1, 1).unwrap()); - let array = ArrowI64Array::from_vec(vec![1, 2, 3, 4]); + let array = Int64Array::from_vec(vec![1, 2, 3, 4]); assert_eq!(Value::Int64(2), arrow_array_get(&array, 1).unwrap()); let array1 = UInt64Array::from_vec(vec![1, 2, 3, 4]); assert_eq!(Value::UInt64(2), arrow_array_get(&array1, 1).unwrap()); @@ -159,7 +161,7 @@ mod test { ); assert_eq!(Value::Null, arrow_array_get(&array3, 1).unwrap()); - let vector = TimestampVector::new(ArrowI64Array::from_vec(vec![1, 2, 3, 4])); + let vector = TimestampVector::new(Int64Array::from_vec(vec![1, 2, 3, 4])); let array = vector.to_boxed_arrow_array(); let value = arrow_array_get(&*array, 1).unwrap(); assert_eq!( diff --git a/src/datatypes/src/vectors.rs b/src/datatypes/src/vectors.rs index 8395daaf1c..6c9402849f 100644 --- a/src/datatypes/src/vectors.rs +++ b/src/datatypes/src/vectors.rs @@ -28,16 +28,6 @@ pub mod primitive; mod string; mod timestamp; -pub mod all { - //! All vector types. - pub use crate::vectors::{ - BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, Float32Vector, - Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector, ListVector, NullVector, - PrimitiveVector, StringVector, TimestampVector, UInt16Vector, UInt32Vector, UInt64Vector, - UInt8Vector, - }; -} - use std::any::Any; use std::fmt::Debug; use std::sync::Arc; diff --git a/src/datatypes/src/vectors/operations.rs b/src/datatypes/src/vectors/operations.rs index 6caf8fe300..ea09c5ef79 100644 --- a/src/datatypes/src/vectors/operations.rs +++ b/src/datatypes/src/vectors/operations.rs @@ -20,8 +20,10 @@ use arrow::bitmap::MutableBitmap; use crate::error::Result; use crate::types::PrimitiveElement; -use crate::vectors::all::*; -use crate::vectors::{Vector, VectorRef}; +use crate::vectors::{ + BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, ListVector, + NullVector, PrimitiveVector, StringVector, TimestampVector, Vector, VectorRef, +}; /// Vector compute operations. pub trait VectorOp { diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index 0324f7d855..401ea70aaa 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -46,20 +46,6 @@ store-api = { path = "../store-api" } table = { path = "../table" } tokio = { version = "1.18", features = ["full"] } -[dependencies.arrow] -package = "arrow2" -version = "0.10" -features = [ - "io_csv", - "io_json", - "io_parquet", - "io_parquet_compression", - "io_ipc", - "ahash", - "compute", - "serde_types", -] - [dev-dependencies] datanode = { path = "../datanode" } futures = "0.3" diff --git a/src/query/Cargo.toml b/src/query/Cargo.toml index a5cc83d78d..f4163689f1 100644 --- a/src/query/Cargo.toml +++ b/src/query/Cargo.toml @@ -32,20 +32,6 @@ sql = { path = "../sql" } table = { path = "../table" } tokio = "1.0" -[dependencies.arrow] -package = "arrow2" -version = "0.10" -features = [ - "io_csv", - "io_json", - "io_parquet", - "io_parquet_compression", - "io_ipc", - "ahash", - "compute", - "serde_types", -] - [dev-dependencies] approx_eq = "0.1" common-function-macro = { path = "../common/function-macro" } diff --git a/src/query/src/datafusion.rs b/src/query/src/datafusion.rs index caf8fce156..1139416803 100644 --- a/src/query/src/datafusion.rs +++ b/src/query/src/datafusion.rs @@ -244,13 +244,13 @@ impl QueryExecutor for DatafusionQueryEngine { mod tests { use std::sync::Arc; - use arrow::array::UInt64Array; use catalog::local::{MemoryCatalogProvider, MemorySchemaProvider}; use catalog::{CatalogList, CatalogProvider, SchemaProvider}; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use common_query::Output; use common_recordbatch::util; use datafusion::field_util::{FieldExt, SchemaExt}; + use datatypes::arrow::array::UInt64Array; use table::table::numbers::NumbersTable; use crate::query_engine::{QueryEngineFactory, QueryEngineRef}; diff --git a/src/query/src/datafusion/planner.rs b/src/query/src/datafusion/planner.rs index 08c62e9844..6d5dcae527 100644 --- a/src/query/src/datafusion/planner.rs +++ b/src/query/src/datafusion/planner.rs @@ -14,13 +14,13 @@ use std::sync::Arc; -use arrow::datatypes::DataType; use common_query::logical_plan::create_aggregate_function; use datafusion::catalog::TableReference; use datafusion::datasource::TableProvider; use datafusion::physical_plan::udaf::AggregateUDF; use datafusion::physical_plan::udf::ScalarUDF; use datafusion::sql::planner::{ContextProvider, SqlToRel}; +use datatypes::arrow::datatypes::DataType; use snafu::ResultExt; use sql::statements::query::Query; use sql::statements::statement::Statement; diff --git a/src/query/src/optimizer.rs b/src/query/src/optimizer.rs index 33adbdef70..e8841f95f1 100644 --- a/src/query/src/optimizer.rs +++ b/src/query/src/optimizer.rs @@ -15,9 +15,6 @@ use std::str::FromStr; use std::sync::Arc; -use arrow::compute; -use arrow::compute::cast::CastOptions; -use arrow::datatypes::DataType; use common_telemetry::debug; use common_time::timestamp::{TimeUnit, Timestamp}; use datafusion::execution::context::ExecutionProps; @@ -28,6 +25,9 @@ use datafusion::logical_plan::{ use datafusion::optimizer::optimizer::OptimizerRule; use datafusion::optimizer::utils; use datafusion_common::{DFSchemaRef, DataFusionError, Result, ScalarValue}; +use datatypes::arrow::compute; +use datatypes::arrow::compute::cast::CastOptions; +use datatypes::arrow::datatypes::DataType; /// TypeConversionRule converts some literal values in logical plan to other types according /// to data type of corresponding columns. @@ -343,12 +343,14 @@ mod tests { #[test] fn test_convert_timestamp_str() { + use datatypes::arrow::datatypes::TimeUnit as ArrowTimeUnit; + let schema_ref = Arc::new( DFSchema::new_with_metadata( vec![DFField::new( None, "ts", - DataType::Timestamp(arrow::datatypes::TimeUnit::Millisecond, None), + DataType::Timestamp(ArrowTimeUnit::Millisecond, None), true, )], HashMap::new(), diff --git a/src/query/src/sql.rs b/src/query/src/sql.rs index 149ea96ad1..51b81d01af 100644 --- a/src/query/src/sql.rs +++ b/src/query/src/sql.rs @@ -231,7 +231,6 @@ fn describe_column_semantic_types( mod test { use std::sync::Arc; - use arrow::array::PrimitiveArray; use catalog::local::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider}; use catalog::{CatalogList, CatalogManagerRef, CatalogProvider, SchemaProvider}; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; @@ -239,6 +238,7 @@ mod test { use common_query::Output; use common_recordbatch::{RecordBatch, RecordBatches}; use common_time::timestamp::TimeUnit; + use datatypes::arrow::array::PrimitiveArray; use datatypes::prelude::ConcreteDataType; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, Schema, SchemaRef}; use datatypes::vectors::{StringVector, TimestampVector, UInt32Vector, VectorRef}; diff --git a/src/query/tests/query_engine_test.rs b/src/query/tests/query_engine_test.rs index 291c31057f..26afd8c9cc 100644 --- a/src/query/tests/query_engine_test.rs +++ b/src/query/tests/query_engine_test.rs @@ -16,7 +16,6 @@ mod pow; use std::sync::Arc; -use arrow::array::UInt32Array; use catalog::local::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider}; use catalog::{CatalogList, CatalogProvider, SchemaProvider}; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; @@ -26,6 +25,7 @@ use common_recordbatch::error::Result as RecordResult; use common_recordbatch::{util, RecordBatch}; use datafusion::field_util::{FieldExt, SchemaExt}; use datafusion::logical_plan::LogicalPlanBuilder; +use datatypes::arrow::array::UInt32Array; use datatypes::for_all_primitive_types; use datatypes::prelude::*; use datatypes::schema::{ColumnSchema, Schema}; diff --git a/src/script/src/python/builtins/mod.rs b/src/script/src/python/builtins/mod.rs index ef85d088fe..d6ebc5856f 100644 --- a/src/script/src/python/builtins/mod.rs +++ b/src/script/src/python/builtins/mod.rs @@ -903,7 +903,7 @@ pub(crate) mod greptime_builtin { duration: i64, vm: &VirtualMachine, ) -> PyResult>> { - use arrow::datatypes::DataType; + use datatypes::arrow::datatypes::DataType; match (oldest.data_type(), newest.data_type()) { (DataType::Int64, DataType::Int64) => (), _ => { diff --git a/src/script/src/python/builtins/test.rs b/src/script/src/python/builtins/test.rs index 82c80e4a29..2f008611da 100644 --- a/src/script/src/python/builtins/test.rs +++ b/src/script/src/python/builtins/test.rs @@ -18,9 +18,9 @@ use std::io::Read; use std::path::Path; use std::sync::Arc; -use arrow::array::{Float64Array, Int64Array, PrimitiveArray}; -use arrow::compute::cast::CastOptions; -use arrow::datatypes::DataType; +use datatypes::arrow::array::{Float64Array, Int64Array, PrimitiveArray}; +use datatypes::arrow::compute::cast::CastOptions; +use datatypes::arrow::datatypes::DataType; use datatypes::vectors::VectorRef; use ron::from_str as from_ron_string; use rustpython_vm::builtins::{PyFloat, PyInt, PyList}; @@ -30,9 +30,10 @@ use rustpython_vm::scope::Scope; use rustpython_vm::{AsObject, PyObjectRef, VirtualMachine}; use serde::{Deserialize, Serialize}; -use super::{greptime_builtin, *}; +use super::*; use crate::python::utils::{format_py_error, is_instance}; use crate::python::PyVector; + #[test] fn convert_scalar_to_py_obj_and_back() { rustpython_vm::Interpreter::with_init(Default::default(), |vm| { diff --git a/src/script/src/python/coprocessor.rs b/src/script/src/python/coprocessor.rs index 483ecfe2f8..8dad524adf 100644 --- a/src/script/src/python/coprocessor.rs +++ b/src/script/src/python/coprocessor.rs @@ -327,8 +327,8 @@ fn set_items_in_scope( /// ```ignore /// use std::sync::Arc; /// use datafusion_common::record_batch::RecordBatch as DfRecordBatch; -/// use arrow::array::PrimitiveArray; -/// use arrow::datatypes::{DataType, Field, Schema}; +/// use datatypes::arrow::array::PrimitiveArray; +/// use datatypes::arrow::datatypes::{DataType, Field, Schema}; /// use common_function::scalars::python::exec_coprocessor; /// let python_source = r#" /// @copr(args=["cpu", "mem"], returns=["perf", "what"])