refactor: Refactor usage of BoxedError (#48)

* feat: Define a general boxed error

* refactor: common_function use Error in common_query

* feat: Add tests to define_opaque_error macro

* refactor: Refactor table and table engine error

* refactor: recordbatch remove arrow dev-dependency

* refactor: datanode crate use common_error::BoxedError

* chore: Fix clippy

* feat: Returning source status code when using BoxedError

* test: Fix opaque error test

* test: Add tests for table::Error & table_engine::Error

* test: Add test for RecordBatch::new()

* test: Remove generated tests from define_opaque_error

* chore: Address cr comment
This commit is contained in:
evenyag
2022-06-21 15:24:45 +08:00
committed by GitHub
parent 4071b0cff2
commit 6ec870625f
18 changed files with 268 additions and 191 deletions

View File

@@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
common-error = { path = "../error" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git" , branch = "arrow2", features = ["simd"]}
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git" , branch = "arrow2"}
datatypes = { path = "../../datatypes" }
@@ -12,13 +13,6 @@ paste = "1.0"
serde = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
[dev-dependencies.arrow]
package = "arrow2"
version="0.10"
features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
[dev-dependencies]
serde_json = "1.0"
tokio = { version = "1.18", features = ["full"] }

View File

@@ -1,20 +1,40 @@
use datatypes::arrow::error::ArrowError;
use snafu::{Backtrace, Snafu};
//! Error of record batch.
use std::any::Any;
// TODO(dennis): use ErrorExt instead.
pub type BoxedError = Box<dyn std::error::Error + Send + Sync>;
use common_error::prelude::*;
common_error::define_opaque_error!(Error);
pub type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Arrow error: {}", source))]
Arrow {
source: ArrowError,
pub enum InnerError {
#[snafu(display("Fail to create datafusion record batch, source: {}", source))]
NewDfRecordBatch {
source: datatypes::arrow::error::ArrowError,
backtrace: Backtrace,
},
#[snafu(display("Storage error: {}, source: {}", msg, source))]
Storage { source: BoxedError, msg: String },
}
pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for InnerError {
fn status_code(&self) -> StatusCode {
match self {
InnerError::NewDfRecordBatch { .. } => StatusCode::InvalidArguments,
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}
impl From<InnerError> for Error {
fn from(e: InnerError) -> Error {
Error::new(e)
}
}

View File

@@ -1,8 +1,11 @@
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::schema::SchemaRef;
use datatypes::vectors::Helper;
use datatypes::vectors::{Helper, VectorRef};
use serde::ser::{Error, SerializeStruct};
use serde::{Serialize, Serializer};
use snafu::ResultExt;
use crate::error::{self, Result};
#[derive(Clone, Debug, PartialEq)]
pub struct RecordBatch {
@@ -10,8 +13,25 @@ pub struct RecordBatch {
pub df_recordbatch: DfRecordBatch,
}
impl RecordBatch {
pub fn new<I: IntoIterator<Item = VectorRef>>(
schema: SchemaRef,
columns: I,
) -> Result<RecordBatch> {
let arrow_arrays = columns.into_iter().map(|v| v.to_arrow_array()).collect();
let df_recordbatch = DfRecordBatch::try_new(schema.arrow_schema().clone(), arrow_arrays)
.context(error::NewDfRecordBatchSnafu)?;
Ok(RecordBatch {
schema,
df_recordbatch,
})
}
}
impl Serialize for RecordBatch {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
where
S: Serializer,
{
@@ -23,7 +43,7 @@ impl Serialize for RecordBatch {
let vec = df_columns
.iter()
.map(|c| Helper::try_into_vector(c.clone())?.serialize_to_json())
.collect::<Result<Vec<_>, _>>()
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(S::Error::custom)?;
s.serialize_field("columns", &vec)?;
@@ -35,14 +55,38 @@ impl Serialize for RecordBatch {
mod tests {
use std::sync::Arc;
use arrow::array::UInt32Array;
use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datafusion_common::field_util::SchemaExt;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::array::UInt32Array;
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datatypes::schema::Schema;
use datatypes::vectors::{UInt32Vector, Vector};
use super::*;
#[test]
fn test_new_record_batch() {
let arrow_schema = Arc::new(ArrowSchema::new(vec![
Field::new("c1", DataType::UInt32, false),
Field::new("c2", DataType::UInt32, false),
]));
let schema = Arc::new(Schema::try_from(arrow_schema).unwrap());
let v = Arc::new(UInt32Vector::from_slice(&[1, 2, 3]));
let columns: Vec<VectorRef> = vec![v.clone(), v.clone()];
let batch = RecordBatch::new(schema.clone(), columns).unwrap();
let expect = v.to_arrow_array();
for column in batch.df_recordbatch.columns() {
let array = column.as_any().downcast_ref::<UInt32Array>().unwrap();
assert_eq!(
expect.as_any().downcast_ref::<UInt32Array>().unwrap(),
array
);
}
assert_eq!(schema, batch.schema);
}
#[test]
pub fn test_serialize_recordbatch() {
let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
@@ -64,12 +108,10 @@ mod tests {
df_recordbatch: df_batch,
};
let mut output = vec![];
let mut serializer = serde_json::Serializer::new(&mut output);
batch.serialize(&mut serializer).unwrap();
let output = serde_json::to_string(&batch).unwrap();
assert_eq!(
r#"{"schema":{"fields":[{"name":"number","data_type":"UInt32","is_nullable":false,"metadata":{}}],"metadata":{}},"columns":[[0,1,2,3,4,5,6,7,8,9]]}"#,
String::from_utf8_lossy(&output)
output
);
}
}

View File

@@ -12,10 +12,10 @@ mod tests {
use std::pin::Pin;
use std::sync::Arc;
use arrow::array::UInt32Array;
use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datafusion_common::field_util::SchemaExt;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::array::UInt32Array;
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datatypes::schema::Schema;
use datatypes::schema::SchemaRef;
use futures::task::{Context, Poll};