feat: frontend instance (#238)

* feat: frontend instance

* no need to carry column length in `Column` proto

* add more tests

* rebase develop

* create a new variant with already provisioned RecordBatches in Output

* resolve code review comments

* new frontend instance does not connect datanode grpc

* add more tests

* add more tests

* rebase develop

Co-authored-by: luofucong <luofucong@greptime.com>
This commit is contained in:
LFC
2022-09-13 17:10:22 +08:00
committed by GitHub
parent bdd5bdd917
commit ec99eb0cd0
71 changed files with 2324 additions and 362 deletions

View File

@@ -27,13 +27,21 @@ pub enum InnerError {
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Failed to create RecordBatches, reason: {}", reason))]
CreateRecordBatches {
reason: String,
backtrace: Backtrace,
},
}
impl ErrorExt for InnerError {
fn status_code(&self) -> StatusCode {
match self {
InnerError::NewDfRecordBatch { .. } => StatusCode::InvalidArguments,
InnerError::DataTypes { .. } => StatusCode::Internal,
InnerError::DataTypes { .. } | InnerError::CreateRecordBatches { .. } => {
StatusCode::Internal
}
InnerError::External { source } => source.status_code(),
}
}

View File

@@ -9,6 +9,7 @@ use error::Result;
use futures::task::{Context, Poll};
use futures::Stream;
pub use recordbatch::RecordBatch;
use snafu::ensure;
pub trait RecordBatchStream: Stream<Item = Result<RecordBatch>> {
fn schema(&self) -> SchemaRef;
@@ -43,3 +44,76 @@ impl Stream for EmptyRecordBatchStream {
Poll::Ready(None)
}
}
#[derive(Debug)]
pub struct RecordBatches {
schema: SchemaRef,
batches: Vec<RecordBatch>,
}
impl RecordBatches {
pub fn try_new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Result<Self> {
for batch in batches.iter() {
ensure!(
batch.schema == schema,
error::CreateRecordBatchesSnafu {
reason: format!(
"expect RecordBatch schema equals {:?}, actual: {:?}",
schema, batch.schema
)
}
)
}
Ok(Self { schema, batches })
}
pub fn schema(&self) -> SchemaRef {
self.schema.clone()
}
pub fn to_vec(self) -> Vec<RecordBatch> {
self.batches
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::prelude::{ConcreteDataType, VectorRef};
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::{BooleanVector, Int32Vector, StringVector};
use super::*;
#[test]
fn test_recordbatches() {
let column_a = ColumnSchema::new("a", ConcreteDataType::int32_datatype(), false);
let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
let column_c = ColumnSchema::new("c", ConcreteDataType::boolean_datatype(), false);
let va: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
let vb: VectorRef = Arc::new(StringVector::from(vec!["hello", "world"]));
let vc: VectorRef = Arc::new(BooleanVector::from(vec![true, false]));
let schema1 = Arc::new(Schema::new(vec![column_a.clone(), column_b]));
let batch1 = RecordBatch::new(schema1.clone(), vec![va.clone(), vb]).unwrap();
let schema2 = Arc::new(Schema::new(vec![column_a, column_c]));
let batch2 = RecordBatch::new(schema2.clone(), vec![va, vc]).unwrap();
let result = RecordBatches::try_new(schema1.clone(), vec![batch1.clone(), batch2]);
assert!(result.is_err());
assert_eq!(
result.unwrap_err().to_string(),
format!(
"Failed to create RecordBatches, reason: expect RecordBatch schema equals {:?}, actual: {:?}",
schema1, schema2
)
);
let batches = RecordBatches::try_new(schema1.clone(), vec![batch1.clone()]).unwrap();
assert_eq!(schema1, batches.schema());
assert_eq!(vec![batch1], batches.to_vec());
}
}

View File

@@ -74,6 +74,7 @@ pub fn init_global_logging(
.with_target("datafusion", Level::WARN)
.with_target("reqwest", Level::WARN)
.with_target("sqlparser", Level::WARN)
.with_target("h2", Level::INFO)
.with_default(
directives
.parse::<filter::LevelFilter>()