feat: flight aboard (#840)

feat: replace old GRPC interface with Arrow Flight
This commit is contained in:
LFC
2023-01-09 17:06:24 +08:00
committed by GitHub
parent 9e58311ecd
commit 72f05a3137
56 changed files with 1268 additions and 2210 deletions

View File

@@ -24,6 +24,9 @@ pub mod prelude {
pub use crate::ext::{BoxedError, ErrorExt};
pub use crate::format::DebugFormat;
pub use crate::status_code::StatusCode;
/// String key named `INNER_ERROR_CODE`.
// NOTE(review): presumably the metadata/header key under which an inner error's
// status code is transported — confirm against the call sites.
pub const INNER_ERROR_CODE: &str = "INNER_ERROR_CODE";
/// String key named `INNER_ERROR_MSG`.
// NOTE(review): presumably the companion key carrying the inner error's message —
// confirm against the call sites.
pub const INNER_ERROR_MSG: &str = "INNER_ERROR_MSG";
}
pub use snafu;

View File

@@ -18,7 +18,7 @@ datafusion.workspace = true
datatypes = { path = "../../datatypes" }
flatbuffers = "22"
futures = "0.3"
prost = "0.11"
prost.workspace = true
snafu = { version = "0.7", features = ["backtraces"] }
tokio.workspace = true
tonic.workspace = true

View File

@@ -13,34 +13,25 @@
// limitations under the License.
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
use api::result::ObjectResultBuilder;
use api::v1::{FlightDataExt, ObjectResult};
use api::v1::FlightDataExt;
use arrow_flight::utils::{flight_data_from_arrow_batch, flight_data_to_arrow_batch};
use arrow_flight::{FlightData, IpcMessage, SchemaAsIpc};
use common_error::prelude::StatusCode;
use common_recordbatch::{RecordBatch, RecordBatches};
use datatypes::arrow;
use datatypes::arrow::datatypes::Schema as ArrowSchema;
use datatypes::arrow::ipc::{root_as_message, writer, MessageHeader};
use datatypes::schema::{Schema, SchemaRef};
use flatbuffers::FlatBufferBuilder;
use futures::TryStreamExt;
use prost::Message;
use snafu::{OptionExt, ResultExt};
use tonic::codegen::futures_core::Stream;
use tonic::Response;
use crate::error::{
ConvertArrowSchemaSnafu, CreateRecordBatchSnafu, DecodeFlightDataSnafu, InvalidFlightDataSnafu,
Result,
};
/// Shorthand for a `Result` whose error type is [`tonic::Status`].
type TonicResult<T> = std::result::Result<T, tonic::Status>;
/// A pinned, boxed, dynamically dispatched [`Stream`] that yields
/// [`TonicResult<T>`] items and is `Send + Sync + 'static`.
type TonicStream<T> = Pin<Box<dyn Stream<Item = TonicResult<T>> + Send + Sync + 'static>>;
#[derive(Debug, Clone)]
pub enum FlightMessage {
Schema(SchemaRef),
@@ -147,37 +138,6 @@ impl FlightDecoder {
}
}
// TODO(LFC): Remove it once we completely get rid of old GRPC interface.
/// Drains a Flight response stream and packs the outcome into an [`ObjectResult`].
///
/// The whole stream is collected before anything is built:
/// - if every item is `Ok`, the collected [`FlightData`] frames are attached and the
///   status code is `StatusCode::Success`;
/// - if the stream yields a `tonic::Status` error, the result carries
///   `StatusCode::Internal` and the status rendered as the error message.
///
/// Both outcomes are returned as `Ok(..)`; a stream failure is reported inside the
/// [`ObjectResult`], not through this function's `Err` variant.
pub async fn flight_data_to_object_result(
    response: Response<TonicStream<FlightData>>,
) -> Result<ObjectResult> {
    let collected: TonicResult<Vec<FlightData>> = response.into_inner().try_collect().await;

    let object_result = match collected {
        Err(status) => ObjectResultBuilder::new()
            .status_code(StatusCode::Internal as _)
            .err_msg(status.to_string())
            .build(),
        Ok(frames) => ObjectResultBuilder::new()
            .status_code(StatusCode::Success as u32)
            .flight_data(frames)
            .build(),
    };

    Ok(object_result)
}
/// Turns raw, protobuf-encoded Flight frames into decoded [`FlightMessage`]s.
///
/// Works in two passes, in this order:
/// 1. every byte buffer is parsed as a [`FlightData`] protobuf message; the first
///    parse failure aborts with a `DecodeFlightData` error;
/// 2. the parsed frames are fed, in input order, through a single
///    [`FlightDecoder`]; the first failure there aborts with that error.
///
/// No frame reaches the decoder unless all buffers parsed successfully.
pub fn raw_flight_data_to_message(raw_data: Vec<Vec<u8>>) -> Result<Vec<FlightMessage>> {
    // Pass 1: protobuf-decode every buffer up front.
    let mut frames: Vec<FlightData> = Vec::with_capacity(raw_data.len());
    for bytes in raw_data {
        let frame = FlightData::decode(bytes.as_slice()).context(DecodeFlightDataSnafu)?;
        frames.push(frame);
    }

    // Pass 2: one decoder instance is shared across all frames, in order.
    let decoder = &mut FlightDecoder::default();
    let mut messages: Vec<FlightMessage> = Vec::with_capacity(frames.len());
    for frame in frames {
        messages.push(decoder.try_decode(frame)?);
    }
    Ok(messages)
}
pub fn flight_messages_to_recordbatches(messages: Vec<FlightMessage>) -> Result<RecordBatches> {
if messages.is_empty() {
Ok(RecordBatches::empty())

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{Debug, Formatter};
use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
pub mod columnar_value;
@@ -29,4 +31,16 @@ pub enum Output {
Stream(SendableRecordBatchStream),
}
impl Debug for Output {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Output::AffectedRows(rows) => write!(f, "Output::AffectedRows({rows})"),
Output::RecordBatches(recordbatches) => {
write!(f, "Output::RecordBatches({recordbatches:?})")
}
Output::Stream(_) => write!(f, "Output::Stream(<stream>)"),
}
}
}
pub use datafusion::physical_plan::ExecutionPlan as DfPhysicalPlan;