feat: Adds RegionScanner trait (#3948)

* feat: define region scanner

* feat: single partition scanner

* feat: use single partition scanner

* feat: implement ExecutionPlan wip

* feat: mito engine returns single partition scanner

* feat: implement DisplayAs for region server

* feat: dummy table provider use handle_partitioned_query()

* test: update sqlness test

* feat: table provider use ReadFromRegion

* refactor: remove StreamScanAdapter

* chore: update lock

* style: fix clippy

* refactor: remove handle_query from the RegionEngine trait

* chore: address CR comments

* refactor: rename methods

* refactor: rename ReadFromRegion to RegionScanExec
This commit is contained in:
Yingwen
2024-05-20 19:52:00 +08:00
committed by GitHub
parent 19543f9819
commit 179c8c716c
32 changed files with 371 additions and 209 deletions

View File

@@ -17,6 +17,7 @@ common-macro.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-wal.workspace = true
datafusion-physical-plan.workspace = true
datatypes.workspace = true
derive_builder.workspace = true
futures.workspace = true

View File

@@ -15,15 +15,19 @@
//! Region Engine's definition
use std::any::Any;
use std::fmt::Display;
use std::sync::Arc;
use std::fmt::{Debug, Display};
use std::sync::{Arc, Mutex};
use api::greptime_proto::v1::meta::{GrantedRegion as PbGrantedRegion, RegionRole as PbRegionRole};
use api::region::RegionResponse;
use async_trait::async_trait;
use common_error::ext::BoxedError;
use common_query::error::ExecuteRepeatedlySnafu;
use common_recordbatch::SendableRecordBatchStream;
use datafusion_physical_plan::{DisplayAs, DisplayFormatType};
use datatypes::schema::SchemaRef;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use crate::logstore::entry;
use crate::metadata::RegionMetadataRef;
@@ -120,6 +124,57 @@ impl From<PbRegionRole> for RegionRole {
}
}
/// Output partition properties of the [RegionScanner].
#[derive(Debug)]
pub enum ScannerPartitioning {
/// Unknown partitioning scheme with a known number of partitions
Unknown(usize),
}
impl ScannerPartitioning {
/// Returns the number of partitions.
pub fn num_partitions(&self) -> usize {
match self {
ScannerPartitioning::Unknown(num_partitions) => *num_partitions,
}
}
}
/// Properties of the [RegionScanner].
#[derive(Debug)]
pub struct ScannerProperties {
/// Partitions to scan.
partitioning: ScannerPartitioning,
}
impl ScannerProperties {
/// Creates a new [ScannerProperties] with the given partitioning.
pub fn new(partitioning: ScannerPartitioning) -> Self {
Self { partitioning }
}
/// Returns properties of partitions to scan.
pub fn partitioning(&self) -> &ScannerPartitioning {
&self.partitioning
}
}
/// A scanner that provides a way to scan the region concurrently.
/// The scanner splits the region into partitions so that each partition can be scanned concurrently.
/// You can use this trait to implement an [ExecutionPlan](datafusion_physical_plan::ExecutionPlan).
pub trait RegionScanner: Debug + DisplayAs + Send + Sync {
/// Returns the properties of the scanner.
fn properties(&self) -> &ScannerProperties;
/// Returns the schema of the record batches.
fn schema(&self) -> SchemaRef;
/// Scans the partition and returns a stream of record batches.
fn scan_partition(&self, partition: usize) -> Result<SendableRecordBatchStream, BoxedError>;
}
pub type RegionScannerRef = Arc<dyn RegionScanner>;
#[async_trait]
pub trait RegionEngine: Send + Sync {
/// Name of this engine
@@ -132,12 +187,12 @@ pub trait RegionEngine: Send + Sync {
request: RegionRequest,
) -> Result<RegionResponse, BoxedError>;
/// Handles substrait query and return a stream of record batches
/// Handles query and return a scanner that can be used to scan the region concurrently.
async fn handle_query(
&self,
region_id: RegionId,
request: ScanRequest,
) -> Result<SendableRecordBatchStream, BoxedError>;
) -> Result<RegionScannerRef, BoxedError>;
/// Retrieves region's metadata.
async fn get_metadata(&self, region_id: RegionId) -> Result<RegionMetadataRef, BoxedError>;
@@ -172,3 +227,52 @@ pub trait RegionEngine: Send + Sync {
}
pub type RegionEngineRef = Arc<dyn RegionEngine>;
/// A [RegionScanner] that only scans a single partition.
pub struct SinglePartitionScanner {
stream: Mutex<Option<SendableRecordBatchStream>>,
schema: SchemaRef,
properties: ScannerProperties,
}
impl SinglePartitionScanner {
/// Creates a new [SinglePartitionScanner] with the given stream.
pub fn new(stream: SendableRecordBatchStream) -> Self {
let schema = stream.schema();
Self {
stream: Mutex::new(Some(stream)),
schema,
properties: ScannerProperties::new(ScannerPartitioning::Unknown(1)),
}
}
}
impl Debug for SinglePartitionScanner {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "SinglePartitionScanner: <SendableRecordBatchStream>")
}
}
impl RegionScanner for SinglePartitionScanner {
fn properties(&self) -> &ScannerProperties {
&self.properties
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn scan_partition(&self, _partition: usize) -> Result<SendableRecordBatchStream, BoxedError> {
let mut stream = self.stream.lock().unwrap();
stream
.take()
.context(ExecuteRepeatedlySnafu)
.map_err(BoxedError::new)
}
}
impl DisplayAs for SinglePartitionScanner {
fn fmt_as(&self, _t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{:?}", self)
}
}