mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-26 18:00:41 +00:00
feat: Adds RegionScanner trait (#3948)
* feat: define region scanner * feat: single partition scanner * feat: use single partition scanner * feat: implement ExecutionPlan wip * feat: mito engine returns single partition scanner * feat: implement DisplayAs for region server * feat: dummy table provider use handle_partitioned_query() * test: update sqlness test * feat: table provider use ReadFromRegion * refactor: remove StreamScanAdapter * chore: update lock * style: fix clippy * refactor: remove handle_query from the RegionEngine trait * chore: address CR comments * refactor: rename methods * refactor: rename ReadFromRegion to RegionScanExec
This commit is contained in:
@@ -17,6 +17,7 @@ common-macro.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-wal.workspace = true
|
||||
datafusion-physical-plan.workspace = true
|
||||
datatypes.workspace = true
|
||||
derive_builder.workspace = true
|
||||
futures.workspace = true
|
||||
|
||||
@@ -15,15 +15,19 @@
|
||||
//! Region Engine's definition
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
use std::fmt::{Debug, Display};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use api::greptime_proto::v1::meta::{GrantedRegion as PbGrantedRegion, RegionRole as PbRegionRole};
|
||||
use api::region::RegionResponse;
|
||||
use async_trait::async_trait;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_query::error::ExecuteRepeatedlySnafu;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use datafusion_physical_plan::{DisplayAs, DisplayFormatType};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::logstore::entry;
|
||||
use crate::metadata::RegionMetadataRef;
|
||||
@@ -120,6 +124,57 @@ impl From<PbRegionRole> for RegionRole {
|
||||
}
|
||||
}
|
||||
|
||||
/// Output partition properties of the [RegionScanner].
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so that callers can
/// copy and compare partitioning descriptions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ScannerPartitioning {
    /// Unknown partitioning scheme with a known number of partitions.
    Unknown(usize),
}

impl ScannerPartitioning {
    /// Returns the number of partitions.
    #[must_use]
    pub fn num_partitions(&self) -> usize {
        // Exhaustive match on purpose: adding a variant must force this method to be revisited.
        match self {
            ScannerPartitioning::Unknown(num_partitions) => *num_partitions,
        }
    }
}
|
||||
|
||||
/// Properties of the [RegionScanner].
|
||||
#[derive(Debug)]
|
||||
pub struct ScannerProperties {
|
||||
/// Partitions to scan.
|
||||
partitioning: ScannerPartitioning,
|
||||
}
|
||||
|
||||
impl ScannerProperties {
|
||||
/// Creates a new [ScannerProperties] with the given partitioning.
|
||||
pub fn new(partitioning: ScannerPartitioning) -> Self {
|
||||
Self { partitioning }
|
||||
}
|
||||
|
||||
/// Returns properties of partitions to scan.
|
||||
pub fn partitioning(&self) -> &ScannerPartitioning {
|
||||
&self.partitioning
|
||||
}
|
||||
}
|
||||
|
||||
/// A scanner that provides a way to scan the region concurrently.
|
||||
/// The scanner splits the region into partitions so that each partition can be scanned concurrently.
|
||||
/// You can use this trait to implement an [ExecutionPlan](datafusion_physical_plan::ExecutionPlan).
|
||||
pub trait RegionScanner: Debug + DisplayAs + Send + Sync {
|
||||
/// Returns the properties of the scanner.
|
||||
fn properties(&self) -> &ScannerProperties;
|
||||
|
||||
/// Returns the schema of the record batches.
|
||||
fn schema(&self) -> SchemaRef;
|
||||
|
||||
/// Scans the partition and returns a stream of record batches.
|
||||
fn scan_partition(&self, partition: usize) -> Result<SendableRecordBatchStream, BoxedError>;
|
||||
}
|
||||
|
||||
/// Reference-counted pointer to a [RegionScanner] trait object.
pub type RegionScannerRef = Arc<dyn RegionScanner>;
|
||||
|
||||
#[async_trait]
|
||||
pub trait RegionEngine: Send + Sync {
|
||||
/// Name of this engine
|
||||
@@ -132,12 +187,12 @@ pub trait RegionEngine: Send + Sync {
|
||||
request: RegionRequest,
|
||||
) -> Result<RegionResponse, BoxedError>;
|
||||
|
||||
/// Handles substrait query and return a stream of record batches
|
||||
/// Handles query and returns a scanner that can be used to scan the region concurrently.
|
||||
async fn handle_query(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
request: ScanRequest,
|
||||
) -> Result<SendableRecordBatchStream, BoxedError>;
|
||||
) -> Result<RegionScannerRef, BoxedError>;
|
||||
|
||||
/// Retrieves region's metadata.
|
||||
async fn get_metadata(&self, region_id: RegionId) -> Result<RegionMetadataRef, BoxedError>;
|
||||
@@ -172,3 +227,52 @@ pub trait RegionEngine: Send + Sync {
|
||||
}
|
||||
|
||||
/// Reference-counted pointer to a [RegionEngine] trait object.
pub type RegionEngineRef = Arc<dyn RegionEngine>;
|
||||
|
||||
/// A [RegionScanner] that only scans a single partition.
|
||||
pub struct SinglePartitionScanner {
|
||||
stream: Mutex<Option<SendableRecordBatchStream>>,
|
||||
schema: SchemaRef,
|
||||
properties: ScannerProperties,
|
||||
}
|
||||
|
||||
impl SinglePartitionScanner {
|
||||
/// Creates a new [SinglePartitionScanner] with the given stream.
|
||||
pub fn new(stream: SendableRecordBatchStream) -> Self {
|
||||
let schema = stream.schema();
|
||||
Self {
|
||||
stream: Mutex::new(Some(stream)),
|
||||
schema,
|
||||
properties: ScannerProperties::new(ScannerPartitioning::Unknown(1)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for SinglePartitionScanner {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "SinglePartitionScanner: <SendableRecordBatchStream>")
|
||||
}
|
||||
}
|
||||
|
||||
impl RegionScanner for SinglePartitionScanner {
|
||||
fn properties(&self) -> &ScannerProperties {
|
||||
&self.properties
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn scan_partition(&self, _partition: usize) -> Result<SendableRecordBatchStream, BoxedError> {
|
||||
let mut stream = self.stream.lock().unwrap();
|
||||
stream
|
||||
.take()
|
||||
.context(ExecuteRepeatedlySnafu)
|
||||
.map_err(BoxedError::new)
|
||||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for SinglePartitionScanner {
|
||||
fn fmt_as(&self, _t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "{:?}", self)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user