Skip to main content

query/
dummy_catalog.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Dummy catalog for region server.
16
17use std::any::Any;
18use std::collections::HashMap;
19use std::fmt;
20use std::sync::{Arc, Mutex};
21
22use api::v1::SemanticType;
23use async_trait::async_trait;
24use catalog::error::Result as CatalogResult;
25use catalog::{CatalogManager, CatalogManagerRef};
26use common_recordbatch::OrderOption;
27use common_recordbatch::filter::SimpleFilterEvaluator;
28use datafusion::catalog::{CatalogProvider, CatalogProviderList, SchemaProvider, Session};
29use datafusion::datasource::TableProvider;
30use datafusion::physical_plan::ExecutionPlan;
31use datafusion_common::DataFusionError;
32use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType};
33use datatypes::arrow::datatypes::SchemaRef;
34use datatypes::types::json_type::JsonNativeType;
35use futures::stream::BoxStream;
36use session::context::{QueryContext, QueryContextRef};
37use snafu::ResultExt;
38use store_api::metadata::RegionMetadataRef;
39use store_api::region_engine::RegionEngineRef;
40use store_api::storage::{
41    RegionId, ScanRequest, TimeSeriesDistribution, TimeSeriesRowSelector, VectorSearchRequest,
42};
43use table::TableRef;
44use table::metadata::{TableId, TableInfoRef};
45use table::table::scan::RegionScanExec;
46
47use crate::error::{GetRegionMetadataSnafu, Result};
48use crate::options::FlowQueryExtensions;
49
50/// Resolve to the given region (specified by [RegionId]) unconditionally.
51#[derive(Clone, Debug)]
52pub struct DummyCatalogList {
53    catalog: DummyCatalogProvider,
54}
55
56impl DummyCatalogList {
57    /// Creates a new catalog list with the given table provider.
58    pub fn with_table_provider(table_provider: Arc<dyn TableProvider>) -> Self {
59        let schema_provider = DummySchemaProvider {
60            table: table_provider,
61        };
62        let catalog_provider = DummyCatalogProvider {
63            schema: schema_provider,
64        };
65        Self {
66            catalog: catalog_provider,
67        }
68    }
69}
70
71impl CatalogProviderList for DummyCatalogList {
72    fn as_any(&self) -> &dyn Any {
73        self
74    }
75
76    fn register_catalog(
77        &self,
78        _name: String,
79        _catalog: Arc<dyn CatalogProvider>,
80    ) -> Option<Arc<dyn CatalogProvider>> {
81        None
82    }
83
84    fn catalog_names(&self) -> Vec<String> {
85        vec![]
86    }
87
88    fn catalog(&self, _name: &str) -> Option<Arc<dyn CatalogProvider>> {
89        Some(Arc::new(self.catalog.clone()))
90    }
91}
92
93/// A dummy catalog provider for [DummyCatalogList].
94#[derive(Clone, Debug)]
95struct DummyCatalogProvider {
96    schema: DummySchemaProvider,
97}
98
99impl CatalogProvider for DummyCatalogProvider {
100    fn as_any(&self) -> &dyn Any {
101        self
102    }
103
104    fn schema_names(&self) -> Vec<String> {
105        vec![]
106    }
107
108    fn schema(&self, _name: &str) -> Option<Arc<dyn SchemaProvider>> {
109        Some(Arc::new(self.schema.clone()))
110    }
111}
112
113/// A dummy schema provider for [DummyCatalogList].
114#[derive(Clone, Debug)]
115struct DummySchemaProvider {
116    table: Arc<dyn TableProvider>,
117}
118
119#[async_trait]
120impl SchemaProvider for DummySchemaProvider {
121    fn as_any(&self) -> &dyn Any {
122        self
123    }
124
125    fn table_names(&self) -> Vec<String> {
126        vec![]
127    }
128
129    async fn table(
130        &self,
131        _name: &str,
132    ) -> datafusion::error::Result<Option<Arc<dyn TableProvider>>> {
133        Ok(Some(self.table.clone()))
134    }
135
136    fn table_exist(&self, _name: &str) -> bool {
137        true
138    }
139}
140
141/// For [TableProvider] and [DummyCatalogList]
142#[derive(Clone)]
143pub struct DummyTableProvider {
144    region_id: RegionId,
145    engine: RegionEngineRef,
146    metadata: RegionMetadataRef,
147    /// Keeping a mutable request makes it possible to change in the optimize phase.
148    scan_request: Arc<Mutex<ScanRequest>>,
149    query_ctx: Option<QueryContextRef>,
150}
151
152impl fmt::Debug for DummyTableProvider {
153    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
154        f.debug_struct("DummyTableProvider")
155            .field("region_id", &self.region_id)
156            .field("metadata", &self.metadata)
157            .field("scan_request", &self.scan_request)
158            .finish()
159    }
160}
161
162#[async_trait]
163impl TableProvider for DummyTableProvider {
164    fn as_any(&self) -> &dyn Any {
165        self
166    }
167
168    fn schema(&self) -> SchemaRef {
169        self.metadata.schema.arrow_schema().clone()
170    }
171
172    fn table_type(&self) -> TableType {
173        TableType::Base
174    }
175
176    async fn scan(
177        &self,
178        _state: &dyn Session,
179        projection: Option<&Vec<usize>>,
180        filters: &[Expr],
181        limit: Option<usize>,
182    ) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
183        let mut request = self.scan_request.lock().unwrap().clone();
184        request.projection_input = projection.map(|p| p.clone().into());
185        request.filters = filters.to_vec();
186        request.limit = limit;
187
188        if let Some(query_ctx) = &self.query_ctx {
189            let is_sink_scan = is_sink_scan(query_ctx, self.region_id)
190                .map_err(|e| DataFusionError::External(Box::new(e)))?;
191            apply_cached_snapshot_to_request(query_ctx, self.region_id, is_sink_scan, &mut request);
192        }
193
194        let scanner = self
195            .engine
196            .handle_query(self.region_id, request.clone())
197            .await
198            .map_err(|e| DataFusionError::External(Box::new(e)))?;
199
200        if request.snapshot_on_scan
201            && let Some(query_ctx) = &self.query_ctx
202            && let Some(snapshot_sequence) = scanner.snapshot_sequence()
203        {
204            bind_snapshot_bound_region_seq(query_ctx, self.region_id, snapshot_sequence)
205                .map_err(|e| DataFusionError::External(Box::new(e)))?;
206        }
207
208        let query_memory_tracker = self.engine.query_memory_tracker();
209        let mut scan_exec = RegionScanExec::new(scanner, request, query_memory_tracker)?;
210        if let Some(query_ctx) = &self.query_ctx {
211            scan_exec.set_explain_verbose(query_ctx.explain_verbose());
212        }
213        Ok(Arc::new(scan_exec))
214    }
215
216    fn supports_filters_pushdown(
217        &self,
218        filters: &[&Expr],
219    ) -> datafusion::error::Result<Vec<TableProviderFilterPushDown>> {
220        let supported = filters
221            .iter()
222            .map(|e| {
223                // Simple filter on primary key columns are precisely evaluated.
224                if let Some(simple_filter) = SimpleFilterEvaluator::try_new(e) {
225                    if self
226                        .metadata
227                        .column_by_name(simple_filter.column_name())
228                        .and_then(|c| {
229                            (c.semantic_type == SemanticType::Tag
230                                || c.semantic_type == SemanticType::Timestamp)
231                                .then_some(())
232                        })
233                        .is_some()
234                    {
235                        TableProviderFilterPushDown::Exact
236                    } else {
237                        TableProviderFilterPushDown::Inexact
238                    }
239                } else {
240                    TableProviderFilterPushDown::Inexact
241                }
242            })
243            .collect();
244        Ok(supported)
245    }
246}
247
248impl DummyTableProvider {
249    /// Creates a new provider.
250    pub fn new(region_id: RegionId, engine: RegionEngineRef, metadata: RegionMetadataRef) -> Self {
251        Self {
252            region_id,
253            engine,
254            metadata,
255            scan_request: Default::default(),
256            query_ctx: None,
257        }
258    }
259
260    pub fn region_metadata(&self) -> RegionMetadataRef {
261        self.metadata.clone()
262    }
263
264    /// Sets the ordering hint of the query to the provider.
265    pub fn with_ordering_hint(&self, order_opts: &[OrderOption]) {
266        self.scan_request.lock().unwrap().output_ordering = Some(order_opts.to_vec());
267    }
268
269    /// Sets the distribution hint of the query to the provider.
270    pub fn with_distribution(&self, distribution: TimeSeriesDistribution) {
271        self.scan_request.lock().unwrap().distribution = Some(distribution);
272    }
273
274    /// Sets the time series selector hint of the query to the provider.
275    pub fn with_time_series_selector_hint(&self, selector: TimeSeriesRowSelector) {
276        self.scan_request.lock().unwrap().series_row_selector = Some(selector);
277    }
278
279    pub fn with_vector_search_hint(&self, hint: VectorSearchRequest) {
280        self.scan_request.lock().unwrap().vector_search = Some(hint);
281    }
282
283    pub fn get_vector_search_hint(&self) -> Option<VectorSearchRequest> {
284        self.scan_request.lock().unwrap().vector_search.clone()
285    }
286
287    pub fn with_sequence(&self, sequence: u64) {
288        self.scan_request.lock().unwrap().memtable_max_sequence = Some(sequence);
289    }
290
291    pub(crate) fn with_json_type_hint(&self, hint: HashMap<String, JsonNativeType>) {
292        self.scan_request.lock().unwrap().json_type_hint = hint;
293    }
294
295    /// Gets the scan request of the provider.
296    #[cfg(test)]
297    pub fn scan_request(&self) -> ScanRequest {
298        self.scan_request.lock().unwrap().clone()
299    }
300}
301
302pub struct DummyTableProviderFactory;
303
304impl DummyTableProviderFactory {
305    pub async fn create_table_provider(
306        &self,
307        region_id: RegionId,
308        engine: RegionEngineRef,
309        query_ctx: Option<QueryContextRef>,
310    ) -> Result<DummyTableProvider> {
311        let metadata =
312            engine
313                .get_metadata(region_id)
314                .await
315                .with_context(|_| GetRegionMetadataSnafu {
316                    engine: engine.name(),
317                    region_id,
318                })?;
319
320        let scan_request = if let Some(ctx) = query_ctx.as_ref() {
321            scan_request_from_query_context(region_id, ctx)?
322        } else {
323            ScanRequest::default()
324        };
325
326        Ok(DummyTableProvider {
327            region_id,
328            engine,
329            metadata,
330            scan_request: Arc::new(Mutex::new(scan_request)),
331            query_ctx,
332        })
333    }
334}
335
336fn scan_request_from_query_context(
337    region_id: RegionId,
338    query_ctx: &QueryContext,
339) -> Result<ScanRequest> {
340    let decision = decide_flow_scan(query_ctx, region_id)?;
341    Ok(build_scan_request(query_ctx, region_id, &decision))
342}
343
/// The scan semantics chosen for one region from the query context.
#[derive(Clone, Debug, PartialEq, Eq)]
struct FlowScanDecision {
    /// Whether this region is the flow sink-table scan.
    /// Sink scans intentionally bypass incremental and snapshot-binding semantics.
    is_sink_scan: bool,
    /// Whether this scan should bind a memtable upper bound when opening the scan.
    /// This is only the initial intent; if a cached bound already exists in `query_ctx`,
    /// that cached bound is reused instead and this flag is cleared.
    snapshot_on_scan: bool,
    /// Optional lower exclusive memtable sequence bound for incremental reads.
    /// When set, only rows with sequence strictly greater than this bound are read from memtables.
    memtable_min_sequence: Option<u64>,
    /// Optional cached per-region snapshot already bound in `query_ctx`.
    /// When present, this becomes the effective memtable upper bound and suppresses
    /// binding a new snapshot on scan open.
    memtable_max_sequence: Option<u64>,
}

impl FlowScanDecision {
    /// The decision used for sink-table scans: marked as a sink scan, with no
    /// sequence bounds and no snapshot binding.
    fn plain_scan() -> Self {
        Self {
            memtable_min_sequence: None,
            memtable_max_sequence: None,
            snapshot_on_scan: false,
            is_sink_scan: true,
        }
    }
}
372
373fn decide_flow_scan(query_ctx: &QueryContext, region_id: RegionId) -> Result<FlowScanDecision> {
374    let Some(flow_extensions) =
375        FlowQueryExtensions::parse_flow_extensions(&query_ctx.extensions())?
376    else {
377        return Ok(FlowScanDecision {
378            is_sink_scan: false,
379            snapshot_on_scan: false,
380            memtable_min_sequence: None,
381            memtable_max_sequence: query_ctx.get_snapshot(region_id.as_u64()),
382        });
383    };
384
385    // Sink-table scans intentionally bypass all flow scan semantics. They should
386    // behave like plain reads and must not participate in incremental lower bounds
387    // or per-region snapshot binding/reuse.
388    if flow_extensions.sink_table_id == Some(region_id.table_id()) {
389        return Ok(FlowScanDecision::plain_scan());
390    }
391
392    let apply_incremental = flow_extensions.validate_for_scan(region_id)?;
393
394    let memtable_min_sequence = if apply_incremental {
395        flow_extensions
396            .incremental_after_seqs
397            .as_ref()
398            .and_then(|seqs| seqs.get(&region_id.as_u64()))
399            .copied()
400    } else {
401        None
402    };
403
404    let memtable_max_sequence = query_ctx.get_snapshot(region_id.as_u64());
405
406    Ok(FlowScanDecision {
407        is_sink_scan: false,
408        snapshot_on_scan: memtable_max_sequence.is_none()
409            && flow_extensions.should_collect_region_watermark(),
410        memtable_min_sequence,
411        memtable_max_sequence,
412    })
413}
414
415fn build_scan_request(
416    query_ctx: &QueryContext,
417    region_id: RegionId,
418    decision: &FlowScanDecision,
419) -> ScanRequest {
420    // Build the initial scan request from the final decision known at provider creation
421    // time. A later scan may still refresh `memtable_max_sequence` if another source scan
422    // has bound a snapshot into `query_ctx` after this provider was created.
423    ScanRequest {
424        sst_min_sequence: (!decision.is_sink_scan)
425            .then(|| query_ctx.sst_min_sequence(region_id.as_u64()))
426            .flatten(),
427        snapshot_on_scan: decision.snapshot_on_scan,
428        memtable_min_sequence: decision.memtable_min_sequence,
429        memtable_max_sequence: decision.memtable_max_sequence,
430        ..Default::default()
431    }
432}
433
434fn is_sink_scan(query_ctx: &QueryContext, region_id: RegionId) -> Result<bool> {
435    Ok(
436        FlowQueryExtensions::parse_flow_extensions(&query_ctx.extensions())?
437            .is_some_and(|exts| exts.sink_table_id == Some(region_id.table_id())),
438    )
439}
440
441fn apply_cached_snapshot_to_request(
442    query_ctx: &QueryContext,
443    region_id: RegionId,
444    is_sink_scan: bool,
445    scan_request: &mut ScanRequest,
446) {
447    if is_sink_scan {
448        return;
449    }
450
451    if let Some(snapshot_sequence) = query_ctx.get_snapshot(region_id.as_u64()) {
452        // Reuse the previously bound per-region snapshot instead of rebinding a new
453        // upper bound on scan open. This refresh is still needed at scan time because
454        // the provider's cached request may have been built before another source scan
455        // bound the shared query-level snapshot into `query_ctx`.
456        scan_request.memtable_max_sequence = Some(snapshot_sequence);
457        scan_request.snapshot_on_scan = false;
458    }
459}
460
461fn bind_snapshot_bound_region_seq(
462    query_ctx: &QueryContext,
463    region_id: RegionId,
464    snapshot_sequence: u64,
465) -> Result<u64> {
466    if let Some(existing) = query_ctx.get_snapshot(region_id.as_u64()) {
467        if existing != snapshot_sequence {
468            return crate::error::ConflictingSnapshotSequenceSnafu {
469                region_id,
470                existing,
471                new: snapshot_sequence,
472            }
473            .fail();
474        }
475        Ok(existing)
476    } else {
477        query_ctx.set_snapshot(region_id.as_u64(), snapshot_sequence);
478        Ok(snapshot_sequence)
479    }
480}
481
482#[async_trait]
483impl TableProviderFactory for DummyTableProviderFactory {
484    async fn create(
485        &self,
486        region_id: RegionId,
487        engine: RegionEngineRef,
488        ctx: Option<QueryContextRef>,
489    ) -> Result<Arc<dyn TableProvider>> {
490        let provider = self.create_table_provider(region_id, engine, ctx).await?;
491        Ok(Arc::new(provider))
492    }
493}
494
495#[async_trait]
496pub trait TableProviderFactory: Send + Sync {
497    async fn create(
498        &self,
499        region_id: RegionId,
500        engine: RegionEngineRef,
501        ctx: Option<QueryContextRef>,
502    ) -> Result<Arc<dyn TableProvider>>;
503}
504
505pub type TableProviderFactoryRef = Arc<dyn TableProviderFactory>;
506
507/// A dummy catalog manager that always returns empty results.
508///
509/// Used to fill the arg of `QueryEngineFactory::new_with_plugins` in datanode.
510pub struct DummyCatalogManager;
511
512impl DummyCatalogManager {
513    /// Returns a new `CatalogManagerRef` instance.
514    pub fn arc() -> CatalogManagerRef {
515        Arc::new(Self)
516    }
517}
518
519#[async_trait::async_trait]
520impl CatalogManager for DummyCatalogManager {
521    fn as_any(&self) -> &dyn Any {
522        self
523    }
524
525    async fn catalog_names(&self) -> CatalogResult<Vec<String>> {
526        Ok(vec![])
527    }
528
529    async fn schema_names(
530        &self,
531        _catalog: &str,
532        _query_ctx: Option<&QueryContext>,
533    ) -> CatalogResult<Vec<String>> {
534        Ok(vec![])
535    }
536
537    async fn table_names(
538        &self,
539        _catalog: &str,
540        _schema: &str,
541        _query_ctx: Option<&QueryContext>,
542    ) -> CatalogResult<Vec<String>> {
543        Ok(vec![])
544    }
545
546    async fn catalog_exists(&self, _catalog: &str) -> CatalogResult<bool> {
547        Ok(false)
548    }
549
550    async fn schema_exists(
551        &self,
552        _catalog: &str,
553        _schema: &str,
554        _query_ctx: Option<&QueryContext>,
555    ) -> CatalogResult<bool> {
556        Ok(false)
557    }
558
559    async fn table_exists(
560        &self,
561        _catalog: &str,
562        _schema: &str,
563        _table: &str,
564        _query_ctx: Option<&QueryContext>,
565    ) -> CatalogResult<bool> {
566        Ok(false)
567    }
568
569    async fn table(
570        &self,
571        _catalog: &str,
572        _schema: &str,
573        _table_name: &str,
574        _query_ctx: Option<&QueryContext>,
575    ) -> CatalogResult<Option<TableRef>> {
576        Ok(None)
577    }
578
579    async fn table_id(
580        &self,
581        _catalog: &str,
582        _schema: &str,
583        _table_name: &str,
584        _query_ctx: Option<&QueryContext>,
585    ) -> CatalogResult<Option<TableId>> {
586        Ok(None)
587    }
588
589    async fn table_info_by_id(&self, _table_id: TableId) -> CatalogResult<Option<TableInfoRef>> {
590        Ok(None)
591    }
592
593    async fn tables_by_ids(
594        &self,
595        _catalog: &str,
596        _schema: &str,
597        _table_ids: &[TableId],
598    ) -> CatalogResult<Vec<TableRef>> {
599        Ok(vec![])
600    }
601
602    fn tables<'a>(
603        &'a self,
604        _catalog: &'a str,
605        _schema: &'a str,
606        _query_ctx: Option<&'a QueryContext>,
607    ) -> BoxStream<'a, CatalogResult<TableRef>> {
608        Box::pin(futures::stream::empty())
609    }
610}
611
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::sync::{Arc, RwLock};

    use common_error::ext::ErrorExt;
    use common_error::status_code::StatusCode;
    use session::context::QueryContextBuilder;

    use super::*;
    use crate::error::Error;
    use crate::options::{FLOW_INCREMENTAL_AFTER_SEQS, FLOW_INCREMENTAL_MODE, FLOW_SINK_TABLE_ID};

    fn test_region_id() -> RegionId {
        RegionId::new(1024, 1)
    }

    /// Builds a shared sequence map with a single `region_id -> sequence`
    /// entry, as passed to the `snapshot_seqs` / `sst_min_sequences` setters.
    fn single_region_seqs(region_id: RegionId, sequence: u64) -> Arc<RwLock<HashMap<u64, u64>>> {
        Arc::new(RwLock::new(HashMap::from([(
            region_id.as_u64(),
            sequence,
        )])))
    }

    #[test]
    fn test_scan_request_from_query_context_uses_snapshot_bound_intent() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .extensions(HashMap::from([(
                "flow.return_region_seq".to_string(),
                "true".to_string(),
            )]))
            .snapshot_seqs(single_region_seqs(region_id, 42))
            .sst_min_sequences(single_region_seqs(region_id, 7))
            .build();

        let request = scan_request_from_query_context(region_id, &query_ctx).unwrap();

        assert!(!request.snapshot_on_scan);
        assert_eq!(request.memtable_max_sequence, Some(42));
        assert_eq!(request.sst_min_sequence, Some(7));
    }

    #[test]
    fn test_scan_request_from_incremental_context_uses_snapshot_bound_intent() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .extensions(HashMap::from([(
                "flow.incremental_after_seqs".to_string(),
                format!(r#"{{"{}":10}}"#, region_id.as_u64()),
            )]))
            .build();

        let request = scan_request_from_query_context(region_id, &query_ctx).unwrap();

        assert!(request.snapshot_on_scan);
        assert_eq!(request.memtable_min_sequence, Some(10));
        assert_eq!(request.memtable_max_sequence, None);
    }

    #[test]
    fn test_scan_request_from_query_context_keeps_snapshot_fields() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .snapshot_seqs(single_region_seqs(region_id, 100))
            .sst_min_sequences(single_region_seqs(region_id, 90))
            .build();

        let request = scan_request_from_query_context(region_id, &query_ctx).unwrap();
        assert_eq!(request.memtable_max_sequence, Some(100));
        assert_eq!(request.sst_min_sequence, Some(90));
        assert_eq!(request.memtable_min_sequence, None);
        assert!(!request.snapshot_on_scan);
    }

    #[test]
    fn test_scan_request_from_query_context_reuses_existing_snapshot_for_incremental_scan() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .extensions(HashMap::from([(
                FLOW_INCREMENTAL_AFTER_SEQS.to_string(),
                format!(r#"{{"{}":10}}"#, region_id.as_u64()),
            )]))
            .snapshot_seqs(single_region_seqs(region_id, 42))
            .build();

        let request = scan_request_from_query_context(region_id, &query_ctx).unwrap();

        assert_eq!(request.memtable_min_sequence, Some(10));
        assert_eq!(request.memtable_max_sequence, Some(42));
        assert!(!request.snapshot_on_scan);
    }

    #[test]
    fn test_apply_cached_snapshot_to_request_updates_cached_scan_request() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .snapshot_seqs(single_region_seqs(region_id, 88))
            .build();
        let mut request = ScanRequest {
            snapshot_on_scan: true,
            ..Default::default()
        };

        apply_cached_snapshot_to_request(&query_ctx, region_id, false, &mut request);

        assert_eq!(request.memtable_max_sequence, Some(88));
        assert!(!request.snapshot_on_scan);
    }

    #[test]
    fn test_apply_cached_snapshot_to_request_skips_sink_scan() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .snapshot_seqs(single_region_seqs(region_id, 88))
            .build();
        let mut request = ScanRequest {
            snapshot_on_scan: true,
            ..Default::default()
        };

        apply_cached_snapshot_to_request(&query_ctx, region_id, true, &mut request);

        assert_eq!(request.memtable_max_sequence, None);
        assert!(request.snapshot_on_scan);
    }

    /// A different sequence must be rejected and must not overwrite the
    /// snapshot that is already bound.
    #[test]
    fn test_bind_snapshot_bound_region_seq_rejects_conflicting_snapshot() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .snapshot_seqs(single_region_seqs(region_id, 42))
            .build();

        let err = bind_snapshot_bound_region_seq(&query_ctx, region_id, 99).unwrap_err();

        assert!(matches!(err, Error::ConflictingSnapshotSequence { .. }));
        assert_eq!(query_ctx.get_snapshot(region_id.as_u64()), Some(42));
    }

    /// Binding the sequence that is already bound succeeds and returns it.
    #[test]
    fn test_bind_snapshot_bound_region_seq_reuses_existing_snapshot() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .snapshot_seqs(single_region_seqs(region_id, 42))
            .build();

        let seq = bind_snapshot_bound_region_seq(&query_ctx, region_id, 42).unwrap();

        assert_eq!(seq, 42);
        assert_eq!(query_ctx.get_snapshot(region_id.as_u64()), Some(42));
    }

    #[test]
    fn test_bind_snapshot_bound_region_seq_sets_snapshot_once() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default().build();

        let seq = bind_snapshot_bound_region_seq(&query_ctx, region_id, 99).unwrap();

        assert_eq!(seq, 99);
        assert_eq!(query_ctx.get_snapshot(region_id.as_u64()), Some(99));
    }

    #[test]
    fn test_scan_request_from_query_context_applies_incremental_after_seq_for_source_scan() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .extensions(HashMap::from([
                (
                    FLOW_INCREMENTAL_MODE.to_string(),
                    "memtable_only".to_string(),
                ),
                (
                    FLOW_INCREMENTAL_AFTER_SEQS.to_string(),
                    format!(r#"{{"{}":55}}"#, region_id.as_u64()),
                ),
            ]))
            .build();

        let request = scan_request_from_query_context(region_id, &query_ctx).unwrap();
        assert_eq!(request.memtable_min_sequence, Some(55));
    }

    #[test]
    fn test_scan_request_from_query_context_does_not_apply_incremental_for_sink_table() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .extensions(HashMap::from([
                (
                    FLOW_INCREMENTAL_MODE.to_string(),
                    "memtable_only".to_string(),
                ),
                (
                    FLOW_INCREMENTAL_AFTER_SEQS.to_string(),
                    format!(r#"{{"{}":55}}"#, region_id.as_u64()),
                ),
                (
                    FLOW_SINK_TABLE_ID.to_string(),
                    region_id.table_id().to_string(),
                ),
            ]))
            .snapshot_seqs(single_region_seqs(region_id, 88))
            .sst_min_sequences(single_region_seqs(region_id, 77))
            .build();

        let request = scan_request_from_query_context(region_id, &query_ctx).unwrap();
        assert_eq!(request.memtable_min_sequence, None);
        assert_eq!(request.memtable_max_sequence, None);
        assert_eq!(request.sst_min_sequence, None);
        assert!(!request.snapshot_on_scan);
    }

    #[test]
    fn test_scan_request_from_query_context_rejects_missing_memtable_only_region() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .extensions(HashMap::from([
                (
                    FLOW_INCREMENTAL_MODE.to_string(),
                    "memtable_only".to_string(),
                ),
                (
                    FLOW_INCREMENTAL_AFTER_SEQS.to_string(),
                    r#"{"9":55}"#.to_string(),
                ),
            ]))
            .build();

        let err = scan_request_from_query_context(region_id, &query_ctx).unwrap_err();
        assert!(matches!(err, Error::InvalidQueryContextExtension { .. }));
    }

    #[test]
    fn test_scan_request_from_query_context_rejects_invalid_incremental_json() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .extensions(HashMap::from([(
                FLOW_INCREMENTAL_AFTER_SEQS.to_string(),
                "not-json".to_string(),
            )]))
            .build();

        let err = scan_request_from_query_context(region_id, &query_ctx).unwrap_err();
        assert!(matches!(err, Error::InvalidQueryContextExtension { .. }));
        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
    }

    #[test]
    fn test_scan_request_from_query_context_rejects_invalid_sink_table_id() {
        let region_id = test_region_id();
        let query_ctx = QueryContextBuilder::default()
            .extensions(HashMap::from([(
                FLOW_SINK_TABLE_ID.to_string(),
                "abc".to_string(),
            )]))
            .build();

        let err = scan_request_from_query_context(region_id, &query_ctx).unwrap_err();
        assert!(matches!(err, Error::InvalidQueryContextExtension { .. }));
        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
    }
}
884}