From 6e430b38987a6ad86818df3da5823bba17f3d6de Mon Sep 17 00:00:00 2001 From: evenyag Date: Wed, 29 Oct 2025 12:03:27 +0800 Subject: [PATCH] test: test mito filter delete Signed-off-by: evenyag --- src/mito2/src/engine/prune_test.rs | 100 ++++++++++++++++++++++++++++- src/mito2/src/test_util.rs | 13 ++++ 2 files changed, 112 insertions(+), 1 deletion(-) diff --git a/src/mito2/src/engine/prune_test.rs b/src/mito2/src/engine/prune_test.rs index b260024043..beb5e2644a 100644 --- a/src/mito2/src/engine/prune_test.rs +++ b/src/mito2/src/engine/prune_test.rs @@ -22,8 +22,10 @@ use store_api::region_request::RegionRequest; use store_api::storage::{RegionId, ScanRequest}; use crate::config::MitoConfig; +use crate::test_util::batch_util::sort_batches_and_print; use crate::test_util::{ - CreateRequestBuilder, TestEnv, build_rows, flush_region, put_rows, rows_schema, + CreateRequestBuilder, TestEnv, build_delete_rows, build_rows, delete_rows, delete_rows_schema, + flush_region, put_rows, rows_schema, }; async fn check_prune_row_groups(exprs: Vec, expected: &str, flat_format: bool) { @@ -377,3 +379,99 @@ async fn test_mem_range_prune_with_format(flat_format: bool) { +-------+---------+---------------------+"; assert_eq!(expected, batches.pretty_print().unwrap()); } + +#[tokio::test] +async fn test_scan_filter_field_after_delete() { + test_scan_filter_field_after_delete_with_format(false).await; + test_scan_filter_field_after_delete_with_format(true).await; +} + +async fn test_scan_filter_field_after_delete_with_format(flat_format: bool) { + common_telemetry::init_default_ut_logging(); + + let mut env = TestEnv::new().await; + let engine = env + .create_engine(MitoConfig { + default_experimental_flat_format: flat_format, + ..Default::default() + }) + .await; + + let region_id = RegionId::new(1, 1); + + env.get_schema_metadata_manager() + .register_region_table_info( + region_id.table_id(), + "test_table", + "test_catalog", + "test_schema", + None, + env.get_kv_backend(), + ) + .await; + let request = CreateRequestBuilder::new() + .insert_option("compaction.type", "twcs") + .build(); + + let column_schemas = rows_schema(&request); + engine + .handle_request(region_id, RegionRequest::Create(request.clone())) + .await + .unwrap(); + + // put 1, 2, 3, 4 and flush + put_rows( + &engine, + region_id, + Rows { + schema: column_schemas, + rows: build_rows(1, 5), + }, + ) + .await; + flush_region(&engine, region_id, None).await; + + // delete 2, 3 + let delete_schemas = delete_rows_schema(&request); + delete_rows( + &engine, + region_id, + Rows { + schema: delete_schemas, + rows: build_delete_rows(2, 4), + }, + ) + .await; + + // Scans and filter fields, the field should be deleted. + let request = ScanRequest { + filters: vec![col("field_0").eq(lit(3.0f64))], + ..Default::default() + }; + let stream = engine + .scan_to_stream(region_id, request.clone()) + .await + .unwrap(); + let batches = RecordBatches::try_collect(stream).await.unwrap(); + let expected = "\ ++-------+---------+----+ +| tag_0 | field_0 | ts | ++-------+---------+----+ ++-------+---------+----+"; + assert_eq!( + expected, + sort_batches_and_print(&batches, &["tag_0", "field_0", "ts"]) + ); + + // flush delete op + flush_region(&engine, region_id, None).await; + let stream = engine + .scan_to_stream(region_id, request.clone()) + .await + .unwrap(); + let batches = RecordBatches::try_collect(stream).await.unwrap(); + assert_eq!( + expected, + sort_batches_and_print(&batches, &["tag_0", "field_0", "ts"]) + ); +} diff --git a/src/mito2/src/test_util.rs b/src/mito2/src/test_util.rs index 4aac6f2e4b..0f3acc93b9 100644 --- a/src/mito2/src/test_util.rs +++ b/src/mito2/src/test_util.rs @@ -1068,6 +1068,19 @@ pub fn build_rows(start: usize, end: usize) -> Vec { .collect() } +/// Build rows with schema (string, ts_millis) in range `[start, end)`. +/// `start`, `end` are in second resolution. +pub fn build_delete_rows(start: usize, end: usize) -> Vec { + (start..end) + .map(|i| { + row(vec![ + ValueData::StringValue(i.to_string()), + ValueData::TimestampMillisecondValue(i as i64 * 1000), + ]) + }) + .collect() +} + /// Build rows with schema (string, f64, f64, ts_millis). /// - `key`: A string key that is common across all rows. /// - `timestamps`: Array of timestamp values.