diff --git a/src/mito2/src/compaction.rs b/src/mito2/src/compaction.rs index 5cf3c444a8..73dfb5061c 100644 --- a/src/mito2/src/compaction.rs +++ b/src/mito2/src/compaction.rs @@ -1006,14 +1006,12 @@ impl CompactionSstReaderBuilder<'_> { /// for compaction. The schema of the [FlatSource] is unified. async fn build_flat_sst_reader(self) -> Result { let scan_input = self.build_scan_input().await?.with_compaction(true); - - let schema = scan_input.mapper.output_schema(); - let schema = schema.arrow_schema(); + let schema = scan_input.mapper.input_arrow_schema(true); let stream = SeqScan::new(scan_input) .build_flat_reader_for_compaction() .await?; - Ok(FlatSource::new_stream(schema.clone(), stream)) + Ok(FlatSource::new_stream(schema, stream)) } async fn build_scan_input(self) -> Result { @@ -1096,7 +1094,15 @@ impl CompactionSstReaderBuilder<'_> { .map(|x| x.0.parquet_metadata()) { Ok(x) => x, - Err(e) if e.is_object_not_found() => continue, + Err(e) if e.is_object_not_found() => { + warn!( + e; + "Input SST file does not exist during compaction metadata pre-read, skip it, region_id: {}, file: {}", + file_handle.region_id(), + file_handle.file_id() + ); + continue; + } Err(e) => return Err(e), }; let file_metadata = parquet_metadata.file_metadata(); diff --git a/tests/cases/standalone/common/types/json/json2.result b/tests/cases/standalone/common/types/json/json2.result index fdae802f3b..79526be76e 100644 --- a/tests/cases/standalone/common/types/json/json2.result +++ b/tests/cases/standalone/common/types/json/json2.result @@ -155,3 +155,60 @@ drop table json2_table; Affected Rows: 0 +create table json2_overlap ( + ts timestamp time index, + j json2 +) with ( + 'append_mode' = 'true', + 'sst_format' = 'flat', +); + +Affected Rows: 0 + +insert into json2_overlap +values (1, '{"a": {"b": 1}}'), + (3, '{"a": {"b": 3}}'); + +Affected Rows: 2 + +admin flush_table('json2_overlap'); + ++------------------------------------+ +| ADMIN flush_table('json2_overlap') | ++------------------------------------+ +| 0 | ++------------------------------------+ + +insert into json2_overlap +values (2, '{"a": {"b": "s2"}}'), + (4, '{"a": {"b": "s4"}}'); + +Affected Rows: 2 + +admin flush_table('json2_overlap'); + ++------------------------------------+ +| ADMIN flush_table('json2_overlap') | ++------------------------------------+ +| 0 | ++------------------------------------+ + +admin compact_table('json2_overlap', 'swcs', '86400'); + ++-------------------------------------------------------+ +| ADMIN compact_table('json2_overlap', 'swcs', '86400') | ++-------------------------------------------------------+ +| 0 | ++-------------------------------------------------------+ + +select count(*) from json2_overlap; + ++----------+ +| count(*) | ++----------+ +| 4 | ++----------+ + +drop table json2_overlap; + +Affected Rows: 0 diff --git a/tests/cases/standalone/common/types/json/json2.sql b/tests/cases/standalone/common/types/json/json2.sql index 57e113f8be..777f1e6b9e 100644 --- a/tests/cases/standalone/common/types/json/json2.sql +++ b/tests/cases/standalone/common/types/json/json2.sql @@ -47,3 +47,29 @@ select j.c, j.y from json2_table order by ts; select j.d from json2_table order by ts; drop table json2_table; + +create table json2_overlap ( + ts timestamp time index, + j json2 +) with ( + 'append_mode' = 'true', + 'sst_format' = 'flat', +); + +insert into json2_overlap +values (1, '{"a": {"b": 1}}'), + (3, '{"a": {"b": 3}}'); + +admin flush_table('json2_overlap'); + +insert into json2_overlap +values (2, '{"a": {"b": "s2"}}'), + (4, '{"a": {"b": "s4"}}'); + +admin flush_table('json2_overlap'); + +admin compact_table('json2_overlap', 'swcs', '86400'); + +select count(*) from json2_overlap; + +drop table json2_overlap;