diff --git a/src/mito2/src/compaction/buckets.rs b/src/mito2/src/compaction/buckets.rs
index e094ceb847..3761a45b1a 100644
--- a/src/mito2/src/compaction/buckets.rs
+++ b/src/mito2/src/compaction/buckets.rs
@@ -40,7 +40,7 @@ pub(crate) fn infer_time_bucket<'a>(files: impl Iterator<Item = &'a FileHandle>)
         .unwrap_or_else(|| TIME_BUCKETS.max()) // safety: TIME_BUCKETS cannot be empty.
 }
 
-pub(crate) struct TimeBuckets([i64; 7]);
+pub(crate) struct TimeBuckets([i64; 5]);
 
 impl TimeBuckets {
     /// Fits a given time span into a time bucket by finding the minimum bucket that can cover the span.
@@ -71,13 +71,11 @@ impl TimeBuckets {
 
 /// A set of predefined time buckets.
 pub(crate) const TIME_BUCKETS: TimeBuckets = TimeBuckets([
-    60 * 60,                 // one hour
-    2 * 60 * 60,             // two hours
-    12 * 60 * 60,            // twelve hours
-    24 * 60 * 60,            // one day
-    7 * 24 * 60 * 60,        // one week
-    365 * 24 * 60 * 60,      // one year
-    10 * 365 * 24 * 60 * 60, // ten years
+    60 * 60,          // one hour
+    2 * 60 * 60,      // two hours
+    12 * 60 * 60,     // twelve hours
+    24 * 60 * 60,     // one day
+    7 * 24 * 60 * 60, // one week
 ]);
 
 #[cfg(test)]
@@ -107,7 +105,7 @@ mod tests {
             TIME_BUCKETS.get(3),
             TIME_BUCKETS.fit_time_bucket(TIME_BUCKETS.get(3) - 1)
         );
-        assert_eq!(TIME_BUCKETS.get(6), TIME_BUCKETS.fit_time_bucket(i64::MAX));
+        assert_eq!(TIME_BUCKETS.get(4), TIME_BUCKETS.fit_time_bucket(i64::MAX));
     }
 
     #[test]
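With the one-year and ten-year buckets removed, any file time span longer than one week now falls back to the one-week bucket. A minimal sketch of the selection rule, assuming fit_time_bucket scans the buckets in ascending order (its body is outside this hunk):

    fn fit_time_bucket(span_sec: i64, buckets: &[i64]) -> i64 {
        // Pick the smallest predefined bucket that covers the span,
        // falling back to the largest remaining bucket (one week).
        buckets
            .iter()
            .copied()
            .find(|bucket| *bucket >= span_sec)
            .unwrap_or_else(|| *buckets.last().unwrap())
    }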
diff --git a/src/mito2/src/engine/compaction_test.rs b/src/mito2/src/engine/compaction_test.rs
index 75d932c0f1..03d7d12af5 100644
--- a/src/mito2/src/engine/compaction_test.rs
+++ b/src/mito2/src/engine/compaction_test.rs
@@ -61,6 +61,30 @@ async fn put_and_flush(
     assert_eq!(0, result.affected_rows);
 }
 
+async fn flush(engine: &MitoEngine, region_id: RegionId) {
+    let result = engine
+        .handle_request(
+            region_id,
+            RegionRequest::Flush(RegionFlushRequest {
+                row_group_size: None,
+            }),
+        )
+        .await
+        .unwrap();
+    assert_eq!(0, result.affected_rows);
+}
+
+async fn compact(engine: &MitoEngine, region_id: RegionId) {
+    let result = engine
+        .handle_request(
+            region_id,
+            RegionRequest::Compact(RegionCompactRequest::default()),
+        )
+        .await
+        .unwrap();
+    assert_eq!(result.affected_rows, 0);
+}
+
 async fn delete_and_flush(
     engine: &MitoEngine,
     region_id: RegionId,
@@ -147,14 +171,7 @@ async fn test_compaction_region() {
     delete_and_flush(&engine, region_id, &column_schemas, 15..30).await;
     put_and_flush(&engine, region_id, &column_schemas, 15..25).await;
 
-    let result = engine
-        .handle_request(
-            region_id,
-            RegionRequest::Compact(RegionCompactRequest::default()),
-        )
-        .await
-        .unwrap();
-    assert_eq!(result.affected_rows, 0);
+    compact(&engine, region_id).await;
 
     let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
     // Input:
@@ -179,6 +196,136 @@
     assert_eq!((0..25).map(|v| v * 1000).collect::<Vec<_>>(), vec);
 }
 
+#[tokio::test]
+async fn test_infer_compaction_time_window() {
+    common_telemetry::init_default_ut_logging();
+    let mut env = TestEnv::new();
+    let engine = env.create_engine(MitoConfig::default()).await;
+
+    let region_id = RegionId::new(1, 1);
+    env.get_schema_metadata_manager()
+        .register_region_table_info(
+            region_id.table_id(),
+            "test_table",
+            "test_catalog",
+            "test_schema",
+            None,
+            env.get_kv_backend(),
+        )
+        .await;
+
+    let request = CreateRequestBuilder::new()
+        .insert_option("compaction.type", "twcs")
+        .build();
+
+    let column_schemas = request
+        .column_metadatas
+        .iter()
+        .map(column_metadata_to_column_schema)
+        .collect::<Vec<_>>();
+    engine
+        .handle_request(region_id, RegionRequest::Create(request))
+        .await
+        .unwrap();
+    // time window should be absent
+    assert!(engine
+        .get_region(region_id)
+        .unwrap()
+        .version_control
+        .current()
+        .version
+        .compaction_time_window
+        .is_none());
+
+    put_and_flush(&engine, region_id, &column_schemas, 1..2).await;
+    put_and_flush(&engine, region_id, &column_schemas, 2..3).await;
+    put_and_flush(&engine, region_id, &column_schemas, 3..4).await;
+    put_and_flush(&engine, region_id, &column_schemas, 4..5).await;
+
+    compact(&engine, region_id).await;
+
+    let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
+    assert_eq!(
+        1,
+        scanner.num_files(),
+        "unexpected files: {:?}",
+        scanner.file_ids()
+    );
+
+    assert_eq!(
+        Duration::from_secs(3600),
+        engine
+            .get_region(region_id)
+            .unwrap()
+            .version_control
+            .current()
+            .version
+            .compaction_time_window
+            .unwrap()
+    );
+
+    // write two rows to trigger another flush.
+    // note: these two rows still use the original part_duration (1 day by default), so they are
+    // written to the same time partition and flushed to one file.
+    put_rows(
+        &engine,
+        region_id,
+        Rows {
+            schema: column_schemas.clone(),
+            rows: build_rows_for_key("a", 3601, 3602, 0),
+        },
+    )
+    .await;
+    put_rows(
+        &engine,
+        region_id,
+        Rows {
+            schema: column_schemas.clone(),
+            rows: build_rows_for_key("a", 7201, 7202, 0),
+        },
+    )
+    .await;
+    // this flush should update part_duration in TimePartitions.
+    flush(&engine, region_id).await;
+    compact(&engine, region_id).await;
+    let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
+    assert_eq!(
+        2,
+        scanner.num_files(),
+        "unexpected files: {:?}",
+        scanner.file_ids()
+    );
+
+    // These rows should use the new part_duration in TimePartitions and get written to two
+    // different time partitions, so we end up with 4 SSTs.
+    put_rows(
+        &engine,
+        region_id,
+        Rows {
+            schema: column_schemas.clone(),
+            rows: build_rows_for_key("a", 3601, 3602, 0),
+        },
+    )
+    .await;
+    put_rows(
+        &engine,
+        region_id,
+        Rows {
+            schema: column_schemas.clone(),
+            rows: build_rows_for_key("a", 7201, 7202, 0),
+        },
+    )
+    .await;
+    flush(&engine, region_id).await;
+    let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
+    assert_eq!(
+        4,
+        scanner.num_files(),
+        "unexpected files: {:?}",
+        scanner.file_ids()
+    );
+}
+
 #[tokio::test]
 async fn test_compaction_overlapping_files() {
     common_telemetry::init_default_ut_logging();
@@ -216,14 +363,7 @@
     put_and_flush(&engine, region_id, &column_schemas, 20..30).await;
     delete_and_flush(&engine, region_id, &column_schemas, 30..40).await;
 
-    let result = engine
-        .handle_request(
-            region_id,
-            RegionRequest::Compact(RegionCompactRequest::default()),
-        )
-        .await
-        .unwrap();
-    assert_eq!(result.affected_rows, 0);
+    compact(&engine, region_id).await;
 
     let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
     assert_eq!(
@@ -282,15 +422,7 @@
     put_and_flush(&engine, region_id, &column_schemas, 3600..10800).await; // window 10800
     delete_and_flush(&engine, region_id, &column_schemas, 0..3600).await; // window 3600
 
-    let result = engine
-        .handle_request(
-            region_id,
-            RegionRequest::Compact(RegionCompactRequest::default()),
-        )
-        .await
-        .unwrap();
-    assert_eq!(result.affected_rows, 0);
-
+    compact(&engine, region_id).await;
     let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
     let stream = scanner.scan().await.unwrap();
     let vec = collect_stream_ts(stream).await;
@@ -336,15 +468,7 @@
     put_and_flush(&engine, region_id, &column_schemas, 0..3600).await; // window 3600
     delete_and_flush(&engine, region_id, &column_schemas, 0..10800).await; // window 10800
 
-    let result = engine
-        .handle_request(
-            region_id,
-            RegionRequest::Compact(RegionCompactRequest::default()),
-        )
-        .await
-        .unwrap();
-    assert_eq!(result.affected_rows, 0);
-
+    compact(&engine, region_id).await;
     let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
     assert_eq!(
         2,
@@ -477,15 +601,7 @@
     put_and_flush(&engine, region_id, &column_schemas, 1800..2700).await; // window 3600
     put_and_flush(&engine, region_id, &column_schemas, 2700..3600).await; // window 3600
 
-    let result = engine
-        .handle_request(
-            region_id,
-            RegionRequest::Compact(RegionCompactRequest::default()),
-        )
-        .await
-        .unwrap();
-    assert_eq!(result.affected_rows, 0);
-
+    compact(&engine, region_id).await;
     assert_eq!(
         engine
             .get_region(region_id)
@@ -572,13 +688,7 @@
     put_and_flush(&engine, region_id, &column_schemas, 1200..1800).await; // window 3600
     put_and_flush(&engine, region_id, &column_schemas, 1800..2400).await; // window 3600
 
-    engine
-        .handle_request(
-            region_id,
-            RegionRequest::Compact(RegionCompactRequest::default()),
-        )
-        .await
-        .unwrap();
+    compact(&engine, region_id).await;
 
     // Put window 7200
     put_and_flush(&engine, region_id, &column_schemas, 4000..5000).await;
@@ -623,13 +733,7 @@
     // Compaction again. It should compact windows 3600 and 7200
     // into 7200.
-    engine
-        .handle_request(
-            region_id,
-            RegionRequest::Compact(RegionCompactRequest::default()),
-        )
-        .await
-        .unwrap();
+    compact(&engine, region_id).await;
 
     // Check compaction window.
     {
         let region = engine.get_region(region_id).unwrap();
@@ -709,13 +813,7 @@
     put_and_flush(&engine, region_id, &column_schemas, 1200..1800).await; // window 3600
     put_and_flush(&engine, region_id, &column_schemas, 1800..2400).await; // window 3600
 
-    engine
-        .handle_request(
-            region_id,
-            RegionRequest::Compact(RegionCompactRequest::default()),
-        )
-        .await
-        .unwrap();
+    compact(&engine, region_id).await;
 
     // Check compaction window.
     {
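The new test_infer_compaction_time_window above relies on two behaviors: compaction infers a 3600-second window from the flushed files, and the next flush re-buckets rows written at 3601 and 7201 into separate partitions. A sketch of the floor alignment this implies, assuming partition_start_timestamp aligns a timestamp down to a multiple of the window (hypothetical seconds-only helper):

    fn partition_start_secs(ts: i64, window: i64) -> i64 {
        // rem_euclid keeps the alignment correct for negative timestamps too.
        ts - ts.rem_euclid(window)
    }

    // partition_start_secs(3601, 3600) == 3600 and partition_start_secs(7201, 3600) == 7200,
    // so the two rows land in different partitions and flush to separate files (4 SSTs total).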
diff --git a/src/mito2/src/lib.rs b/src/mito2/src/lib.rs
index e47310ce0e..66ffdf9384 100644
--- a/src/mito2/src/lib.rs
+++ b/src/mito2/src/lib.rs
@@ -21,6 +21,7 @@
 #![feature(result_flattening)]
 #![feature(int_roundings)]
 #![feature(debug_closure_helpers)]
+#![feature(duration_constructors)]
 
 #[cfg(any(test, feature = "test"))]
 #[cfg_attr(feature = "test", allow(unused))]
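The new feature gate is needed because the memtable code below defines its initial time window with Duration::from_days, which is nightly-only behind duration_constructors. On stable Rust the equivalent constant would be:

    use std::time::Duration;

    const INITIAL_TIME_WINDOW: Duration = Duration::from_secs(24 * 60 * 60); // one day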
diff --git a/src/mito2/src/memtable/time_partition.rs b/src/mito2/src/memtable/time_partition.rs
index 967643bf57..55dbe1aae1 100644
--- a/src/mito2/src/memtable/time_partition.rs
+++ b/src/mito2/src/memtable/time_partition.rs
@@ -40,25 +40,22 @@ use crate::memtable::key_values::KeyValue;
 use crate::memtable::version::SmallMemtableVec;
 use crate::memtable::{KeyValues, MemtableBuilderRef, MemtableId, MemtableRef};
 
+/// Initial time window if not specified.
+const INITIAL_TIME_WINDOW: Duration = Duration::from_days(1);
+
 /// A partition holds rows with timestamps between `[min, max)`.
 #[derive(Debug, Clone)]
 pub struct TimePartition {
     /// Memtable of the partition.
     memtable: MemtableRef,
     /// Time range of the partition. `min` is inclusive and `max` is exclusive.
-    /// `None` means there is no time range. The time
-    /// range is `None` if and only if the [TimePartitions::part_duration] is `None`.
-    time_range: Option<PartTimeRange>,
+    time_range: PartTimeRange,
 }
 
 impl TimePartition {
     /// Returns whether the `ts` belongs to the partition.
     fn contains_timestamp(&self, ts: Timestamp) -> bool {
-        let Some(range) = self.time_range else {
-            return true;
-        };
-
-        range.contains_timestamp(ts)
+        self.time_range.contains_timestamp(ts)
     }
 
     /// Write rows to the part.
@@ -72,14 +69,11 @@ impl TimePartition {
     }
 
     /// Write a partial [BulkPart] according to [TimePartition::time_range].
-    fn write_record_batch_partial(&self, part: &BulkPart) -> error::Result<()> {
-        let Some(range) = self.time_range else {
-            unreachable!("TimePartition must have explicit time range when a bulk request involves multiple time partition")
-        };
+    fn write_record_batch_partial(&self, part: &BulkPart) -> Result<()> {
         let Some(filtered) = filter_record_batch(
             part,
-            range.min_timestamp.value(),
-            range.max_timestamp.value(),
+            self.time_range.min_timestamp.value(),
+            self.time_range.max_timestamp.value(),
         )? else {
             return Ok(());
@@ -209,10 +203,7 @@ pub struct TimePartitions {
     /// Mutable data of partitions.
     inner: Mutex<PartitionsInner>,
     /// Duration of a partition.
-    ///
-    /// `None` means there is only one partition and the [TimePartition::time_range] is
-    /// also `None`.
-    part_duration: Option<Duration>,
+    part_duration: Duration,
     /// Metadata of the region.
     metadata: RegionMetadataRef,
     /// Builder of memtables.
@@ -229,26 +220,10 @@ impl TimePartitions {
         next_memtable_id: MemtableId,
         part_duration: Option<Duration>,
     ) -> Self {
-        let mut inner = PartitionsInner::new(next_memtable_id);
-        if part_duration.is_none() {
-            // If `part_duration` is None, then we create a partition with `None` time
-            // range so we will write all rows to that partition.
-            let memtable = builder.build(inner.alloc_memtable_id(), &metadata);
-            debug!(
-                "Creates a time partition for all timestamps, region: {}, memtable_id: {}",
-                metadata.region_id,
-                memtable.id(),
-            );
-            let part = TimePartition {
-                memtable,
-                time_range: None,
-            };
-            inner.parts.push(part);
-        }
-
+        let inner = PartitionsInner::new(next_memtable_id);
         Self {
             inner: Mutex::new(inner),
-            part_duration,
+            part_duration: part_duration.unwrap_or(INITIAL_TIME_WINDOW),
             metadata,
             builder,
         }
@@ -329,19 +304,18 @@
         part_start: Timestamp,
         inner: &mut MutexGuard<PartitionsInner>,
     ) -> Result<usize> {
-        let part_duration = self.part_duration.unwrap();
         let part_pos = match inner
             .parts
             .iter()
-            .position(|part| part.time_range.unwrap().min_timestamp == part_start)
+            .position(|part| part.time_range.min_timestamp == part_start)
         {
             Some(pos) => pos,
             None => {
-                let range = PartTimeRange::from_start_duration(part_start, part_duration)
+                let range = PartTimeRange::from_start_duration(part_start, self.part_duration)
                     .with_context(|| InvalidRequestSnafu {
                         region_id: self.metadata.region_id,
                         reason: format!(
-                            "Partition time range for {part_start:?} is out of bound, bucket size: {part_duration:?}",
+                            "Partition time range for {part_start:?} is out of bound, bucket size: {:?}",
+                            self.part_duration
                         ),
                     })?;
                 let memtable = self
@@ -351,14 +325,14 @@
                     "Create time partition {:?} for region {}, duration: {:?}, memtable_id: {}, parts_total: {}",
                     range,
                     self.metadata.region_id,
-                    part_duration,
+                    self.part_duration,
                     memtable.id(),
                     inner.parts.len() + 1
                 );
                 let pos = inner.parts.len();
                 inner.parts.push(TimePartition {
                     memtable,
-                    time_range: Some(range),
+                    time_range: range,
                 });
                 pos
             }
@@ -396,13 +370,13 @@
     /// Forks latest partition and updates the partition duration if `part_duration` is Some.
     pub fn fork(&self, metadata: &RegionMetadataRef, part_duration: Option<Duration>) -> Self {
         // Fall back to the existing partition duration.
-        let part_duration = part_duration.or(self.part_duration);
+        let part_duration = part_duration.unwrap_or(self.part_duration);
 
         let mut inner = self.inner.lock().unwrap();
         let latest_part = inner
             .parts
             .iter()
-            .max_by_key(|part| part.time_range.map(|range| range.min_timestamp))
+            .max_by_key(|part| part.time_range.min_timestamp)
             .cloned();
 
         let Some(old_part) = latest_part else {
@@ -411,33 +385,31 @@
                 metadata.clone(),
                 self.builder.clone(),
                 inner.next_memtable_id,
-                part_duration,
+                Some(part_duration),
             );
         };
 
         let old_stats = old_part.memtable.stats();
         // Use the max timestamp to compute the new time range for the memtable.
-        // If `part_duration` is None, the new range will be None.
-        let new_time_range =
-            old_stats
-                .time_range()
-                .zip(part_duration)
-                .and_then(|(range, bucket)| {
-                    partition_start_timestamp(range.1, bucket)
-                        .and_then(|start| PartTimeRange::from_start_duration(start, bucket))
-                });
-        // Forks the latest partition, but compute the time range based on the new duration.
-        let memtable = old_part.memtable.fork(inner.alloc_memtable_id(), metadata);
-        let new_part = TimePartition {
-            memtable,
-            time_range: new_time_range,
-        };
+        let partitions_inner = old_stats
+            .time_range()
+            .and_then(|(_, old_stats_end_timestamp)| {
+                partition_start_timestamp(old_stats_end_timestamp, part_duration)
+                    .and_then(|start| PartTimeRange::from_start_duration(start, part_duration))
+            })
+            .map(|part_time_range| {
+                // Forks the latest partition, but computes the time range based on the new duration.
+                let memtable = old_part.memtable.fork(inner.alloc_memtable_id(), metadata);
+                let part = TimePartition {
+                    memtable,
+                    time_range: part_time_range,
+                };
+                PartitionsInner::with_partition(part, inner.next_memtable_id)
+            })
+            .unwrap_or_else(|| PartitionsInner::new(inner.next_memtable_id));
         Self {
-            inner: Mutex::new(PartitionsInner::with_partition(
-                new_part,
-                inner.next_memtable_id,
-            )),
+            inner: Mutex::new(partitions_inner),
             part_duration,
             metadata: metadata.clone(),
             builder: self.builder.clone(),
@@ -445,7 +417,7 @@
     }
 
     /// Returns partition duration.
-    pub(crate) fn part_duration(&self) -> Option<Duration> {
+    pub(crate) fn part_duration(&self) -> Duration {
         self.part_duration
     }
 
@@ -490,7 +462,7 @@
             self.metadata.clone(),
             self.builder.clone(),
             self.next_memtable_id(),
-            part_duration.or(self.part_duration),
+            Some(part_duration.unwrap_or(self.part_duration)),
         )
     }
 
@@ -514,11 +486,7 @@
         let mut present = HashSet::new();
         // First find any existing partitions that overlap
         for part in existing_parts {
-            let Some(part_time_range) = part.time_range.as_ref() else {
-                matching.push(part);
-                return Ok((matching, Vec::new()));
-            };
-
+            let part_time_range = &part.time_range;
             if !(max < part_time_range.min_timestamp || min >= part_time_range.max_timestamp) {
                 matching.push(part);
                 present.insert(part_time_range.min_timestamp.value());
@@ -526,7 +494,7 @@
             }
         }
 
-        // safety: self.part_duration can only be present when reach here.
-        let part_duration = self.part_duration.unwrap();
+        let part_duration = self.part_duration_or_default();
         let timestamp_unit = self.metadata.time_index_type().unit();
         let part_duration_sec = part_duration.as_secs() as i64;
@@ -621,12 +589,13 @@
         Ok((matching, missing))
     }
 
+    /// Returns the partition duration, or the default 1-day duration if not present.
+    fn part_duration_or_default(&self) -> Duration {
+        self.part_duration
+    }
+
     /// Write to multiple partitions.
     fn write_multi_parts(&self, kvs: &KeyValues, parts: &PartitionVec) -> Result<()> {
-        // If part duration is `None` then there is always one partition and all rows
-        // will be put in that partition before invoking this method.
-        debug_assert!(self.part_duration.is_some());
-
         let mut parts_to_write = HashMap::new();
         let mut missing_parts = HashMap::new();
         for kv in kvs.iter() {
@@ -635,9 +604,8 @@
             let ts = kv.timestamp().as_timestamp().unwrap().unwrap();
             for part in parts {
                 if part.contains_timestamp(ts) {
-                    // Safety: Since part duration is `Some` so all time range should be `Some`.
                     parts_to_write
-                        .entry(part.time_range.unwrap().min_timestamp)
+                        .entry(part.time_range.min_timestamp)
                        .or_insert_with(|| PartitionToWrite {
                             partition: part.clone(),
                             key_values: Vec::new(),
@@ -652,7 +620,7 @@
             if !part_found {
                 // We need to write it to a new part.
-                // Safety: `new()` ensures duration is always Some if we do to this method.
-                let part_duration = self.part_duration.unwrap();
+                let part_duration = self.part_duration_or_default();
                 let part_start =
                     partition_start_timestamp(ts, part_duration).with_context(|| {
                         InvalidRequestSnafu {
@@ -787,7 +755,7 @@ mod tests {
         let metadata = memtable_util::metadata_for_test();
         let builder = Arc::new(PartitionTreeMemtableBuilder::default());
         let partitions = TimePartitions::new(metadata.clone(), builder, 0, None);
-        assert_eq!(1, partitions.num_partitions());
+        assert_eq!(0, partitions.num_partitions());
         assert!(partitions.is_empty());
 
         let kvs = memtable_util::build_key_values(
@@ -849,14 +817,15 @@
         let parts = partitions.list_partitions();
         assert_eq!(
             Timestamp::new_millisecond(0),
-            parts[0].time_range.unwrap().min_timestamp
+            parts[0].time_range.min_timestamp
         );
         assert_eq!(
             Timestamp::new_millisecond(10000),
-            parts[0].time_range.unwrap().max_timestamp
+            parts[0].time_range.max_timestamp
         );
     }
 
+    #[cfg(test)]
     fn new_multi_partitions(metadata: &RegionMetadataRef) -> TimePartitions {
         let builder = Arc::new(PartitionTreeMemtableBuilder::default());
         let partitions =
@@ -900,11 +869,11 @@
         assert_eq!(0, parts[0].memtable.id());
         assert_eq!(
             Timestamp::new_millisecond(0),
-            parts[0].time_range.unwrap().min_timestamp
+            parts[0].time_range.min_timestamp
         );
         assert_eq!(
             Timestamp::new_millisecond(5000),
-            parts[0].time_range.unwrap().max_timestamp
+            parts[0].time_range.max_timestamp
         );
         assert_eq!(&[0, 2000, 3000, 4000], &timestamps[..]);
         let iter = parts[1].memtable.iter(None, None, None).unwrap();
@@ -913,11 +882,11 @@
         assert_eq!(&[5000, 7000], &timestamps[..]);
         assert_eq!(
             Timestamp::new_millisecond(5000),
-            parts[1].time_range.unwrap().min_timestamp
+            parts[1].time_range.min_timestamp
         );
         assert_eq!(
             Timestamp::new_millisecond(10000),
-            parts[1].time_range.unwrap().max_timestamp
+            parts[1].time_range.max_timestamp
         );
     }
 
@@ -928,26 +897,26 @@
         let partitions = TimePartitions::new(metadata.clone(), builder.clone(), 0, None);
 
         let new_parts = partitions.new_with_part_duration(Some(Duration::from_secs(5)));
-        assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
-        assert_eq!(1, new_parts.next_memtable_id());
+        assert_eq!(Duration::from_secs(5), new_parts.part_duration());
+        assert_eq!(0, new_parts.next_memtable_id());
 
         // Won't update the duration if it's None.
         let new_parts = new_parts.new_with_part_duration(None);
-        assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
+        assert_eq!(Duration::from_secs(5), new_parts.part_duration());
         // Don't need to create new memtables.
-        assert_eq!(1, new_parts.next_memtable_id());
+        assert_eq!(0, new_parts.next_memtable_id());
 
         let new_parts = new_parts.new_with_part_duration(Some(Duration::from_secs(10)));
-        assert_eq!(Duration::from_secs(10), new_parts.part_duration().unwrap());
+        assert_eq!(Duration::from_secs(10), new_parts.part_duration());
         // Don't need to create new memtables.
-        assert_eq!(1, new_parts.next_memtable_id());
+        assert_eq!(0, new_parts.next_memtable_id());
 
         let builder = Arc::new(PartitionTreeMemtableBuilder::default());
         let partitions = TimePartitions::new(metadata.clone(), builder.clone(), 0, None);
-        // Need to build a new memtable as duration is still None.
+        // No memtable is created now; partitions are created lazily on write.
let new_parts = partitions.new_with_part_duration(None); - assert!(new_parts.part_duration().is_none()); - assert_eq!(2, new_parts.next_memtable_id()); + assert_eq!(INITIAL_TIME_WINDOW, new_parts.part_duration()); + assert_eq!(0, new_parts.next_memtable_id()); } #[test] @@ -957,28 +926,28 @@ mod tests { let partitions = TimePartitions::new(metadata.clone(), builder, 0, None); partitions.freeze().unwrap(); let new_parts = partitions.fork(&metadata, None); - assert!(new_parts.part_duration().is_none()); - assert_eq!(1, new_parts.list_partitions()[0].memtable.id()); - assert_eq!(2, new_parts.next_memtable_id()); + assert_eq!(INITIAL_TIME_WINDOW, new_parts.part_duration()); + assert!(new_parts.list_partitions().is_empty()); + assert_eq!(0, new_parts.next_memtable_id()); new_parts.freeze().unwrap(); let new_parts = new_parts.fork(&metadata, Some(Duration::from_secs(5))); - assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap()); - assert_eq!(2, new_parts.list_partitions()[0].memtable.id()); - assert_eq!(3, new_parts.next_memtable_id()); + assert_eq!(Duration::from_secs(5), new_parts.part_duration()); + assert!(new_parts.list_partitions().is_empty()); + assert_eq!(0, new_parts.next_memtable_id()); new_parts.freeze().unwrap(); let new_parts = new_parts.fork(&metadata, None); // Won't update the duration. - assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap()); - assert_eq!(3, new_parts.list_partitions()[0].memtable.id()); - assert_eq!(4, new_parts.next_memtable_id()); + assert_eq!(Duration::from_secs(5), new_parts.part_duration()); + assert!(new_parts.list_partitions().is_empty()); + assert_eq!(0, new_parts.next_memtable_id()); new_parts.freeze().unwrap(); let new_parts = new_parts.fork(&metadata, Some(Duration::from_secs(10))); - assert_eq!(Duration::from_secs(10), new_parts.part_duration().unwrap()); - assert_eq!(4, new_parts.list_partitions()[0].memtable.id()); - assert_eq!(5, new_parts.next_memtable_id()); + assert_eq!(Duration::from_secs(10), new_parts.part_duration()); + assert!(new_parts.list_partitions().is_empty()); + assert_eq!(0, new_parts.next_memtable_id()); } #[test] @@ -990,14 +959,14 @@ mod tests { // Won't update the duration. let new_parts = partitions.fork(&metadata, None); assert!(new_parts.is_empty()); - assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap()); + assert_eq!(Duration::from_secs(5), new_parts.part_duration()); assert_eq!(2, new_parts.list_partitions()[0].memtable.id()); assert_eq!(3, new_parts.next_memtable_id()); // Although we don't fork a memtable multiple times, we still add a test for it. 
let new_parts = partitions.fork(&metadata, Some(Duration::from_secs(10))); assert!(new_parts.is_empty()); - assert_eq!(Duration::from_secs(10), new_parts.part_duration().unwrap()); + assert_eq!(Duration::from_secs(10), new_parts.part_duration()); assert_eq!(3, new_parts.list_partitions()[0].memtable.id()); assert_eq!(4, new_parts.next_memtable_id()); } @@ -1018,9 +987,9 @@ mod tests { Timestamp::new_millisecond(2000), ) .unwrap(); - assert_eq!(matching.len(), 1); - assert!(missing.is_empty()); - assert!(matching[0].time_range.is_none()); + assert_eq!(matching.len(), 0); + assert_eq!(missing.len(), 1); + assert_eq!(missing[0], Timestamp::new_millisecond(0)); // Case 2: With time range partitioning let partitions = TimePartitions::new( @@ -1052,7 +1021,7 @@ mod tests { .unwrap(); assert_eq!(matching.len(), 1); assert!(missing.is_empty()); - assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0); + assert_eq!(matching[0].time_range.min_timestamp.value(), 0); // Test case 2b: Query spanning multiple existing partitions let (matching, missing) = partitions @@ -1065,8 +1034,8 @@ mod tests { .unwrap(); assert_eq!(matching.len(), 2); assert!(missing.is_empty()); - assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0); - assert_eq!(matching[1].time_range.unwrap().min_timestamp.value(), 5000); + assert_eq!(matching[0].time_range.min_timestamp.value(), 0); + assert_eq!(matching[1].time_range.min_timestamp.value(), 5000); // Test case 2c: Query requiring new partition let (matching, missing) = partitions @@ -1092,8 +1061,8 @@ mod tests { .unwrap(); assert_eq!(matching.len(), 2); assert!(missing.is_empty()); - assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0); - assert_eq!(matching[1].time_range.unwrap().min_timestamp.value(), 5000); + assert_eq!(matching[0].time_range.min_timestamp.value(), 0); + assert_eq!(matching[1].time_range.min_timestamp.value(), 5000); // Test case 2e: Corner case let (matching, missing) = partitions @@ -1106,8 +1075,8 @@ mod tests { .unwrap(); assert_eq!(matching.len(), 2); assert!(missing.is_empty()); - assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0); - assert_eq!(matching[1].time_range.unwrap().min_timestamp.value(), 5000); + assert_eq!(matching[0].time_range.min_timestamp.value(), 0); + assert_eq!(matching[1].time_range.min_timestamp.value(), 5000); // Test case 2f: Corner case with let (matching, missing) = partitions @@ -1120,7 +1089,7 @@ mod tests { .unwrap(); assert_eq!(matching.len(), 1); assert_eq!(1, missing.len()); - assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 5000); + assert_eq!(matching[0].time_range.min_timestamp.value(), 5000); assert_eq!(missing[0].value(), 10000); // Test case 2g: Cross 0 @@ -1133,7 +1102,7 @@ mod tests { ) .unwrap(); assert_eq!(matching.len(), 1); - assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0); + assert_eq!(matching[0].time_range.min_timestamp.value(), 0); assert_eq!(1, missing.len()); assert_eq!(missing[0].value(), -5000); @@ -1151,8 +1120,8 @@ mod tests { ) .unwrap(); assert_eq!(2, matching.len()); - assert_eq!(matching[0].time_range.unwrap().min_timestamp.value(), 0); - assert_eq!(matching[1].time_range.unwrap().min_timestamp.value(), 5000); + assert_eq!(matching[0].time_range.min_timestamp.value(), 0); + assert_eq!(matching[1].time_range.min_timestamp.value(), 5000); assert_eq!(2, missing.len()); assert_eq!(missing[0].value(), -100000000000); assert_eq!(missing[1].value(), 100000000000); @@ -1162,10 +1131,7 @@ mod tests { let 
schema = Arc::new(Schema::new(vec![
             Field::new(
                 "ts",
-                arrow::datatypes::DataType::Timestamp(
-                    arrow::datatypes::TimeUnit::Millisecond,
-                    None,
-                ),
+                DataType::Timestamp(arrow::datatypes::TimeUnit::Millisecond, None),
                 false,
             ),
             Field::new("val", DataType::Utf8, true),
diff --git a/src/mito2/src/memtable/version.rs b/src/mito2/src/memtable/version.rs
index f443396109..537332d3b7 100644
--- a/src/mito2/src/memtable/version.rs
+++ b/src/mito2/src/memtable/version.rs
@@ -76,7 +76,7 @@ impl MemtableVersion {
     ) -> Result<Option<MemtableVersion>> {
         if self.mutable.is_empty() {
             // No need to freeze the mutable memtable, but we need to check the time window.
-            if self.mutable.part_duration() == time_window {
+            if Some(self.mutable.part_duration()) == time_window {
                 // If the time window is the same, we don't need to update it.
                 return Ok(None);
             }
@@ -98,7 +98,7 @@ impl MemtableVersion {
         // soft limit.
         self.mutable.freeze()?;
 
         // Fork the memtable.
-        if self.mutable.part_duration() != time_window {
+        if Some(self.mutable.part_duration()) != time_window {
             common_telemetry::debug!(
                 "Fork memtable, update partition duration from {:?}, to {:?}",
                 self.mutable.part_duration(),
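Since part_duration() now always returns a concrete Duration, the comparisons above wrap it in Some to line up with the Option<Duration> window supplied by compaction. Note that a None window never compares equal, so freezing still goes through the fork path, and fork falls back to the existing duration when no new window is given. A standalone sketch of the comparison:

    use std::time::Duration;

    fn window_changed(part: Duration, window: Option<Duration>) -> bool {
        // None never equals Some(part), so a missing window still triggers a fork.
        Some(part) != window
    }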
diff --git a/src/mito2/src/region/version.rs b/src/mito2/src/region/version.rs
index d5078933bc..b40c11ee5b 100644
--- a/src/mito2/src/region/version.rs
+++ b/src/mito2/src/region/version.rs
@@ -142,7 +142,7 @@ impl VersionControl {
     /// Mark all opened files as deleted and set the delete marker in [VersionControlData]
     pub(crate) fn mark_dropped(&self, memtable_builder: &MemtableBuilderRef) {
         let version = self.current().version;
-        let part_duration = version.memtables.mutable.part_duration();
+        let part_duration = Some(version.memtables.mutable.part_duration());
         let next_memtable_id = version.memtables.mutable.next_memtable_id();
         let new_mutable = Arc::new(TimePartitions::new(
             version.metadata.clone(),
@@ -166,7 +166,7 @@ impl VersionControl {
     /// new schema. Memtables of the version must be empty.
     pub(crate) fn alter_schema(&self, metadata: RegionMetadataRef, builder: &MemtableBuilderRef) {
         let version = self.current().version;
-        let part_duration = version.memtables.mutable.part_duration();
+        let part_duration = Some(version.memtables.mutable.part_duration());
         let next_memtable_id = version.memtables.mutable.next_memtable_id();
         let new_mutable = Arc::new(TimePartitions::new(
             metadata.clone(),
@@ -202,7 +202,7 @@
             version.metadata.clone(),
             memtable_builder.clone(),
             next_memtable_id,
-            part_duration,
+            Some(part_duration),
         ));
         let new_version = Arc::new(
             VersionBuilder::new(version.metadata.clone(), new_mutable)
diff --git a/tests-integration/src/tests/instance_test.rs b/tests-integration/src/tests/instance_test.rs
index e157fdc44f..6406b1c8a2 100644
--- a/tests-integration/src/tests/instance_test.rs
+++ b/tests-integration/src/tests/instance_test.rs
@@ -1458,9 +1458,12 @@ async fn test_insert_with_default_value_for_type(instance: Arc<dyn MockInstance>, type_n
         .data;
     assert!(matches!(output, OutputData::AffectedRows(1)));
 
-    let output = execute_sql(&instance, &format!("select host, cpu from {table_name}"))
-        .await
-        .data;
+    let output = execute_sql(
+        &instance,
+        &format!("select host, cpu from {table_name} order by host"),
+    )
+    .await
+    .data;
     let expected = "\
 +-------+-----+
 | host  | cpu |
@@ -1757,7 +1760,12 @@ async fn test_execute_copy_from_orc_with_cast(instance: Arc<dyn MockInstance>) {
 
     assert!(matches!(output, OutputData::AffectedRows(5)));
 
-    let output = execute_sql(&instance, "select * from demo;").await.data;
+    let output = execute_sql(
+        &instance,
+        "select * from demo order by timestamp_simple asc;",
+    )
+    .await
+    .data;
     let expected = r#"+-------------------------------+----------------------------+-------------------------+----------------------------+
 | bigint_direct                 | bigint_neg_direct          | bigint_other            | timestamp_simple           |
 +-------------------------------+----------------------------+-------------------------+----------------------------+
diff --git a/tests-integration/tests/grpc.rs b/tests-integration/tests/grpc.rs
index 7125b9851b..007e424b56 100644
--- a/tests-integration/tests/grpc.rs
+++ b/tests-integration/tests/grpc.rs
@@ -520,7 +520,7 @@ async fn insert_and_assert(db: &Database) {
 
     // select
     let output = db
-        .sql("SELECT host, cpu, memory, ts FROM demo")
+        .sql("SELECT host, cpu, memory, ts FROM demo order by host")
         .await
         .unwrap();
diff --git a/tests/cases/standalone/common/alter/alter_metric_table.result b/tests/cases/standalone/common/alter/alter_metric_table.result
index 6f1aea3cba..1e51766669 100644
--- a/tests/cases/standalone/common/alter/alter_metric_table.result
+++ b/tests/cases/standalone/common/alter/alter_metric_table.result
@@ -145,7 +145,7 @@ INSERT INTO t1 (ts, val, host) VALUES
 
 Affected Rows: 6
 
-SELECT * FROM t1;
+SELECT * FROM t1 ORDER BY ts ASC;
 
 +-------------+---------------------+------+
 | host        | ts                  | val  |
@@ -159,7 +159,7 @@ ALTER TABLE t1 ADD COLUMN k STRING PRIMARY KEY;
 
 Affected Rows: 0
 
-SELECT * FROM t1;
+SELECT * FROM t1 ORDER BY ts ASC;
 
 +-------------+---+---------------------+------+
 | host        | k | ts                  | val  |
diff --git a/tests/cases/standalone/common/alter/alter_metric_table.sql b/tests/cases/standalone/common/alter/alter_metric_table.sql
index 24b9de96d2..293fb134b3 100644
--- a/tests/cases/standalone/common/alter/alter_metric_table.sql
+++ b/tests/cases/standalone/common/alter/alter_metric_table.sql
@@ -52,11 +52,11 @@ INSERT INTO t1 (ts, val, host) VALUES
     ('2022-01-02 00:00:00', 4.56, 'example.com'),
     ('2022-01-03 00:00:00', 7.89, 
'example.com'); -SELECT * FROM t1; +SELECT * FROM t1 ORDER BY ts ASC; ALTER TABLE t1 ADD COLUMN k STRING PRIMARY KEY; -SELECT * FROM t1; +SELECT * FROM t1 ORDER BY ts ASC; DROP TABLE t1; diff --git a/tests/cases/standalone/common/basic.result b/tests/cases/standalone/common/basic.result index 9516d0cfbc..4878220e65 100644 --- a/tests/cases/standalone/common/basic.result +++ b/tests/cases/standalone/common/basic.result @@ -76,7 +76,7 @@ insert into foo (host) values ('host3'); Affected Rows: 1 -select * from foo; +select * from foo order by ts; +-------+---------------------+-----+ | host | ts | cpu | @@ -141,7 +141,7 @@ SELECT * FROM system_metrics; | host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +-------+-------+----------+-------------+-----------+-------------------------+ -select * from foo; +select * from foo order by host asc; +-------+---------------------+-----+ | host | ts | cpu | @@ -151,12 +151,12 @@ select * from foo; | host3 | 2023-04-29T00:00:00 | 0.0 | +-------+---------------------+-----+ -SELECT * from t1; +SELECT * from t1 order by ts desc; ++ ++ -SELECT * from t2; +SELECT * from t2 order by ts desc; +------+-------------------------+-----+ | job | ts | val | diff --git a/tests/cases/standalone/common/basic.sql b/tests/cases/standalone/common/basic.sql index b66651b8e8..f48910ce71 100644 --- a/tests/cases/standalone/common/basic.sql +++ b/tests/cases/standalone/common/basic.sql @@ -37,7 +37,7 @@ insert into foo (host, cpu) values ('host2', 2.2); insert into foo (host) values ('host3'); -select * from foo; +select * from foo order by ts; CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = ""); @@ -63,11 +63,11 @@ INSERT INTO t2 VALUES ('job1', 0, 0), ('job2', 1, 1); SELECT * FROM system_metrics; -select * from foo; +select * from foo order by host asc; -SELECT * from t1; +SELECT * from t1 order by ts desc; -SELECT * from t2; +SELECT * from t2 order by ts desc; DROP TABLE t1; diff --git a/tests/cases/standalone/common/create/create_metric_table.result b/tests/cases/standalone/common/create/create_metric_table.result index cc984cb62d..2c4c863a3b 100644 --- a/tests/cases/standalone/common/create/create_metric_table.result +++ b/tests/cases/standalone/common/create/create_metric_table.result @@ -219,7 +219,7 @@ INSERT INTO t1 (ts, val, host) VALUES Affected Rows: 6 -SELECT * FROM t1; +SELECT * FROM t1 ORDER BY ts ASC; +-------------+---------------------+------+ | host | ts | val | diff --git a/tests/cases/standalone/common/create/create_metric_table.sql b/tests/cases/standalone/common/create/create_metric_table.sql index 2242ebfc31..8a4aa2b7cd 100644 --- a/tests/cases/standalone/common/create/create_metric_table.sql +++ b/tests/cases/standalone/common/create/create_metric_table.sql @@ -84,7 +84,7 @@ INSERT INTO t1 (ts, val, host) VALUES ('2022-01-02 00:00:00', 4.56, 'example.com'), ('2022-01-03 00:00:00', 7.89, 'example.com'); -SELECT * FROM t1; +SELECT * FROM t1 ORDER BY ts ASC; DROP TABLE t1; diff --git a/tests/cases/standalone/common/flow/flow_basic.result b/tests/cases/standalone/common/flow/flow_basic.result index f222a9c331..20ce8bafc4 100644 --- a/tests/cases/standalone/common/flow/flow_basic.result +++ b/tests/cases/standalone/common/flow/flow_basic.result @@ -500,7 +500,8 @@ SELECT rate, time_window FROM - approx_rate; + approx_rate +ORDER BY time_window ASC; +------+---------------------+ | rate | time_window | @@ -529,7 +530,8 @@ SELECT rate, time_window FROM - approx_rate; + approx_rate +ORDER BY 
time_window ASC;
 
 +------+---------------------+
 | rate | time_window         |
@@ -559,7 +561,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 +-------------------+---------------------+
 | rate              | time_window         |
@@ -590,7 +593,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 +--------------------+---------------------+
 | rate               | time_window         |
@@ -1282,17 +1286,18 @@ ADMIN FLUSH_FLOW('requests_long_term');
 SELECT
     *
 FROM
-    requests_without_ip;
+    requests_without_ip
+ORDER BY ts ASC;
 
 +--------------+-----+---------------------+
 | service_name | val | ts                  |
 +--------------+-----+---------------------+
 |              | 100 | 2024-10-18T19:00:00 |
+| svc1         | 100 | 2024-10-18T19:00:00 |
 |              | 200 | 2024-10-18T19:00:30 |
+| svc1         | 200 | 2024-10-18T19:00:30 |
 |              | 300 | 2024-10-18T19:01:00 |
 |              | 100 | 2024-10-18T19:01:01 |
-| svc1         | 100 | 2024-10-18T19:00:00 |
-| svc1         | 200 | 2024-10-18T19:00:30 |
 | svc1         | 400 | 2024-10-18T19:01:30 |
 | svc1         | 200 | 2024-10-18T19:01:31 |
 +--------------+-----+---------------------+
@@ -1335,25 +1340,26 @@ ADMIN FLUSH_FLOW('requests_long_term');
 SELECT
     *
 FROM
-    requests_without_ip;
+    requests_without_ip
+ORDER BY ts ASC;
 
 +--------------+-----+---------------------+
 | service_name | val | ts                  |
 +--------------+-----+---------------------+
 |              | 100 | 2024-10-18T19:00:00 |
+| svc1         | 100 | 2024-10-18T19:00:00 |
 |              | 200 | 2024-10-18T19:00:30 |
+| svc1         | 200 | 2024-10-18T19:00:30 |
 |              | 300 | 2024-10-18T19:01:00 |
 |              | 100 | 2024-10-18T19:01:01 |
+| svc1         | 400 | 2024-10-18T19:01:30 |
+| svc1         | 200 | 2024-10-18T19:01:31 |
 |              | 100 | 2024-10-19T19:00:00 |
 |              | 200 | 2024-10-19T19:00:30 |
 |              | 300 | 2024-10-19T19:01:00 |
 |              | 100 | 2024-10-19T19:01:01 |
 |              | 400 | 2024-10-19T19:01:30 |
 |              | 200 | 2024-10-19T19:01:31 |
-| svc1         | 100 | 2024-10-18T19:00:00 |
-| svc1         | 200 | 2024-10-18T19:00:30 |
-| svc1         | 400 | 2024-10-18T19:01:30 |
-| svc1         | 200 | 2024-10-18T19:01:31 |
 +--------------+-----+---------------------+
 
 INSERT INTO
@@ -1382,31 +1388,32 @@ ADMIN FLUSH_FLOW('requests_long_term');
 SELECT
     *
 FROM
-    requests_without_ip;
+    requests_without_ip
+ORDER BY ts ASC;
 
 +--------------+-----+---------------------+
 | service_name | val | ts                  |
 +--------------+-----+---------------------+
 |              | 100 | 2024-10-18T19:00:00 |
+| svc1         | 100 | 2024-10-18T19:00:00 |
+| svc2         | 100 | 2024-10-18T19:00:00 |
 |              | 200 | 2024-10-18T19:00:30 |
+| svc1         | 200 | 2024-10-18T19:00:30 |
+| svc2         | 200 | 2024-10-18T19:00:30 |
 |              | 300 | 2024-10-18T19:01:00 |
+| svc2         | 300 | 2024-10-18T19:01:00 |
 |              | 100 | 2024-10-18T19:01:01 |
+| svc2         | 100 | 2024-10-18T19:01:01 |
+| svc1         | 400 | 2024-10-18T19:01:30 |
+| svc2         | 400 | 2024-10-18T19:01:30 |
+| svc1         | 200 | 2024-10-18T19:01:31 |
+| svc2         | 200 | 2024-10-18T19:01:31 |
 |              | 100 | 2024-10-19T19:00:00 |
 |              | 200 | 2024-10-19T19:00:30 |
 |              | 300 | 2024-10-19T19:01:00 |
 |              | 100 | 2024-10-19T19:01:01 |
 |              | 400 | 2024-10-19T19:01:30 |
 |              | 200 | 2024-10-19T19:01:31 |
-| svc1         | 100 | 2024-10-18T19:00:00 |
-| svc1         | 200 | 2024-10-18T19:00:30 |
-| svc1         | 400 | 2024-10-18T19:01:30 |
-| svc1         | 200 | 2024-10-18T19:01:31 |
-| svc2         | 100 | 2024-10-18T19:00:00 |
-| svc2         | 200 | 2024-10-18T19:00:30 |
-| svc2         | 300 | 2024-10-18T19:01:00 |
-| svc2         | 100 | 2024-10-18T19:01:01 |
-| svc2         | 400 | 2024-10-18T19:01:30 |
-| svc2         | 200 | 2024-10-18T19:01:31 |
 +--------------+-----+---------------------+
 
 DROP FLOW requests_long_term;
diff --git a/tests/cases/standalone/common/flow/flow_basic.sql b/tests/cases/standalone/common/flow/flow_basic.sql
index 92310f98c4..99c54db623 100644
--- a/tests/cases/standalone/common/flow/flow_basic.sql
+++ b/tests/cases/standalone/common/flow/flow_basic.sql
@@ -219,7 +219,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 INSERT INTO
     bytes_log
@@ -234,7 +235,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 INSERT INTO
     bytes_log
@@ -249,7 +251,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 INSERT INTO
     bytes_log
@@ -264,7 +267,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 DROP TABLE bytes_log;
 
@@ -610,7 +614,8 @@ ADMIN FLUSH_FLOW('requests_long_term');
 SELECT
     *
 FROM
-    requests_without_ip;
+    requests_without_ip
+ORDER BY ts ASC;
 
 -- Test if FLOWS table works, but don't care about the result since it varies between runs
 SELECT
@@ -636,7 +641,8 @@ ADMIN FLUSH_FLOW('requests_long_term');
 SELECT
     *
 FROM
-    requests_without_ip;
+    requests_without_ip
+ORDER BY ts ASC;
 
 INSERT INTO
     requests
@@ -656,7 +662,8 @@ ADMIN FLUSH_FLOW('requests_long_term');
 SELECT
     *
 FROM
-    requests_without_ip;
+    requests_without_ip
+ORDER BY ts ASC;
 
 DROP FLOW requests_long_term;
diff --git a/tests/cases/standalone/common/flow/flow_insert.result b/tests/cases/standalone/common/flow/flow_insert.result
index b1797b2e1d..3673ac8a1f 100644
--- a/tests/cases/standalone/common/flow/flow_insert.result
+++ b/tests/cases/standalone/common/flow/flow_insert.result
@@ -67,7 +67,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 +------+---------------------+
 | rate | time_window         |
@@ -97,7 +98,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 +------+---------------------+
 | rate | time_window         |
@@ -128,7 +130,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 +-------------------+---------------------+
 | rate              | time_window         |
@@ -160,7 +163,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 +--------------------+---------------------+
 | rate               | time_window         |
diff --git a/tests/cases/standalone/common/flow/flow_insert.sql b/tests/cases/standalone/common/flow/flow_insert.sql
index e44b8d558c..969888f4fe 100644
--- a/tests/cases/standalone/common/flow/flow_insert.sql
+++ b/tests/cases/standalone/common/flow/flow_insert.sql
@@ -39,7 +39,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 -- reordered insert, also test if null is handled correctly
 INSERT INTO
@@ -55,7 +56,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 -- reordered insert
 INSERT INTO
@@ -71,7 +73,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 -- reordered insert
 INSERT INTO
@@ -87,7 +90,8 @@ SELECT
     rate,
     time_window
 FROM
-    approx_rate;
+    approx_rate
+ORDER BY time_window ASC;
 
 DROP TABLE bytes_log;
diff --git a/tests/cases/standalone/common/insert/insert_default_timezone.result b/tests/cases/standalone/common/insert/insert_default_timezone.result
index e44e9692ee..9b12e4a15d 100644
--- a/tests/cases/standalone/common/insert/insert_default_timezone.result
+++ b/tests/cases/standalone/common/insert/insert_default_timezone.result
@@ -7,7 +7,7 @@ INSERT INTO test1 VALUES (1, DEFAULT), (2, DEFAULT), (3, '2024-01-31 00:01:01'),
 
 Affected Rows: 4
 
-SELECT * FROM test1;
+SELECT * FROM test1 ORDER BY j;
 
 +---+---------------------+
 | i | j                   |
@@ -30,7 +30,7 @@ 
INSERT INTO test2 VALUES (1, DEFAULT), (2, DEFAULT), (3, '2024-01-31 00:01:01'), Affected Rows: 4 -SELECT * FROM test2; +SELECT * FROM test2 ORDER BY j; +---+---------------------+ | i | j | @@ -41,7 +41,7 @@ SELECT * FROM test2; | 4 | 2025-01-31T16:01:01 | +---+---------------------+ -SELECT * FROM test1; +SELECT * FROM test1 ORDER BY j; +---+---------------------+ | i | j | diff --git a/tests/cases/standalone/common/insert/insert_default_timezone.sql b/tests/cases/standalone/common/insert/insert_default_timezone.sql index 403b534f5a..2473cdcee4 100644 --- a/tests/cases/standalone/common/insert/insert_default_timezone.sql +++ b/tests/cases/standalone/common/insert/insert_default_timezone.sql @@ -4,7 +4,7 @@ CREATE TABLE test1 (i INTEGER, j TIMESTAMP default '2024-01-30 00:01:01' TIME IN INSERT INTO test1 VALUES (1, DEFAULT), (2, DEFAULT), (3, '2024-01-31 00:01:01'), (4, '2025-02-01 00:01:01'); -SELECT * FROM test1; +SELECT * FROM test1 ORDER BY j; SET time_zone = 'Asia/Shanghai'; @@ -12,9 +12,9 @@ CREATE TABLE test2 (i INTEGER, j TIMESTAMP default '2024-01-30 00:01:01' TIME IN INSERT INTO test2 VALUES (1, DEFAULT), (2, DEFAULT), (3, '2024-01-31 00:01:01'), (4, '2025-02-01 00:01:01'); -SELECT * FROM test2; +SELECT * FROM test2 ORDER BY j; -SELECT * FROM test1; +SELECT * FROM test1 ORDER BY j; SET time_zone = 'UTC'; diff --git a/tests/cases/standalone/common/system/timezone.result b/tests/cases/standalone/common/system/timezone.result index 26b15544f8..7037536dae 100644 --- a/tests/cases/standalone/common/system/timezone.result +++ b/tests/cases/standalone/common/system/timezone.result @@ -36,7 +36,7 @@ INSERT INTO test values Affected Rows: 5 -SELECT * from test; +SELECT * from test ORDER BY ts ASC; +-----+---------------------+ | d | ts | @@ -48,7 +48,7 @@ SELECT * from test; | 5.0 | 2024-01-04T16:00:00 | +-----+---------------------+ -SELECT * from test where ts >= '2024-01-02 08:00:00'; +SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts; +-----+---------------------+ | d | ts | @@ -59,7 +59,7 @@ SELECT * from test where ts >= '2024-01-02 08:00:00'; | 5.0 | 2024-01-04T16:00:00 | +-----+---------------------+ -SELECT * from test where ts <= '2024-01-03 16:00:00'; +SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts; +-----+---------------------+ | d | ts | @@ -69,7 +69,7 @@ SELECT * from test where ts <= '2024-01-03 16:00:00'; | 3.0 | 2024-01-03T16:00:00 | +-----+---------------------+ -select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test; +select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts; +----------------------------------------------------+ | date_format(test.ts,Utf8("%Y-%m-%d %H:%M:%S:%3f")) | @@ -126,7 +126,7 @@ select timezone(); | +08:00 | +------------+ -SELECT * from test; +SELECT * from test ORDER BY ts; +-----+---------------------+ | d | ts | @@ -138,7 +138,7 @@ SELECT * from test; | 5.0 | 2024-01-04T16:00:00 | +-----+---------------------+ -SELECT * from test where ts >= '2024-01-02 08:00:00'; +SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts; +-----+---------------------+ | d | ts | @@ -149,7 +149,7 @@ SELECT * from test where ts >= '2024-01-02 08:00:00'; | 5.0 | 2024-01-04T16:00:00 | +-----+---------------------+ -SELECT * from test where ts <= '2024-01-03 16:00:00'; +SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts; +-----+---------------------+ | d | ts | @@ -158,7 +158,7 @@ SELECT * from test where ts <= '2024-01-03 16:00:00'; | 2.0 | 2024-01-02T08:00:00 | +-----+---------------------+ 
-select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test; +select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts; +----------------------------------------------------+ | date_format(test.ts,Utf8("%Y-%m-%d %H:%M:%S:%3f")) | @@ -215,7 +215,7 @@ select timezone(); | -08:00 | +------------+ -SELECT * from test; +SELECT * from test ORDER BY ts; +-----+---------------------+ | d | ts | @@ -227,7 +227,7 @@ SELECT * from test; | 5.0 | 2024-01-04T16:00:00 | +-----+---------------------+ -SELECT * from test where ts >= '2024-01-02 08:00:00'; +SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts; +-----+---------------------+ | d | ts | @@ -237,7 +237,7 @@ SELECT * from test where ts >= '2024-01-02 08:00:00'; | 5.0 | 2024-01-04T16:00:00 | +-----+---------------------+ -SELECT * from test where ts <= '2024-01-03 16:00:00'; +SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts; +-----+---------------------+ | d | ts | @@ -248,7 +248,7 @@ SELECT * from test where ts <= '2024-01-03 16:00:00'; | 4.0 | 2024-01-04T00:00:00 | +-----+---------------------+ -select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test; +select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts; +----------------------------------------------------+ | date_format(test.ts,Utf8("%Y-%m-%d %H:%M:%S:%3f")) | diff --git a/tests/cases/standalone/common/system/timezone.sql b/tests/cases/standalone/common/system/timezone.sql index 828d29421e..1f749b2ac8 100644 --- a/tests/cases/standalone/common/system/timezone.sql +++ b/tests/cases/standalone/common/system/timezone.sql @@ -14,13 +14,13 @@ INSERT INTO test values (4, '2024-01-04 00:00:00'), (5, '2024-01-05 00:00:00+08:00'); -SELECT * from test; +SELECT * from test ORDER BY ts ASC; -SELECT * from test where ts >= '2024-01-02 08:00:00'; +SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts; -SELECT * from test where ts <= '2024-01-03 16:00:00'; +SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts; -select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test; +select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts; select to_unixtime('2024-01-02 00:00:00'); @@ -35,13 +35,13 @@ SHOW VARIABLES system_time_zone; select timezone(); -SELECT * from test; +SELECT * from test ORDER BY ts; -SELECT * from test where ts >= '2024-01-02 08:00:00'; +SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts; -SELECT * from test where ts <= '2024-01-03 16:00:00'; +SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts; -select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test; +select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts; select to_unixtime('2024-01-02 00:00:00'); @@ -56,13 +56,13 @@ SHOW VARIABLES system_time_zone; select timezone(); -SELECT * from test; +SELECT * from test ORDER BY ts; -SELECT * from test where ts >= '2024-01-02 08:00:00'; +SELECT * from test where ts >= '2024-01-02 08:00:00' ORDER BY ts; -SELECT * from test where ts <= '2024-01-03 16:00:00'; +SELECT * from test where ts <= '2024-01-03 16:00:00' ORDER BY ts; -select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test; +select date_format(ts, '%Y-%m-%d %H:%M:%S:%3f') from test ORDER BY ts; select to_unixtime('2024-01-02 00:00:00'); diff --git a/tests/cases/standalone/common/types/string/scan_big_varchar.result b/tests/cases/standalone/common/types/string/scan_big_varchar.result index d016ecc92a..4d9261d116 100644 --- a/tests/cases/standalone/common/types/string/scan_big_varchar.result +++ 
b/tests/cases/standalone/common/types/string/scan_big_varchar.result @@ -22,7 +22,7 @@ INSERT INTO test SELECT a||a||a||a||a||a||a||a||a||a, to_unixtime(ts) * 7 FROM t Affected Rows: 1 -- now create a second table, we only insert the big varchar string in there -CREATE TABLE bigtable (a VARCHAR, ts timestamp_s time index); +CREATE TABLE bigtable (a VARCHAR, ts timestamp_s time index) WITH ('compaction.type' = 'twcs', 'compaction.twcs.time_window'='1000000y'); Affected Rows: 0 @@ -174,6 +174,15 @@ SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; | 2048 | 2048 | 10000 | 20480000 | +----------+-------------------+-----------------------------------+-----------------------------------+ +-- SQLNESS ARG restart=true +SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; + ++----------+-------------------+-----------------------------------+-----------------------------------+ +| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | ++----------+-------------------+-----------------------------------+-----------------------------------+ +| 2048 | 2048 | 10000 | 20480000 | ++----------+-------------------+-----------------------------------+-----------------------------------+ + INSERT INTO bigtable SELECT a, to_unixtime(ts) * 67 FROM bigtable; Affected Rows: 2048 @@ -198,39 +207,6 @@ SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; | 8192 | 8192 | 10000 | 81920000 | +----------+-------------------+-----------------------------------+-----------------------------------+ --- SQLNESS ARG restart=true -SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; - -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 8192 | 8192 | 10000 | 81920000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ - -INSERT INTO bigtable SELECT a, to_unixtime(ts) * 73 FROM bigtable; - -Affected Rows: 8192 - -SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; - -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 16384 | 16384 | 10000 | 163840000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ - -INSERT INTO bigtable SELECT a, to_unixtime(ts) * 79 FROM bigtable; - -Affected Rows: 16384 - -SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; - -+----------+-------------------+-----------------------------------+-----------------------------------+ -| count(*) | count(bigtable.a) | max(character_length(bigtable.a)) | sum(character_length(bigtable.a)) | -+----------+-------------------+-----------------------------------+-----------------------------------+ -| 32768 | 32768 | 10000 | 327680000 | -+----------+-------------------+-----------------------------------+-----------------------------------+ - DROP TABLE test; Affected Rows: 0 diff --git a/tests/cases/standalone/common/types/string/scan_big_varchar.sql 
b/tests/cases/standalone/common/types/string/scan_big_varchar.sql index 1586fa230b..c03028358a 100644 --- a/tests/cases/standalone/common/types/string/scan_big_varchar.sql +++ b/tests/cases/standalone/common/types/string/scan_big_varchar.sql @@ -13,7 +13,7 @@ INSERT INTO test SELECT a||a||a||a||a||a||a||a||a||a, to_unixtime(ts) * 5 FROM t INSERT INTO test SELECT a||a||a||a||a||a||a||a||a||a, to_unixtime(ts) * 7 FROM test WHERE LENGTH(a)=(SELECT MAX(LENGTH(a)) FROM test); -- now create a second table, we only insert the big varchar string in there -CREATE TABLE bigtable (a VARCHAR, ts timestamp_s time index); +CREATE TABLE bigtable (a VARCHAR, ts timestamp_s time index) WITH ('compaction.type' = 'twcs', 'compaction.twcs.time_window'='1000000y'); INSERT INTO bigtable SELECT a, ts FROM test WHERE LENGTH(a)=(SELECT MAX(LENGTH(a)) FROM test); @@ -67,6 +67,8 @@ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 63 FROM bigtable; SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; +-- SQLNESS ARG restart=true +SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; INSERT INTO bigtable SELECT a, to_unixtime(ts) * 67 FROM bigtable; @@ -76,17 +78,6 @@ INSERT INTO bigtable SELECT a, to_unixtime(ts) * 71 FROM bigtable; SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; --- SQLNESS ARG restart=true -SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; - -INSERT INTO bigtable SELECT a, to_unixtime(ts) * 73 FROM bigtable; - -SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; - -INSERT INTO bigtable SELECT a, to_unixtime(ts) * 79 FROM bigtable; - -SELECT COUNT(*), COUNT(a), MAX(LENGTH(a)), SUM(LENGTH(a)) FROM bigtable; - DROP TABLE test;
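A note on the scan_big_varchar changes: pinning compaction.twcs.time_window to an effectively infinite '1000000y' presumably keeps every row of bigtable inside a single TWCS window, so the restart check, now moved before the later doubling rounds, observes a stable file layout; the final *73 and *79 insert rounds were dropped along with their duplicated count checks.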