Skip to main content

mito2/read/
last_row.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Utilities to read the last row of each time series.
16
17use std::sync::Arc;
18
19use async_trait::async_trait;
20use datatypes::arrow::array::{Array, BinaryArray};
21use datatypes::arrow::compute::concat_batches;
22use datatypes::arrow::record_batch::RecordBatch;
23use futures::{Stream, TryStreamExt};
24use snafu::ResultExt;
25use store_api::storage::{FileId, TimeSeriesRowSelector};
26
27use crate::cache::{
28    CacheStrategy, SelectorResult, SelectorResultKey, SelectorResultValue,
29    selector_result_cache_hit, selector_result_cache_miss,
30};
31use crate::error::{ComputeArrowSnafu, Result};
32use crate::read::{
33    Batch, BatchReader, BoxedBatchReader, BoxedRecordBatchStream, timestamp_array_to_i64_slice,
34};
35use crate::sst::parquet::DEFAULT_READ_BATCH_SIZE;
36use crate::sst::parquet::flat_format::{primary_key_column_index, time_index_column_index};
37use crate::sst::parquet::format::{PrimaryKeyArray, primary_key_offsets};
38use crate::sst::parquet::read_columns::ParquetReadColumns;
39use crate::sst::parquet::reader::FlatRowGroupReader;
40
41/// Reader to keep the last row for each time series.
42/// It assumes that batches from the input reader are
43/// - sorted
44/// - all deleted rows has been filtered.
45/// - not empty
46///
47/// This reader is different from the [MergeMode](crate::region::options::MergeMode) as
48/// it focus on time series (the same key).
49#[allow(dead_code)]
50pub(crate) struct LastRowReader {
51    /// Inner reader.
52    reader: BoxedBatchReader,
53    /// The last batch pending to return.
54    selector: LastRowSelector,
55}
56
57#[allow(dead_code)]
58impl LastRowReader {
59    /// Creates a new `LastRowReader`.
60    pub(crate) fn new(reader: BoxedBatchReader) -> Self {
61        Self {
62            reader,
63            selector: LastRowSelector::default(),
64        }
65    }
66
67    /// Returns the last row of the next key.
68    pub(crate) async fn next_last_row(&mut self) -> Result<Option<Batch>> {
69        while let Some(batch) = self.reader.next_batch().await? {
70            if let Some(yielded) = self.selector.on_next(batch) {
71                return Ok(Some(yielded));
72            }
73        }
74        Ok(self.selector.finish())
75    }
76}
77
78#[async_trait]
79impl BatchReader for LastRowReader {
80    async fn next_batch(&mut self) -> Result<Option<Batch>> {
81        self.next_last_row().await
82    }
83}
84
85/// Common struct that selects only the last row of each time series.
86#[derive(Default)]
87pub struct LastRowSelector {
88    last_batch: Option<Batch>,
89}
90
91impl LastRowSelector {
92    /// Handles next batch. Return the yielding batch if present.
93    pub fn on_next(&mut self, batch: Batch) -> Option<Batch> {
94        if let Some(last) = &self.last_batch {
95            if last.primary_key() == batch.primary_key() {
96                // Same key, update last batch.
97                self.last_batch = Some(batch);
98                None
99            } else {
100                // Different key, return the last row in `last` and update `last_batch` by
101                // current batch.
102                debug_assert!(!last.is_empty());
103                let last_row = last.slice(last.num_rows() - 1, 1);
104                self.last_batch = Some(batch);
105                Some(last_row)
106            }
107        } else {
108            self.last_batch = Some(batch);
109            None
110        }
111    }
112
113    /// Finishes the selector and returns the pending batch if any.
114    pub fn finish(&mut self) -> Option<Batch> {
115        if let Some(last) = self.last_batch.take() {
116            // This is the last key.
117            let last_row = last.slice(last.num_rows() - 1, 1);
118            return Some(last_row);
119        }
120        None
121    }
122}
123
124/// Cached last row reader for flat format row group.
125/// If the last rows are already cached (as flat `RecordBatch`), returns cached values.
126/// Otherwise, reads from the row group, selects last rows, and updates the cache.
127pub(crate) enum FlatRowGroupLastRowCachedReader {
128    /// Cache hit, reads last rows from cached value.
129    Hit(FlatLastRowCacheReader),
130    /// Cache miss, reads from row group reader and updates cache.
131    Miss(FlatRowGroupLastRowReader),
132}
133
134impl FlatRowGroupLastRowCachedReader {
135    pub(crate) fn new(
136        file_id: FileId,
137        row_group_idx: usize,
138        cache_strategy: CacheStrategy,
139        read_cols: &ParquetReadColumns,
140        reader: FlatRowGroupReader,
141    ) -> Self {
142        let key = SelectorResultKey {
143            file_id,
144            row_group_idx,
145            selector: TimeSeriesRowSelector::LastRow,
146        };
147
148        if let Some(value) = cache_strategy.get_selector_result(&key) {
149            let is_flat = matches!(&value.result, SelectorResult::Flat(_));
150            let schema_matches = value.read_cols == *read_cols;
151            if is_flat && schema_matches {
152                Self::new_hit(value)
153            } else {
154                Self::new_miss(key, read_cols, reader, cache_strategy)
155            }
156        } else {
157            Self::new_miss(key, read_cols, reader, cache_strategy)
158        }
159    }
160
161    /// Returns the next RecordBatch.
162    pub(crate) async fn next_batch(&mut self) -> Result<Option<RecordBatch>> {
163        match self {
164            FlatRowGroupLastRowCachedReader::Hit(r) => r.next_batch(),
165            FlatRowGroupLastRowCachedReader::Miss(r) => r.next_batch().await,
166        }
167    }
168
169    fn new_hit(value: Arc<SelectorResultValue>) -> Self {
170        selector_result_cache_hit();
171        Self::Hit(FlatLastRowCacheReader { value, idx: 0 })
172    }
173
174    fn new_miss(
175        key: SelectorResultKey,
176        read_cols: &ParquetReadColumns,
177        reader: FlatRowGroupReader,
178        cache_strategy: CacheStrategy,
179    ) -> Self {
180        selector_result_cache_miss();
181        Self::Miss(FlatRowGroupLastRowReader::new(
182            key,
183            read_cols.clone(),
184            reader,
185            cache_strategy,
186        ))
187    }
188}
189
190/// Iterates over cached flat last rows.
191pub(crate) struct FlatLastRowCacheReader {
192    value: Arc<SelectorResultValue>,
193    idx: usize,
194}
195
196impl FlatLastRowCacheReader {
197    fn next_batch(&mut self) -> Result<Option<RecordBatch>> {
198        let batches = match &self.value.result {
199            SelectorResult::Flat(batches) => batches,
200            SelectorResult::PrimaryKey(_) => unreachable!(),
201        };
202        if self.idx < batches.len() {
203            let res = Ok(Some(batches[self.idx].clone()));
204            self.idx += 1;
205            res
206        } else {
207            Ok(None)
208        }
209    }
210}
211
212/// Buffer that accumulates small `RecordBatch`es and tracks total row count.
213pub(crate) struct BatchBuffer {
214    batches: Vec<RecordBatch>,
215    num_rows: usize,
216}
217
218impl BatchBuffer {
219    fn new() -> Self {
220        Self {
221            batches: Vec::new(),
222            num_rows: 0,
223        }
224    }
225
226    /// Returns true if total buffered rows reaches `DEFAULT_READ_BATCH_SIZE`.
227    fn is_full(&self) -> bool {
228        self.num_rows >= DEFAULT_READ_BATCH_SIZE
229    }
230
231    /// Extends the buffer from a slice of batches.
232    fn extend_from_slice(&mut self, batches: &[RecordBatch]) {
233        for batch in batches {
234            self.num_rows += batch.num_rows();
235        }
236        self.batches.extend_from_slice(batches);
237    }
238
239    /// Returns true if the buffer has no batches.
240    fn is_empty(&self) -> bool {
241        self.batches.is_empty()
242    }
243
244    /// Concatenates all buffered batches into one, resets the buffer, and returns the result.
245    fn concat(&mut self) -> Result<RecordBatch> {
246        debug_assert!(!self.batches.is_empty());
247        let schema = self.batches[0].schema();
248        let merged = concat_batches(&schema, &self.batches).context(ComputeArrowSnafu)?;
249        self.batches.clear();
250        self.num_rows = 0;
251        Ok(merged)
252    }
253}
254
255/// Reads last rows from a flat format row group and caches the results.
256pub(crate) struct FlatRowGroupLastRowReader {
257    key: SelectorResultKey,
258    reader: FlatRowGroupReader,
259    selector: FlatLastTimestampSelector,
260    yielded_batches: Vec<RecordBatch>,
261    cache_strategy: CacheStrategy,
262    read_cols: ParquetReadColumns,
263    /// Accumulates small selector-output batches before concatenating.
264    pending: BatchBuffer,
265}
266
267impl FlatRowGroupLastRowReader {
268    fn new(
269        key: SelectorResultKey,
270        read_cols: ParquetReadColumns,
271        reader: FlatRowGroupReader,
272        cache_strategy: CacheStrategy,
273    ) -> Self {
274        Self {
275            key,
276            reader,
277            selector: FlatLastTimestampSelector::default(),
278            yielded_batches: vec![],
279            cache_strategy,
280            read_cols,
281            pending: BatchBuffer::new(),
282        }
283    }
284
285    /// Concatenates pending batches and records the result in `yielded_batches`.
286    fn flush_pending(&mut self) -> Result<Option<RecordBatch>> {
287        if self.pending.is_empty() {
288            return Ok(None);
289        }
290        let merged = self.pending.concat()?;
291        self.yielded_batches.push(merged.clone());
292        Ok(Some(merged))
293    }
294
295    async fn next_batch(&mut self) -> Result<Option<RecordBatch>> {
296        if self.pending.is_full() {
297            return self.flush_pending();
298        }
299
300        while let Some(batch) = self.reader.next_batch().await? {
301            self.selector.on_next(batch, &mut self.pending)?;
302            if self.pending.is_full() {
303                return self.flush_pending();
304            }
305        }
306
307        // Reader exhausted — flush remaining selector state.
308        self.selector.finish(&mut self.pending)?;
309        if !self.pending.is_empty() {
310            let result = self.flush_pending();
311            // All last rows in row group are yielded, update cache.
312            self.maybe_update_cache();
313            return result;
314        }
315
316        // All last rows in row group are yielded, update cache.
317        self.maybe_update_cache();
318        Ok(None)
319    }
320
321    fn maybe_update_cache(&mut self) {
322        if self.yielded_batches.is_empty() {
323            return;
324        }
325        let batches = std::mem::take(&mut self.yielded_batches);
326        let value = Arc::new(SelectorResultValue::new_flat(
327            batches,
328            self.read_cols.clone(),
329        ));
330        self.cache_strategy.put_selector_result(self.key, value);
331    }
332}
333
334/// Selects the last-timestamp rows per primary key from flat `RecordBatch`.
335///
336/// Assumes that input batches are sorted by primary key then by timestamp,
337/// and contain only PUT operations (no DELETE).
338#[derive(Default)]
339pub(crate) struct FlatLastTimestampSelector {
340    /// State for the currently in-progress primary key.
341    current_key: Option<LastKeyState>,
342}
343
344#[derive(Debug)]
345struct LastKeyState {
346    key: Vec<u8>,
347    last_timestamp: i64,
348    slices: Vec<RecordBatch>,
349}
350
351impl LastKeyState {
352    fn new(key: Vec<u8>, last_timestamp: i64, first_slice: RecordBatch) -> Self {
353        Self {
354            key,
355            last_timestamp,
356            slices: vec![first_slice],
357        }
358    }
359}
360
361impl FlatLastTimestampSelector {
362    /// Processes the next batch and appends completed-key results into `output_buffer`.
363    pub(crate) fn on_next(
364        &mut self,
365        batch: RecordBatch,
366        output_buffer: &mut BatchBuffer,
367    ) -> Result<()> {
368        if batch.num_rows() == 0 {
369            return Ok(());
370        }
371
372        let num_columns = batch.num_columns();
373        let pk_col_idx = primary_key_column_index(num_columns);
374        let ts_col_idx = time_index_column_index(num_columns);
375
376        let pk_array = batch
377            .column(pk_col_idx)
378            .as_any()
379            .downcast_ref::<PrimaryKeyArray>()
380            .unwrap();
381        let offsets = primary_key_offsets(pk_array)?;
382        if offsets.is_empty() {
383            return Ok(());
384        }
385
386        let ts_values = timestamp_array_to_i64_slice(batch.column(ts_col_idx));
387        for i in 0..offsets.len() - 1 {
388            let range_start = offsets[i];
389            let range_end = offsets[i + 1];
390            let range_key = primary_key_bytes_at(&batch, pk_col_idx, range_start);
391            let range_last_ts = ts_values[range_end - 1];
392            let range_last_ts_start = last_timestamp_start(ts_values, range_start, range_end);
393            let range_slice = batch.slice(range_last_ts_start, range_end - range_last_ts_start);
394
395            match self.current_key.as_mut() {
396                Some(state) if state.key.as_slice() == range_key => {
397                    if range_last_ts > state.last_timestamp {
398                        state.last_timestamp = range_last_ts;
399                        state.slices.clear();
400                        state.slices.push(range_slice);
401                    } else if range_last_ts == state.last_timestamp {
402                        state.slices.push(range_slice);
403                    }
404                }
405                Some(_) => {
406                    self.flush_current_key(output_buffer);
407                    self.current_key = Some(LastKeyState::new(
408                        range_key.to_vec(),
409                        range_last_ts,
410                        range_slice,
411                    ));
412                }
413                None => {
414                    self.current_key = Some(LastKeyState::new(
415                        range_key.to_vec(),
416                        range_last_ts,
417                        range_slice,
418                    ));
419                }
420            }
421        }
422
423        Ok(())
424    }
425
426    /// Finishes the selector and appends remaining results into `output_buffer`.
427    pub(crate) fn finish(&mut self, output_buffer: &mut BatchBuffer) -> Result<()> {
428        self.flush_current_key(output_buffer);
429        Ok(())
430    }
431
432    fn flush_current_key(&mut self, output_buffer: &mut BatchBuffer) {
433        let Some(state) = self.current_key.take() else {
434            return;
435        };
436        output_buffer.extend_from_slice(&state.slices);
437    }
438}
439
440/// Reader that keeps only the last row of each time series from a flat RecordBatch stream.
441/// Assumes input is sorted, deduped, and contains no delete operations.
442pub(crate) struct FlatLastRowReader {
443    stream: BoxedRecordBatchStream,
444    selector: FlatLastTimestampSelector,
445    pending: BatchBuffer,
446}
447
448impl FlatLastRowReader {
449    /// Creates a new `FlatLastRowReader`.
450    pub(crate) fn new(stream: BoxedRecordBatchStream) -> Self {
451        Self {
452            stream,
453            selector: FlatLastTimestampSelector::default(),
454            pending: BatchBuffer::new(),
455        }
456    }
457
458    /// Converts the reader into a stream of RecordBatches.
459    pub(crate) fn into_stream(mut self) -> impl Stream<Item = Result<RecordBatch>> {
460        async_stream::try_stream! {
461            while let Some(batch) = self.stream.try_next().await? {
462                self.selector.on_next(batch, &mut self.pending)?;
463                if self.pending.is_full() {
464                    yield self.pending.concat()?;
465                }
466            }
467            self.selector.finish(&mut self.pending)?;
468            if !self.pending.is_empty() {
469                yield self.pending.concat()?;
470            }
471        }
472    }
473}
474
475/// Gets the primary key bytes at `index` from the primary key dictionary column.
476fn primary_key_bytes_at(batch: &RecordBatch, pk_col_idx: usize, index: usize) -> &[u8] {
477    let pk_dict = batch
478        .column(pk_col_idx)
479        .as_any()
480        .downcast_ref::<PrimaryKeyArray>()
481        .unwrap();
482    let key = pk_dict.keys().value(index);
483    let binary_values = pk_dict
484        .values()
485        .as_any()
486        .downcast_ref::<BinaryArray>()
487        .unwrap();
488    binary_values.value(key as usize)
489}
490
/// Returns the index of the first row of the trailing run of rows that share
/// the last timestamp of the range `[range_start, range_end)`.
///
/// The range must not be empty.
fn last_timestamp_start(ts_values: &[i64], range_start: usize, range_end: usize) -> usize {
    debug_assert!(range_start < range_end);

    let last_idx = range_end - 1;
    let last_ts = ts_values[last_idx];
    // Walk backwards from the row before the last one and count how many
    // consecutive rows still carry the last timestamp.
    let run_before_last = (range_start..last_idx)
        .rev()
        .take_while(|&idx| ts_values[idx] == last_ts)
        .count();
    last_idx - run_before_last
}
503
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use api::v1::OpType;
    use datatypes::arrow::array::{
        ArrayRef, BinaryDictionaryBuilder, Int64Array, TimestampMillisecondArray, UInt8Array,
        UInt64Array,
    };
    use datatypes::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit, UInt32Type};
    use datatypes::arrow::record_batch::RecordBatch;

    use super::*;
    use crate::test_util::{VecBatchReader, check_reader_result, new_batch};

    /// A single input batch yields the last row of that batch.
    #[tokio::test]
    async fn test_last_row_one_batch() {
        let input = [new_batch(
            b"k1",
            &[1, 2],
            &[11, 11],
            &[OpType::Put, OpType::Put],
            &[21, 22],
        )];
        let mut reader = LastRowReader::new(Box::new(VecBatchReader::new(&input)));
        let expected = [new_batch(b"k1", &[2], &[11], &[OpType::Put], &[22])];
        check_reader_result(&mut reader, &expected).await;

        // Only one row.
        let input = [new_batch(b"k1", &[1], &[11], &[OpType::Put], &[21])];
        let mut reader = LastRowReader::new(Box::new(VecBatchReader::new(&input)));
        let expected = [new_batch(b"k1", &[1], &[11], &[OpType::Put], &[21])];
        check_reader_result(&mut reader, &expected).await;
    }

    /// A key spread over several batches yields one row; every key yields its own row.
    #[tokio::test]
    async fn test_last_row_multi_batch() {
        let two_puts = [OpType::Put, OpType::Put];
        let input = [
            new_batch(b"k1", &[1, 2], &[11, 11], &two_puts, &[21, 22]),
            new_batch(b"k1", &[3, 4], &[11, 11], &two_puts, &[23, 24]),
            new_batch(b"k2", &[1, 2], &[11, 11], &two_puts, &[31, 32]),
        ];
        let mut reader = LastRowReader::new(Box::new(VecBatchReader::new(&input)));
        let expected = [
            new_batch(b"k1", &[4], &[11], &[OpType::Put], &[24]),
            new_batch(b"k2", &[2], &[11], &[OpType::Put], &[32]),
        ];
        check_reader_result(&mut reader, &expected).await;
    }

    /// Helper to build a flat format RecordBatch for testing.
    ///
    /// Every row gets sequence `1` and op type `1`.
    fn new_flat_batch(primary_keys: &[&[u8]], timestamps: &[i64], fields: &[i64]) -> RecordBatch {
        let num_rows = timestamps.len();
        assert_eq!(primary_keys.len(), num_rows);
        assert_eq!(fields.len(), num_rows);

        // field0 column
        let field_col: ArrayRef = Arc::new(Int64Array::from_iter_values(fields.iter().copied()));
        // ts column (time index)
        let ts_col: ArrayRef = Arc::new(TimestampMillisecondArray::from_iter_values(
            timestamps.iter().copied(),
        ));
        // __primary_key column (dictionary(uint32, binary))
        let mut pk_builder = BinaryDictionaryBuilder::<UInt32Type>::new();
        for &pk in primary_keys {
            pk_builder.append(pk).unwrap();
        }
        let pk_col: ArrayRef = Arc::new(pk_builder.finish());
        // __sequence column
        let sequence_col: ArrayRef = Arc::new(UInt64Array::from_iter_values(vec![1u64; num_rows]));
        // __op_type column
        let op_type_col: ArrayRef = Arc::new(UInt8Array::from_iter_values(vec![1u8; num_rows]));

        let columns = vec![field_col, ts_col, pk_col, sequence_col, op_type_col];
        RecordBatch::try_new(test_flat_schema(), columns).unwrap()
    }

    /// Schema of the batches built by `new_flat_batch`.
    fn test_flat_schema() -> SchemaRef {
        let pk_type = DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Binary));
        let ts_type = DataType::Timestamp(TimeUnit::Millisecond, None);
        Arc::new(Schema::new(vec![
            Field::new("field0", DataType::Int64, false),
            Field::new("ts", ts_type, false),
            Field::new("__primary_key", pk_type, false),
            Field::new("__sequence", DataType::UInt64, false),
            Field::new("__op_type", DataType::UInt8, false),
        ]))
    }

    /// Moves every buffered row into `out` and resets the buffer.
    fn drain_rows(buffer: &mut BatchBuffer, out: &mut Vec<(Vec<u8>, i64)>) {
        for batch in buffer.batches.drain(..) {
            extract_flat_rows(&batch, out);
        }
        buffer.num_rows = 0;
    }

    /// Collects all rows from the selector across all result batches.
    fn collect_flat_results(
        selector: &mut FlatLastTimestampSelector,
        batches: Vec<RecordBatch>,
    ) -> Vec<(Vec<u8>, i64)> {
        let mut buffer = BatchBuffer::new();
        let mut rows = Vec::new();
        for batch in batches {
            selector.on_next(batch, &mut buffer).unwrap();
            drain_rows(&mut buffer, &mut rows);
        }
        selector.finish(&mut buffer).unwrap();
        drain_rows(&mut buffer, &mut rows);
        rows
    }

    /// Extracts (primary_key, timestamp) pairs from a result batch.
    fn extract_flat_rows(batch: &RecordBatch, out: &mut Vec<(Vec<u8>, i64)>) {
        let ts_col = batch
            .column(1)
            .as_any()
            .downcast_ref::<TimestampMillisecondArray>()
            .unwrap();
        let pk_col = batch
            .column(2)
            .as_any()
            .downcast_ref::<PrimaryKeyArray>()
            .unwrap();
        let pk_values = pk_col
            .values()
            .as_any()
            .downcast_ref::<BinaryArray>()
            .unwrap();

        out.extend((0..batch.num_rows()).map(|row| {
            let dict_key = pk_col.keys().value(row);
            let pk = pk_values.value(dict_key as usize).to_vec();
            (pk, ts_col.value(row))
        }));
    }

    #[test]
    fn test_flat_single_batch_one_key() {
        let input = vec![new_flat_batch(
            &[b"k1", b"k1", b"k1"],
            &[1, 2, 3],
            &[10, 20, 30],
        )];
        let mut selector = FlatLastTimestampSelector::default();
        let actual = collect_flat_results(&mut selector, input);
        let expected = vec![(b"k1".to_vec(), 3)];
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_flat_single_batch_multiple_keys() {
        let input = vec![new_flat_batch(
            &[b"k1", b"k1", b"k2", b"k2", b"k3"],
            &[1, 2, 3, 4, 5],
            &[10, 20, 30, 40, 50],
        )];
        let mut selector = FlatLastTimestampSelector::default();
        let actual = collect_flat_results(&mut selector, input);
        let expected = vec![
            (b"k1".to_vec(), 2),
            (b"k2".to_vec(), 4),
            (b"k3".to_vec(), 5),
        ];
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_flat_key_spans_batches() {
        let input = vec![
            new_flat_batch(&[b"k1", b"k1"], &[1, 2], &[10, 20]),
            new_flat_batch(&[b"k1", b"k2"], &[3, 4], &[30, 40]),
            new_flat_batch(&[b"k2", b"k3"], &[5, 6], &[50, 60]),
        ];
        let mut selector = FlatLastTimestampSelector::default();
        let actual = collect_flat_results(&mut selector, input);
        let expected = vec![
            (b"k1".to_vec(), 3),
            (b"k2".to_vec(), 5),
            (b"k3".to_vec(), 6),
        ];
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_flat_duplicate_last_timestamps() {
        // k1 has two rows with the same last timestamp (3).
        let input = vec![new_flat_batch(
            &[b"k1", b"k1", b"k1", b"k2"],
            &[1, 3, 3, 5],
            &[10, 20, 30, 40],
        )];
        let mut selector = FlatLastTimestampSelector::default();
        let actual = collect_flat_results(&mut selector, input);
        let expected = vec![
            (b"k1".to_vec(), 3),
            (b"k1".to_vec(), 3),
            (b"k2".to_vec(), 5),
        ];
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_flat_duplicate_last_timestamps_across_batches() {
        // k1's last timestamp (3) spans two batches.
        let input = vec![
            new_flat_batch(&[b"k1", b"k1"], &[1, 3], &[10, 20]),
            new_flat_batch(&[b"k1", b"k2"], &[3, 5], &[30, 40]),
        ];
        let mut selector = FlatLastTimestampSelector::default();
        let actual = collect_flat_results(&mut selector, input);
        let expected = vec![
            (b"k1".to_vec(), 3),
            (b"k1".to_vec(), 3),
            (b"k2".to_vec(), 5),
        ];
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_flat_pending_chain_dropped_by_higher_timestamp() {
        // The rows with timestamp 3 collected from the first two batches are
        // replaced once timestamp 4 shows up for the same key.
        let input = vec![
            new_flat_batch(&[b"k1", b"k1"], &[1, 3], &[10, 20]),
            new_flat_batch(&[b"k1", b"k1"], &[3, 3], &[21, 22]),
            new_flat_batch(&[b"k1", b"k1"], &[4, 4], &[23, 24]),
        ];
        let mut selector = FlatLastTimestampSelector::default();
        let actual = collect_flat_results(&mut selector, input);
        let expected = vec![(b"k1".to_vec(), 4), (b"k1".to_vec(), 4)];
        assert_eq!(expected, actual);
    }

    #[test]
    fn test_flat_finish_is_one_shot() {
        let mut selector = FlatLastTimestampSelector::default();
        let mut buffer = BatchBuffer::new();
        let batch = new_flat_batch(&[b"k1", b"k1", b"k2"], &[1, 2, 3], &[10, 20, 30]);

        // Feed one batch: completed keys can be emitted before EOF.
        selector.on_next(batch, &mut buffer).unwrap();
        let mut pre_finish = Vec::new();
        drain_rows(&mut buffer, &mut pre_finish);
        assert_eq!(vec![(b"k1".to_vec(), 2)], pre_finish);

        // Simulate EOF by calling finish().
        selector.finish(&mut buffer).unwrap();
        assert!(!buffer.is_empty());
        buffer.batches.clear();
        buffer.num_rows = 0;

        // A second finish after EOF should not yield any more rows.
        selector.finish(&mut buffer).unwrap();
        assert!(buffer.is_empty());
    }
}