mito2/memtable/
simple_bulk_memtable.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#[cfg(any(test, feature = "test"))]
16mod test_only;
17
18use std::collections::HashSet;
19use std::fmt::{Debug, Formatter};
20use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering};
21use std::sync::{Arc, RwLock};
22use std::time::{Duration, Instant};
23
24use api::v1::OpType;
25use datatypes::vectors::Helper;
26use mito_codec::key_values::KeyValue;
27use rayon::prelude::*;
28use snafu::{OptionExt, ResultExt};
29use store_api::metadata::RegionMetadataRef;
30use store_api::storage::ColumnId;
31
32use crate::flush::WriteBufferManagerRef;
33use crate::memtable::bulk::part::BulkPart;
34use crate::memtable::stats::WriteMetrics;
35use crate::memtable::time_series::Series;
36use crate::memtable::{
37    AllocTracker, BatchToRecordBatchContext, BoxedBatchIterator, IterBuilder, KeyValues,
38    MemScanMetrics, Memtable, MemtableId, MemtableRange, MemtableRangeContext, MemtableRanges,
39    MemtableRef, MemtableStats, RangesOptions, read_column_ids_from_projection,
40};
41use crate::metrics::MEMTABLE_ACTIVE_SERIES_COUNT;
42use crate::read::Batch;
43use crate::read::dedup::LastNonNullIter;
44use crate::region::options::MergeMode;
45use crate::{error, metrics};
46
/// A memtable that stores every written row in a single [`Series`].
///
/// All write paths append to one lock-protected series; the row count, the
/// timestamp bounds and the maximum sequence are tracked in atomics next to it.
pub struct SimpleBulkMemtable {
    /// Identifier returned by [`Memtable::id`].
    id: MemtableId,
    /// Region metadata used to resolve the time index and field columns.
    region_metadata: RegionMetadataRef,
    /// Receives the byte count (`key_bytes + value_bytes`) of every write.
    alloc_tracker: AllocTracker,
    /// Largest timestamp value written so far; starts at `i64::MIN`.
    max_timestamp: AtomicI64,
    /// Smallest timestamp value written so far; starts at `i64::MAX`.
    min_timestamp: AtomicI64,
    /// Largest sequence number written so far; starts at `0`.
    max_sequence: AtomicU64,
    /// Dedup flag forwarded to `to_batch` when scan ranges are built.
    dedup: bool,
    /// Merge mode; with `LastNonNull` readers are wrapped in a [`LastNonNullIter`].
    merge_mode: MergeMode,
    /// Number of rows written so far.
    num_rows: AtomicUsize,
    /// The single series holding all rows of this memtable.
    series: RwLock<Series>,
}
59
impl Drop for SimpleBulkMemtable {
    /// Decrements the `MEMTABLE_ACTIVE_SERIES_COUNT` metric when the memtable
    /// is dropped.
    // NOTE(review): the matching increment is not visible in this file —
    // presumably it happens when the inner `Series` is created; confirm the
    // metric is not decremented twice if `Series` has its own `Drop` impl.
    fn drop(&mut self) {
        MEMTABLE_ACTIVE_SERIES_COUNT.dec();
    }
}
65
66impl SimpleBulkMemtable {
67    pub fn new(
68        id: MemtableId,
69        region_metadata: RegionMetadataRef,
70        write_buffer_manager: Option<WriteBufferManagerRef>,
71        dedup: bool,
72        merge_mode: MergeMode,
73    ) -> Self {
74        let series = RwLock::new(Series::with_capacity(&region_metadata, 1024, 8192));
75
76        Self {
77            id,
78            region_metadata,
79            alloc_tracker: AllocTracker::new(write_buffer_manager),
80            max_timestamp: AtomicI64::new(i64::MIN),
81            min_timestamp: AtomicI64::new(i64::MAX),
82            max_sequence: AtomicU64::new(0),
83            dedup,
84            merge_mode,
85            num_rows: AtomicUsize::new(0),
86            series,
87        }
88    }
89
90    fn build_projection(&self, projection: Option<&[ColumnId]>) -> HashSet<ColumnId> {
91        if let Some(projection) = projection {
92            projection.iter().copied().collect()
93        } else {
94            self.region_metadata
95                .field_columns()
96                .map(|c| c.column_id)
97                .collect()
98        }
99    }
100
101    fn write_key_value(&self, kv: KeyValue, stats: &mut WriteMetrics) {
102        let ts = kv.timestamp();
103        let sequence = kv.sequence();
104        let op_type = kv.op_type();
105        let mut series = self.series.write().unwrap();
106        let size = series.push(ts, sequence, op_type, kv.fields());
107        stats.value_bytes += size;
108        // safety: timestamp of kv must be both present and a valid timestamp value.
109        let ts = kv
110            .timestamp()
111            .try_into_timestamp()
112            .unwrap()
113            .unwrap()
114            .value();
115        stats.min_ts = stats.min_ts.min(ts);
116        stats.max_ts = stats.max_ts.max(ts);
117    }
118
119    /// Updates memtable stats.
120    fn update_stats(&self, stats: WriteMetrics) {
121        self.alloc_tracker
122            .on_allocation(stats.key_bytes + stats.value_bytes);
123        self.num_rows.fetch_add(stats.num_rows, Ordering::SeqCst);
124        self.max_timestamp.fetch_max(stats.max_ts, Ordering::SeqCst);
125        self.min_timestamp.fetch_min(stats.min_ts, Ordering::SeqCst);
126        self.max_sequence
127            .fetch_max(stats.max_sequence, Ordering::SeqCst);
128    }
129
    /// Returns the region metadata this memtable was created with (test-only).
    #[cfg(test)]
    fn schema(&self) -> &RegionMetadataRef {
        &self.region_metadata
    }
134}
135
136impl Debug for SimpleBulkMemtable {
137    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
138        f.debug_struct("SimpleBulkMemtable").finish()
139    }
140}
141
142impl Memtable for SimpleBulkMemtable {
    /// Returns the id assigned to this memtable at construction.
    fn id(&self) -> MemtableId {
        self.id
    }
146
147    fn write(&self, kvs: &KeyValues) -> error::Result<()> {
148        let mut stats = WriteMetrics::default();
149        let max_sequence = kvs.max_sequence();
150        for kv in kvs.iter() {
151            self.write_key_value(kv, &mut stats);
152        }
153        stats.max_sequence = max_sequence;
154        stats.num_rows = kvs.num_rows();
155        self.update_stats(stats);
156        Ok(())
157    }
158
159    fn write_one(&self, kv: KeyValue) -> error::Result<()> {
160        debug_assert_eq!(0, kv.num_primary_keys());
161        let mut stats = WriteMetrics::default();
162        self.write_key_value(kv, &mut stats);
163        stats.num_rows = 1;
164        stats.max_sequence = kv.sequence();
165        self.update_stats(stats);
166        Ok(())
167    }
168
169    fn write_bulk(&self, part: BulkPart) -> error::Result<()> {
170        let rb = &part.batch;
171
172        let ts = Helper::try_into_vector(
173            rb.column_by_name(&self.region_metadata.time_index_column().column_schema.name)
174                .with_context(|| error::InvalidRequestSnafu {
175                    region_id: self.region_metadata.region_id,
176                    reason: "Timestamp not found",
177                })?,
178        )
179        .context(error::ConvertVectorSnafu)?;
180
181        let sequence = part.sequence;
182
183        let fields: Vec<_> = self
184            .region_metadata
185            .field_columns()
186            .map(|f| {
187                let array = rb.column_by_name(&f.column_schema.name).ok_or_else(|| {
188                    error::InvalidRequestSnafu {
189                        region_id: self.region_metadata.region_id,
190                        reason: format!("Column {} not found", f.column_schema.name),
191                    }
192                    .build()
193                })?;
194                Helper::try_into_vector(array).context(error::ConvertVectorSnafu)
195            })
196            .collect::<error::Result<Vec<_>>>()?;
197
198        let mut series = self.series.write().unwrap();
199        let extend_timer = metrics::REGION_WORKER_HANDLE_WRITE_ELAPSED
200            .with_label_values(&["bulk_extend"])
201            .start_timer();
202        series.extend(ts, OpType::Put as u8, sequence, fields)?;
203        extend_timer.observe_duration();
204
205        self.update_stats(WriteMetrics {
206            key_bytes: 0,
207            value_bytes: part.estimated_size(),
208            min_ts: part.min_timestamp,
209            max_ts: part.max_timestamp,
210            num_rows: part.num_rows(),
211            max_sequence: sequence,
212        });
213        Ok(())
214    }
215
216    #[cfg(any(test, feature = "test"))]
217    fn iter(
218        &self,
219        projection: Option<&[ColumnId]>,
220        _predicate: Option<table::predicate::Predicate>,
221        sequence: Option<store_api::storage::SequenceRange>,
222    ) -> error::Result<BoxedBatchIterator> {
223        let iter = self.create_iter(projection, sequence)?.build(None)?;
224        if self.merge_mode == MergeMode::LastNonNull {
225            let iter = LastNonNullIter::new(iter);
226            Ok(Box::new(iter))
227        } else {
228            Ok(Box::new(iter))
229        }
230    }
231
232    fn ranges(
233        &self,
234        projection: Option<&[ColumnId]>,
235        options: RangesOptions,
236    ) -> error::Result<MemtableRanges> {
237        let predicate = options.predicate;
238        let sequence = options.sequence;
239        let start_time = Instant::now();
240        let read_column_ids = read_column_ids_from_projection(&self.region_metadata, projection);
241        let projection = Arc::new(self.build_projection(projection));
242
243        // Use the memtable's overall time range and max sequence for all ranges
244        let max_sequence = self.max_sequence.load(Ordering::Relaxed);
245        let time_range = {
246            let num_rows = self.num_rows.load(Ordering::Relaxed);
247            if num_rows > 0 {
248                let ts_type = self.region_metadata.time_index_type();
249                let max_timestamp =
250                    ts_type.create_timestamp(self.max_timestamp.load(Ordering::Relaxed));
251                let min_timestamp =
252                    ts_type.create_timestamp(self.min_timestamp.load(Ordering::Relaxed));
253                Some((min_timestamp, max_timestamp))
254            } else {
255                None
256            }
257        };
258
259        let values = self.series.read().unwrap().read_to_values();
260        let batch_to_record_batch = Arc::new(BatchToRecordBatchContext::new(
261            self.region_metadata.clone(),
262            read_column_ids.clone(),
263        ));
264
265        let contexts = values
266            .into_par_iter()
267            .filter_map(|v| {
268                let filtered = match v.to_batch(
269                    &[],
270                    &self.region_metadata,
271                    &projection,
272                    sequence,
273                    self.dedup,
274                    self.merge_mode,
275                ) {
276                    Ok(filtered) => filtered,
277                    Err(e) => {
278                        return Some(Err(e));
279                    }
280                };
281                if filtered.is_empty() {
282                    None
283                } else {
284                    Some(Ok(filtered))
285                }
286            })
287            .map(|result| {
288                result.map(|batch| {
289                    let num_rows = batch.num_rows();
290                    let estimated_bytes = batch.memory_size();
291
292                    let range_stats = MemtableStats {
293                        estimated_bytes,
294                        time_range,
295                        num_rows,
296                        num_ranges: 1,
297                        max_sequence,
298                        series_count: 1,
299                    };
300
301                    let builder = BatchRangeBuilder {
302                        batch,
303                        merge_mode: self.merge_mode,
304                        scan_cost: start_time.elapsed(),
305                    };
306                    (
307                        range_stats,
308                        Arc::new(MemtableRangeContext::new_with_batch_to_record_batch(
309                            self.id,
310                            Box::new(builder),
311                            predicate.clone(),
312                            Some(batch_to_record_batch.clone()),
313                        )),
314                    )
315                })
316            })
317            .collect::<error::Result<Vec<_>>>()?;
318
319        let ranges = contexts
320            .into_iter()
321            .enumerate()
322            .map(|(idx, (range_stats, context))| (idx, MemtableRange::new(context, range_stats)))
323            .collect();
324
325        Ok(MemtableRanges { ranges })
326    }
327
    /// Returns whether the underlying series reports itself as empty.
    fn is_empty(&self) -> bool {
        self.series.read().unwrap().is_empty()
    }
331
    /// Delegates to the series' `freeze` under the write lock.
    ///
    /// Always returns `Ok(())`.
    fn freeze(&self) -> error::Result<()> {
        self.series.write().unwrap().freeze(&self.region_metadata);
        Ok(())
    }
336
337    fn stats(&self) -> MemtableStats {
338        let estimated_bytes = self.alloc_tracker.bytes_allocated();
339        let num_rows = self.num_rows.load(Ordering::Relaxed);
340        if num_rows == 0 {
341            // no rows ever written
342            return MemtableStats {
343                estimated_bytes,
344                time_range: None,
345                num_rows: 0,
346                num_ranges: 0,
347                max_sequence: 0,
348                series_count: 0,
349            };
350        }
351        let ts_type = self.region_metadata.time_index_type();
352        let max_timestamp = ts_type.create_timestamp(self.max_timestamp.load(Ordering::Relaxed));
353        let min_timestamp = ts_type.create_timestamp(self.min_timestamp.load(Ordering::Relaxed));
354        MemtableStats {
355            estimated_bytes,
356            time_range: Some((min_timestamp, max_timestamp)),
357            num_rows,
358            num_ranges: 1,
359            max_sequence: self.max_sequence.load(Ordering::Relaxed),
360            series_count: 1,
361        }
362    }
363
364    fn fork(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef {
365        Arc::new(Self::new(
366            id,
367            metadata.clone(),
368            self.alloc_tracker.write_buffer_manager(),
369            self.dedup,
370            self.merge_mode,
371        ))
372    }
373}
374
/// An [`IterBuilder`] that yields one pre-built [`Batch`].
#[derive(Clone)]
pub struct BatchRangeBuilder {
    /// The batch handed out (cloned) by every built iterator.
    pub batch: Batch,
    /// Merge mode; `LastNonNull` wraps the iterator in a [`LastNonNullIter`].
    pub merge_mode: MergeMode,
    /// Duration reported as `scan_cost` when scan metrics are supplied.
    scan_cost: Duration,
}
381
382impl IterBuilder for BatchRangeBuilder {
383    fn build(&self, metrics: Option<MemScanMetrics>) -> error::Result<BoxedBatchIterator> {
384        let batch = self.batch.clone();
385        if let Some(metrics) = metrics {
386            let inner = crate::memtable::MemScanMetricsData {
387                total_series: 1,
388                num_rows: batch.num_rows(),
389                num_batches: 1,
390                scan_cost: self.scan_cost,
391            };
392            metrics.merge_inner(&inner);
393        }
394
395        let iter = Iter {
396            batch: Some(Ok(batch)),
397        };
398
399        if self.merge_mode == MergeMode::LastNonNull {
400            Ok(Box::new(LastNonNullIter::new(iter)))
401        } else {
402            Ok(Box::new(iter))
403        }
404    }
405}
406
/// Iterator over at most one batch result.
struct Iter {
    /// The pending item; `None` once it has been yielded.
    batch: Option<error::Result<Batch>>,
}
410
impl Iterator for Iter {
    type Item = error::Result<Batch>;

    /// Yields the stored item on the first call and `None` afterwards.
    fn next(&mut self) -> Option<Self::Item> {
        // `take` leaves `None` behind, which ends the iteration.
        self.batch.take()
    }
}
418
419#[cfg(test)]
420mod tests {
421    use std::sync::Arc;
422
423    use api::v1::helper::row;
424    use api::v1::value::ValueData;
425    use api::v1::{Mutation, OpType, Rows, SemanticType};
426    use common_recordbatch::DfRecordBatch;
427    use common_time::Timestamp;
428    use datatypes::arrow::array::{ArrayRef, Float64Array, RecordBatch, TimestampMillisecondArray};
429    use datatypes::arrow_array::StringArray;
430    use datatypes::data_type::ConcreteDataType;
431    use datatypes::prelude::{ScalarVector, Vector};
432    use datatypes::schema::ColumnSchema;
433    use datatypes::value::Value;
434    use datatypes::vectors::TimestampMillisecondVector;
435    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
436    use store_api::storage::{RegionId, SequenceNumber, SequenceRange};
437
438    use super::*;
439    use crate::read;
440    use crate::read::dedup::DedupReader;
441    use crate::read::merge::MergeReaderBuilder;
442    use crate::read::{BatchReader, Source};
443    use crate::region::options::MergeMode;
444    use crate::test_util::column_metadata_to_column_schema;
445
446    fn new_test_metadata() -> RegionMetadataRef {
447        let mut builder = RegionMetadataBuilder::new(1.into());
448        builder
449            .push_column_metadata(ColumnMetadata {
450                column_schema: ColumnSchema::new(
451                    "ts",
452                    ConcreteDataType::timestamp_millisecond_datatype(),
453                    false,
454                ),
455                semantic_type: SemanticType::Timestamp,
456                column_id: 1,
457            })
458            .push_column_metadata(ColumnMetadata {
459                column_schema: ColumnSchema::new("f1", ConcreteDataType::float64_datatype(), true),
460                semantic_type: SemanticType::Field,
461                column_id: 2,
462            })
463            .push_column_metadata(ColumnMetadata {
464                column_schema: ColumnSchema::new("f2", ConcreteDataType::string_datatype(), true),
465                semantic_type: SemanticType::Field,
466                column_id: 3,
467            });
468        Arc::new(builder.build().unwrap())
469    }
470
471    fn new_test_memtable(dedup: bool, merge_mode: MergeMode) -> SimpleBulkMemtable {
472        SimpleBulkMemtable::new(1, new_test_metadata(), None, dedup, merge_mode)
473    }
474
475    fn build_key_values(
476        metadata: &RegionMetadataRef,
477        sequence: SequenceNumber,
478        row_values: &[(i64, f64, String)],
479        op_type: OpType,
480    ) -> KeyValues {
481        let column_schemas: Vec<_> = metadata
482            .column_metadatas
483            .iter()
484            .map(column_metadata_to_column_schema)
485            .collect();
486
487        let rows: Vec<_> = row_values
488            .iter()
489            .map(|(ts, f1, f2)| {
490                row(vec![
491                    ValueData::TimestampMillisecondValue(*ts),
492                    ValueData::F64Value(*f1),
493                    ValueData::StringValue(f2.clone()),
494                ])
495            })
496            .collect();
497        let mutation = Mutation {
498            op_type: op_type as i32,
499            sequence,
500            rows: Some(Rows {
501                schema: column_schemas,
502                rows,
503            }),
504            write_hint: None,
505        };
506        KeyValues::new(metadata, mutation).unwrap()
507    }
508
509    #[test]
510    fn test_write_and_iter() {
511        let memtable = new_test_memtable(false, MergeMode::LastRow);
512        memtable
513            .write(&build_key_values(
514                &memtable.region_metadata,
515                0,
516                &[(1, 1.0, "a".to_string())],
517                OpType::Put,
518            ))
519            .unwrap();
520        memtable
521            .write(&build_key_values(
522                &memtable.region_metadata,
523                1,
524                &[(2, 2.0, "b".to_string())],
525                OpType::Put,
526            ))
527            .unwrap();
528
529        let mut iter = memtable.iter(None, None, None).unwrap();
530        let batch = iter.next().unwrap().unwrap();
531        assert_eq!(2, batch.num_rows());
532        assert_eq!(2, batch.fields().len());
533        let ts_v = batch
534            .timestamps()
535            .as_any()
536            .downcast_ref::<TimestampMillisecondVector>()
537            .unwrap();
538        assert_eq!(Value::Timestamp(Timestamp::new_millisecond(1)), ts_v.get(0));
539        assert_eq!(Value::Timestamp(Timestamp::new_millisecond(2)), ts_v.get(1));
540    }
541
542    #[test]
543    fn test_projection() {
544        let memtable = new_test_memtable(false, MergeMode::LastRow);
545        memtable
546            .write(&build_key_values(
547                &memtable.region_metadata,
548                0,
549                &[(1, 1.0, "a".to_string())],
550                OpType::Put,
551            ))
552            .unwrap();
553
554        let mut iter = memtable.iter(None, None, None).unwrap();
555        let batch = iter.next().unwrap().unwrap();
556        assert_eq!(1, batch.num_rows());
557        assert_eq!(2, batch.fields().len());
558
559        let ts_v = batch
560            .timestamps()
561            .as_any()
562            .downcast_ref::<TimestampMillisecondVector>()
563            .unwrap();
564        assert_eq!(Value::Timestamp(Timestamp::new_millisecond(1)), ts_v.get(0));
565
566        // Only project column 2 (f1)
567        let projection = vec![2];
568        let mut iter = memtable.iter(Some(&projection), None, None).unwrap();
569        let batch = iter.next().unwrap().unwrap();
570
571        assert_eq!(1, batch.num_rows());
572        assert_eq!(1, batch.fields().len()); // only f1
573        assert_eq!(2, batch.fields()[0].column_id);
574    }
575
576    #[test]
577    fn test_dedup() {
578        let memtable = new_test_memtable(true, MergeMode::LastRow);
579        memtable
580            .write(&build_key_values(
581                &memtable.region_metadata,
582                0,
583                &[(1, 1.0, "a".to_string())],
584                OpType::Put,
585            ))
586            .unwrap();
587        memtable
588            .write(&build_key_values(
589                &memtable.region_metadata,
590                1,
591                &[(1, 2.0, "b".to_string())],
592                OpType::Put,
593            ))
594            .unwrap();
595        let mut iter = memtable.iter(None, None, None).unwrap();
596        let batch = iter.next().unwrap().unwrap();
597
598        assert_eq!(1, batch.num_rows()); // deduped to 1 row
599        assert_eq!(2.0, batch.fields()[0].data.get(0).as_f64_lossy().unwrap()); // last write wins
600    }
601
602    #[test]
603    fn test_write_one() {
604        let memtable = new_test_memtable(false, MergeMode::LastRow);
605        let kvs = build_key_values(
606            &memtable.region_metadata,
607            0,
608            &[(1, 1.0, "a".to_string())],
609            OpType::Put,
610        );
611        let kv = kvs.iter().next().unwrap();
612        memtable.write_one(kv).unwrap();
613
614        let mut iter = memtable.iter(None, None, None).unwrap();
615        let batch = iter.next().unwrap().unwrap();
616        assert_eq!(1, batch.num_rows());
617    }
618
619    #[tokio::test]
620    async fn test_write_dedup() {
621        let memtable = new_test_memtable(true, MergeMode::LastRow);
622        let kvs = build_key_values(
623            &memtable.region_metadata,
624            0,
625            &[(1, 1.0, "a".to_string())],
626            OpType::Put,
627        );
628        let kv = kvs.iter().next().unwrap();
629        memtable.write_one(kv).unwrap();
630        memtable.freeze().unwrap();
631
632        let kvs = build_key_values(
633            &memtable.region_metadata,
634            1,
635            &[(1, 1.0, "a".to_string())],
636            OpType::Delete,
637        );
638        let kv = kvs.iter().next().unwrap();
639        memtable.write_one(kv).unwrap();
640
641        let ranges = memtable.ranges(None, RangesOptions::default()).unwrap();
642        let mut source = vec![];
643        for r in ranges.ranges.values() {
644            source.push(Source::Iter(r.build_iter().unwrap()));
645        }
646
647        let reader = MergeReaderBuilder::from_sources(source)
648            .build()
649            .await
650            .unwrap();
651
652        let mut reader = DedupReader::new(reader, read::dedup::LastRow::new(false), None);
653        let mut num_rows = 0;
654        while let Some(b) = reader.next_batch().await.unwrap() {
655            num_rows += b.num_rows();
656        }
657        assert_eq!(num_rows, 1);
658    }
659
660    #[tokio::test]
661    async fn test_delete_only() {
662        let memtable = new_test_memtable(true, MergeMode::LastRow);
663        let kvs = build_key_values(
664            &memtable.region_metadata,
665            0,
666            &[(1, 1.0, "a".to_string())],
667            OpType::Delete,
668        );
669        let kv = kvs.iter().next().unwrap();
670        memtable.write_one(kv).unwrap();
671        memtable.freeze().unwrap();
672
673        let ranges = memtable.ranges(None, RangesOptions::default()).unwrap();
674        let mut source = vec![];
675        for r in ranges.ranges.values() {
676            source.push(Source::Iter(r.build_iter().unwrap()));
677        }
678
679        let reader = MergeReaderBuilder::from_sources(source)
680            .build()
681            .await
682            .unwrap();
683
684        let mut reader = DedupReader::new(reader, read::dedup::LastRow::new(false), None);
685        let mut num_rows = 0;
686        while let Some(b) = reader.next_batch().await.unwrap() {
687            num_rows += b.num_rows();
688            assert_eq!(b.num_rows(), 1);
689            assert_eq!(b.op_types().get_data(0).unwrap(), OpType::Delete as u8);
690        }
691        assert_eq!(num_rows, 1);
692    }
693
694    #[tokio::test]
695    async fn test_single_range() {
696        let memtable = new_test_memtable(true, MergeMode::LastRow);
697        let kvs = build_key_values(
698            &memtable.region_metadata,
699            0,
700            &[(1, 1.0, "a".to_string())],
701            OpType::Put,
702        );
703        memtable.write_one(kvs.iter().next().unwrap()).unwrap();
704
705        let kvs = build_key_values(
706            &memtable.region_metadata,
707            1,
708            &[(1, 2.0, "b".to_string())],
709            OpType::Put,
710        );
711        memtable.write_one(kvs.iter().next().unwrap()).unwrap();
712        memtable.freeze().unwrap();
713
714        let ranges = memtable.ranges(None, RangesOptions::default()).unwrap();
715        assert_eq!(ranges.ranges.len(), 1);
716        let range = ranges.ranges.into_values().next().unwrap();
717        let mut reader = range.context.builder.build(None).unwrap();
718
719        let mut num_rows = 0;
720        while let Some(b) = reader.next().transpose().unwrap() {
721            num_rows += b.num_rows();
722            assert_eq!(b.fields()[1].data.get(0).as_string(), Some("b".to_string()));
723        }
724        assert_eq!(num_rows, 1);
725    }
726
727    #[test]
728    fn test_write_bulk() {
729        let memtable = new_test_memtable(false, MergeMode::LastRow);
730        let arrow_schema = memtable.schema().schema.arrow_schema().clone();
731        let arrays = vec![
732            Arc::new(TimestampMillisecondArray::from(vec![1, 2])) as ArrayRef,
733            Arc::new(Float64Array::from(vec![1.0, 2.0])) as ArrayRef,
734            Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef,
735        ];
736        let rb = DfRecordBatch::try_new(arrow_schema, arrays).unwrap();
737
738        let part = BulkPart {
739            batch: rb,
740            sequence: 1,
741            min_timestamp: 1,
742            max_timestamp: 2,
743            timestamp_index: 0,
744            raw_data: None,
745        };
746        memtable.write_bulk(part).unwrap();
747
748        let mut iter = memtable.iter(None, None, None).unwrap();
749        let batch = iter.next().unwrap().unwrap();
750        assert_eq!(2, batch.num_rows());
751
752        let stats = memtable.stats();
753        assert_eq!(1, stats.max_sequence);
754        assert_eq!(2, stats.num_rows);
755        assert_eq!(
756            Some((Timestamp::new_millisecond(1), Timestamp::new_millisecond(2))),
757            stats.time_range
758        );
759
760        let kvs = build_key_values(
761            &memtable.region_metadata,
762            2,
763            &[(3, 3.0, "c".to_string())],
764            OpType::Put,
765        );
766        memtable.write(&kvs).unwrap();
767        let mut iter = memtable.iter(None, None, None).unwrap();
768        let batch = iter.next().unwrap().unwrap();
769        assert_eq!(3, batch.num_rows());
770        assert_eq!(
771            vec![1, 2, 3],
772            batch
773                .timestamps()
774                .as_any()
775                .downcast_ref::<TimestampMillisecondVector>()
776                .unwrap()
777                .iter_data()
778                .map(|t| { t.unwrap().0.value() })
779                .collect::<Vec<_>>()
780        );
781    }
782
783    #[test]
784    fn test_is_empty() {
785        let memtable = new_test_memtable(false, MergeMode::LastRow);
786        assert!(memtable.is_empty());
787
788        memtable
789            .write(&build_key_values(
790                &memtable.region_metadata,
791                0,
792                &[(1, 1.0, "a".to_string())],
793                OpType::Put,
794            ))
795            .unwrap();
796        assert!(!memtable.is_empty());
797    }
798
799    #[test]
800    fn test_stats() {
801        let memtable = new_test_memtable(false, MergeMode::LastRow);
802        let stats = memtable.stats();
803        assert_eq!(0, stats.num_rows);
804        assert!(stats.time_range.is_none());
805
806        memtable
807            .write(&build_key_values(
808                &memtable.region_metadata,
809                0,
810                &[(1, 1.0, "a".to_string())],
811                OpType::Put,
812            ))
813            .unwrap();
814        let stats = memtable.stats();
815        assert_eq!(1, stats.num_rows);
816        assert!(stats.time_range.is_some());
817    }
818
819    #[test]
820    fn test_fork() {
821        let memtable = new_test_memtable(false, MergeMode::LastRow);
822        memtable
823            .write(&build_key_values(
824                &memtable.region_metadata,
825                0,
826                &[(1, 1.0, "a".to_string())],
827                OpType::Put,
828            ))
829            .unwrap();
830
831        let forked = memtable.fork(2, &memtable.region_metadata);
832        assert!(forked.is_empty());
833    }
834
835    #[test]
836    fn test_sequence_filter() {
837        let memtable = new_test_memtable(false, MergeMode::LastRow);
838        memtable
839            .write(&build_key_values(
840                &memtable.region_metadata,
841                0,
842                &[(1, 1.0, "a".to_string())],
843                OpType::Put,
844            ))
845            .unwrap();
846        memtable
847            .write(&build_key_values(
848                &memtable.region_metadata,
849                1,
850                &[(2, 2.0, "b".to_string())],
851                OpType::Put,
852            ))
853            .unwrap();
854
855        // Filter with sequence 0 should only return first write
856        let mut iter = memtable
857            .iter(None, None, Some(SequenceRange::LtEq { max: 0 }))
858            .unwrap();
859        let batch = iter.next().unwrap().unwrap();
860        assert_eq!(1, batch.num_rows());
861        assert_eq!(1.0, batch.fields()[0].data.get(0).as_f64_lossy().unwrap());
862    }
863
864    fn rb_with_large_string(
865        ts: i64,
866        string_len: i32,
867        region_meta: &RegionMetadataRef,
868    ) -> RecordBatch {
869        let schema = region_meta.schema.arrow_schema().clone();
870        RecordBatch::try_new(
871            schema,
872            vec![
873                Arc::new(StringArray::from_iter_values(
874                    ["a".repeat(string_len as usize).clone()].into_iter(),
875                )) as ArrayRef,
876                Arc::new(TimestampMillisecondArray::from_iter_values(
877                    [ts].into_iter(),
878                )) as ArrayRef,
879            ],
880        )
881        .unwrap()
882    }
883
884    #[tokio::test]
885    async fn test_write_read_large_string() {
886        let mut builder = RegionMetadataBuilder::new(RegionId::new(123, 456));
887        builder
888            .push_column_metadata(ColumnMetadata {
889                column_schema: ColumnSchema::new("k0", ConcreteDataType::string_datatype(), false),
890                semantic_type: SemanticType::Field,
891                column_id: 0,
892            })
893            .push_column_metadata(ColumnMetadata {
894                column_schema: ColumnSchema::new(
895                    "ts",
896                    ConcreteDataType::timestamp_millisecond_datatype(),
897                    false,
898                ),
899                semantic_type: SemanticType::Timestamp,
900                column_id: 1,
901            })
902            .primary_key(vec![]);
903        let region_meta = Arc::new(builder.build().unwrap());
904        let memtable =
905            SimpleBulkMemtable::new(0, region_meta.clone(), None, true, MergeMode::LastRow);
906        memtable
907            .write_bulk(BulkPart {
908                batch: rb_with_large_string(0, i32::MAX, &region_meta),
909                max_timestamp: 0,
910                min_timestamp: 0,
911                sequence: 0,
912                timestamp_index: 1,
913                raw_data: None,
914            })
915            .unwrap();
916
917        memtable.freeze().unwrap();
918        memtable
919            .write_bulk(BulkPart {
920                batch: rb_with_large_string(1, 3, &region_meta),
921                max_timestamp: 1,
922                min_timestamp: 1,
923                sequence: 1,
924                timestamp_index: 1,
925                raw_data: None,
926            })
927            .unwrap();
928        let MemtableRanges { ranges, .. } =
929            memtable.ranges(None, RangesOptions::default()).unwrap();
930        let mut source = if ranges.len() == 1 {
931            let only_range = ranges.into_values().next().unwrap();
932            Source::Iter(only_range.build_iter().unwrap())
933        } else {
934            let sources = ranges
935                .into_values()
936                .map(|r| r.build_iter().map(Source::Iter))
937                .collect::<error::Result<Vec<_>>>()
938                .unwrap();
939            let merge_reader = MergeReaderBuilder::from_sources(sources)
940                .build()
941                .await
942                .unwrap();
943            Source::Reader(Box::new(merge_reader))
944        };
945
946        let mut rows = 0;
947        while let Some(b) = source.next_batch().await.unwrap() {
948            rows += b.num_rows();
949        }
950        assert_eq!(rows, 2);
951    }
952
953    #[test]
954    fn test_build_record_batch_iter_from_memtable() {
955        let memtable = new_test_memtable(false, MergeMode::LastRow);
956
957        let kvs = build_key_values(
958            &memtable.region_metadata,
959            0,
960            &[(1, 1.0, "a".to_string()), (2, 2.0, "b".to_string())],
961            OpType::Put,
962        );
963        memtable.write(&kvs).unwrap();
964
965        let read_column_ids: Vec<ColumnId> = memtable
966            .region_metadata
967            .column_metadatas
968            .iter()
969            .map(|c| c.column_id)
970            .collect();
971        let ranges = memtable
972            .ranges(Some(&read_column_ids), RangesOptions::default())
973            .unwrap();
974        assert!(!ranges.ranges.is_empty());
975
976        let mut total_rows = 0;
977        for range in ranges.ranges.into_values() {
978            let mut iter = range.build_record_batch_iter(None, None).unwrap();
979            while let Some(rb) = iter.next().transpose().unwrap() {
980                total_rows += rb.num_rows();
981                let schema = rb.schema();
982                let column_names: Vec<_> =
983                    schema.fields().iter().map(|f| f.name().as_str()).collect();
984                assert_eq!(
985                    column_names,
986                    vec!["f1", "f2", "ts", "__primary_key", "__sequence", "__op_type"]
987                );
988            }
989        }
990        assert_eq!(2, total_rows);
991    }
992}