Skip to main content

mito2/memtable/
simple_bulk_memtable.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#[cfg(any(test, feature = "test"))]
16mod test_only;
17
18use std::collections::HashSet;
19use std::fmt::{Debug, Formatter};
20use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering};
21use std::sync::{Arc, RwLock};
22use std::time::{Duration, Instant};
23
24use api::v1::OpType;
25use datatypes::vectors::Helper;
26use mito_codec::key_values::KeyValue;
27use rayon::prelude::*;
28use snafu::{OptionExt, ResultExt};
29use store_api::metadata::RegionMetadataRef;
30use store_api::storage::ColumnId;
31
32use crate::flush::WriteBufferManagerRef;
33use crate::memtable::bulk::part::BulkPart;
34use crate::memtable::stats::WriteMetrics;
35use crate::memtable::time_series::Series;
36use crate::memtable::{
37    AllocTracker, BatchToRecordBatchContext, BoxedBatchIterator, IterBuilder, KeyValues,
38    MemScanMetrics, Memtable, MemtableId, MemtableRange, MemtableRangeContext, MemtableRanges,
39    MemtableRef, MemtableStats, RangesOptions, read_column_ids_from_projection,
40};
41use crate::metrics::MEMTABLE_ACTIVE_SERIES_COUNT;
42use crate::read::Batch;
43use crate::read::dedup::LastNonNullIter;
44use crate::region::options::MergeMode;
45use crate::{error, metrics};
46
47pub struct SimpleBulkMemtable {
48    id: MemtableId,
49    region_metadata: RegionMetadataRef,
50    alloc_tracker: AllocTracker,
51    max_timestamp: AtomicI64,
52    min_timestamp: AtomicI64,
53    max_sequence: AtomicU64,
54    dedup: bool,
55    merge_mode: MergeMode,
56    num_rows: AtomicUsize,
57    series: RwLock<Series>,
58}
59
60impl Drop for SimpleBulkMemtable {
61    fn drop(&mut self) {
62        MEMTABLE_ACTIVE_SERIES_COUNT.dec();
63    }
64}
65
66impl SimpleBulkMemtable {
67    pub fn new(
68        id: MemtableId,
69        region_metadata: RegionMetadataRef,
70        write_buffer_manager: Option<WriteBufferManagerRef>,
71        dedup: bool,
72        merge_mode: MergeMode,
73    ) -> Self {
74        let series = RwLock::new(Series::with_capacity(&region_metadata, 1024, 8192));
75
76        Self {
77            id,
78            region_metadata,
79            alloc_tracker: AllocTracker::new(write_buffer_manager),
80            max_timestamp: AtomicI64::new(i64::MIN),
81            min_timestamp: AtomicI64::new(i64::MAX),
82            max_sequence: AtomicU64::new(0),
83            dedup,
84            merge_mode,
85            num_rows: AtomicUsize::new(0),
86            series,
87        }
88    }
89
90    fn build_projection(&self, projection: Option<&[ColumnId]>) -> HashSet<ColumnId> {
91        if let Some(projection) = projection {
92            projection.iter().copied().collect()
93        } else {
94            self.region_metadata
95                .field_columns()
96                .map(|c| c.column_id)
97                .collect()
98        }
99    }
100
101    fn write_key_value(&self, kv: KeyValue, stats: &mut WriteMetrics) {
102        let ts = kv.timestamp();
103        let sequence = kv.sequence();
104        let op_type = kv.op_type();
105        let mut series = self.series.write().unwrap();
106        let size = series.push(ts, sequence, op_type, kv.fields());
107        stats.value_bytes += size;
108        // safety: timestamp of kv must be both present and a valid timestamp value.
109        let ts = kv
110            .timestamp()
111            .try_into_timestamp()
112            .unwrap()
113            .unwrap()
114            .value();
115        stats.min_ts = stats.min_ts.min(ts);
116        stats.max_ts = stats.max_ts.max(ts);
117    }
118
119    /// Updates memtable stats.
120    fn update_stats(&self, stats: WriteMetrics) {
121        self.alloc_tracker
122            .on_allocation(stats.key_bytes + stats.value_bytes);
123        self.num_rows.fetch_add(stats.num_rows, Ordering::SeqCst);
124        self.max_timestamp.fetch_max(stats.max_ts, Ordering::SeqCst);
125        self.min_timestamp.fetch_min(stats.min_ts, Ordering::SeqCst);
126        self.max_sequence
127            .fetch_max(stats.max_sequence, Ordering::SeqCst);
128    }
129
130    #[cfg(test)]
131    fn schema(&self) -> &RegionMetadataRef {
132        &self.region_metadata
133    }
134}
135
136impl Debug for SimpleBulkMemtable {
137    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
138        f.debug_struct("SimpleBulkMemtable").finish()
139    }
140}
141
142impl Memtable for SimpleBulkMemtable {
143    fn id(&self) -> MemtableId {
144        self.id
145    }
146
147    fn write(&self, kvs: &KeyValues) -> error::Result<()> {
148        let mut stats = WriteMetrics::default();
149        let max_sequence = kvs.max_sequence();
150        for kv in kvs.iter() {
151            self.write_key_value(kv, &mut stats);
152        }
153        stats.max_sequence = max_sequence;
154        stats.num_rows = kvs.num_rows();
155        self.update_stats(stats);
156        Ok(())
157    }
158
159    fn write_one(&self, kv: KeyValue) -> error::Result<()> {
160        debug_assert_eq!(0, kv.num_primary_keys());
161        let mut stats = WriteMetrics::default();
162        self.write_key_value(kv, &mut stats);
163        stats.num_rows = 1;
164        stats.max_sequence = kv.sequence();
165        self.update_stats(stats);
166        Ok(())
167    }
168
169    fn write_bulk(&self, part: BulkPart) -> error::Result<()> {
170        let rb = &part.batch;
171
172        let ts = Helper::try_into_vector(
173            rb.column_by_name(&self.region_metadata.time_index_column().column_schema.name)
174                .with_context(|| error::InvalidRequestSnafu {
175                    region_id: self.region_metadata.region_id,
176                    reason: "Timestamp not found",
177                })?,
178        )
179        .context(error::ConvertVectorSnafu)?;
180
181        let sequence = part.sequence;
182
183        let fields: Vec<_> = self
184            .region_metadata
185            .field_columns()
186            .map(|f| {
187                let array = rb.column_by_name(&f.column_schema.name).ok_or_else(|| {
188                    error::InvalidRequestSnafu {
189                        region_id: self.region_metadata.region_id,
190                        reason: format!("Column {} not found", f.column_schema.name),
191                    }
192                    .build()
193                })?;
194                Helper::try_into_vector(array).context(error::ConvertVectorSnafu)
195            })
196            .collect::<error::Result<Vec<_>>>()?;
197
198        let mut series = self.series.write().unwrap();
199        let extend_timer = metrics::REGION_WORKER_HANDLE_WRITE_ELAPSED
200            .with_label_values(&["bulk_extend"])
201            .start_timer();
202        series.extend(ts, OpType::Put as u8, sequence, fields)?;
203        extend_timer.observe_duration();
204
205        self.update_stats(WriteMetrics {
206            key_bytes: 0,
207            value_bytes: part.estimated_size(),
208            min_ts: part.min_timestamp,
209            max_ts: part.max_timestamp,
210            num_rows: part.num_rows(),
211            max_sequence: sequence,
212        });
213        Ok(())
214    }
215
216    fn ranges(
217        &self,
218        projection: Option<&[ColumnId]>,
219        options: RangesOptions,
220    ) -> error::Result<MemtableRanges> {
221        let predicate = options.predicate;
222        let sequence = options.sequence;
223        let start_time = Instant::now();
224        let read_column_ids = read_column_ids_from_projection(&self.region_metadata, projection);
225        let projection = Arc::new(self.build_projection(projection));
226
227        // Use the memtable's overall time range and max sequence for all ranges
228        let max_sequence = self.max_sequence.load(Ordering::Relaxed);
229        let time_range = {
230            let num_rows = self.num_rows.load(Ordering::Relaxed);
231            if num_rows > 0 {
232                let ts_type = self.region_metadata.time_index_type();
233                let max_timestamp =
234                    ts_type.create_timestamp(self.max_timestamp.load(Ordering::Relaxed));
235                let min_timestamp =
236                    ts_type.create_timestamp(self.min_timestamp.load(Ordering::Relaxed));
237                Some((min_timestamp, max_timestamp))
238            } else {
239                None
240            }
241        };
242
243        let values = self.series.read().unwrap().read_to_values();
244        let batch_to_record_batch = Arc::new(BatchToRecordBatchContext::new(
245            self.region_metadata.clone(),
246            read_column_ids.clone(),
247        ));
248
249        let contexts = values
250            .into_par_iter()
251            .filter_map(|v| {
252                let filtered = match v.to_batch(
253                    &[],
254                    &self.region_metadata,
255                    &projection,
256                    sequence,
257                    self.dedup,
258                    self.merge_mode,
259                ) {
260                    Ok(filtered) => filtered,
261                    Err(e) => {
262                        return Some(Err(e));
263                    }
264                };
265                if filtered.is_empty() {
266                    None
267                } else {
268                    Some(Ok(filtered))
269                }
270            })
271            .map(|result| {
272                result.map(|batch| {
273                    let num_rows = batch.num_rows();
274                    let estimated_bytes = batch.memory_size();
275
276                    let range_stats = MemtableStats {
277                        estimated_bytes,
278                        time_range,
279                        num_rows,
280                        num_ranges: 1,
281                        max_sequence,
282                        series_count: 1,
283                    };
284
285                    let builder = BatchRangeBuilder {
286                        batch,
287                        merge_mode: self.merge_mode,
288                        scan_cost: start_time.elapsed(),
289                    };
290                    (
291                        range_stats,
292                        Arc::new(MemtableRangeContext::new_with_batch_to_record_batch(
293                            self.id,
294                            Box::new(builder),
295                            predicate.clone(),
296                            Some(batch_to_record_batch.clone()),
297                        )),
298                    )
299                })
300            })
301            .collect::<error::Result<Vec<_>>>()?;
302
303        let ranges = contexts
304            .into_iter()
305            .enumerate()
306            .map(|(idx, (range_stats, context))| (idx, MemtableRange::new(context, range_stats)))
307            .collect();
308
309        Ok(MemtableRanges { ranges })
310    }
311
312    fn is_empty(&self) -> bool {
313        self.series.read().unwrap().is_empty()
314    }
315
316    fn freeze(&self) -> error::Result<()> {
317        self.series.write().unwrap().freeze(&self.region_metadata);
318        Ok(())
319    }
320
321    fn stats(&self) -> MemtableStats {
322        let estimated_bytes = self.alloc_tracker.bytes_allocated();
323        let num_rows = self.num_rows.load(Ordering::Relaxed);
324        if num_rows == 0 {
325            // no rows ever written
326            return MemtableStats {
327                estimated_bytes,
328                time_range: None,
329                num_rows: 0,
330                num_ranges: 0,
331                max_sequence: 0,
332                series_count: 0,
333            };
334        }
335        let ts_type = self.region_metadata.time_index_type();
336        let max_timestamp = ts_type.create_timestamp(self.max_timestamp.load(Ordering::Relaxed));
337        let min_timestamp = ts_type.create_timestamp(self.min_timestamp.load(Ordering::Relaxed));
338        MemtableStats {
339            estimated_bytes,
340            time_range: Some((min_timestamp, max_timestamp)),
341            num_rows,
342            num_ranges: 1,
343            max_sequence: self.max_sequence.load(Ordering::Relaxed),
344            series_count: 1,
345        }
346    }
347
348    fn fork(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef {
349        Arc::new(Self::new(
350            id,
351            metadata.clone(),
352            self.alloc_tracker.write_buffer_manager(),
353            self.dedup,
354            self.merge_mode,
355        ))
356    }
357}
358
359#[derive(Clone)]
360pub struct BatchRangeBuilder {
361    pub batch: Batch,
362    pub merge_mode: MergeMode,
363    scan_cost: Duration,
364}
365
366impl IterBuilder for BatchRangeBuilder {
367    fn build(&self, metrics: Option<MemScanMetrics>) -> error::Result<BoxedBatchIterator> {
368        let batch = self.batch.clone();
369        if let Some(metrics) = metrics {
370            let inner = crate::memtable::MemScanMetricsData {
371                total_series: 1,
372                num_rows: batch.num_rows(),
373                num_batches: 1,
374                scan_cost: self.scan_cost,
375                ..Default::default()
376            };
377            metrics.merge_inner(&inner);
378        }
379
380        let iter = Iter {
381            batch: Some(Ok(batch)),
382        };
383
384        if self.merge_mode == MergeMode::LastNonNull {
385            Ok(Box::new(LastNonNullIter::new(iter)))
386        } else {
387            Ok(Box::new(iter))
388        }
389    }
390}
391
392struct Iter {
393    batch: Option<error::Result<Batch>>,
394}
395
396impl Iterator for Iter {
397    type Item = error::Result<Batch>;
398
399    fn next(&mut self) -> Option<Self::Item> {
400        self.batch.take()
401    }
402}
403
404#[cfg(test)]
405mod tests {
406    use std::sync::Arc;
407
408    use api::v1::helper::row;
409    use api::v1::value::ValueData;
410    use api::v1::{Mutation, OpType, Rows, SemanticType};
411    use common_recordbatch::DfRecordBatch;
412    use common_time::Timestamp;
413    use datatypes::arrow::array::{ArrayRef, Float64Array, RecordBatch, TimestampMillisecondArray};
414    use datatypes::arrow_array::StringArray;
415    use datatypes::data_type::ConcreteDataType;
416    use datatypes::prelude::{ScalarVector, Vector};
417    use datatypes::schema::ColumnSchema;
418    use datatypes::value::Value;
419    use datatypes::vectors::TimestampMillisecondVector;
420    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
421    use store_api::storage::{RegionId, SequenceNumber, SequenceRange};
422
423    use super::*;
424    use crate::read;
425    use crate::read::dedup::DedupReader;
426    use crate::read::merge::MergeReaderBuilder;
427    use crate::read::{BatchReader, Source};
428    use crate::region::options::MergeMode;
429    use crate::test_util::column_metadata_to_column_schema;
430
431    fn new_test_metadata() -> RegionMetadataRef {
432        let mut builder = RegionMetadataBuilder::new(1.into());
433        builder
434            .push_column_metadata(ColumnMetadata {
435                column_schema: ColumnSchema::new(
436                    "ts",
437                    ConcreteDataType::timestamp_millisecond_datatype(),
438                    false,
439                ),
440                semantic_type: SemanticType::Timestamp,
441                column_id: 1,
442            })
443            .push_column_metadata(ColumnMetadata {
444                column_schema: ColumnSchema::new("f1", ConcreteDataType::float64_datatype(), true),
445                semantic_type: SemanticType::Field,
446                column_id: 2,
447            })
448            .push_column_metadata(ColumnMetadata {
449                column_schema: ColumnSchema::new("f2", ConcreteDataType::string_datatype(), true),
450                semantic_type: SemanticType::Field,
451                column_id: 3,
452            });
453        Arc::new(builder.build().unwrap())
454    }
455
456    fn new_test_memtable(dedup: bool, merge_mode: MergeMode) -> SimpleBulkMemtable {
457        SimpleBulkMemtable::new(1, new_test_metadata(), None, dedup, merge_mode)
458    }
459
460    fn build_key_values(
461        metadata: &RegionMetadataRef,
462        sequence: SequenceNumber,
463        row_values: &[(i64, f64, String)],
464        op_type: OpType,
465    ) -> KeyValues {
466        let column_schemas: Vec<_> = metadata
467            .column_metadatas
468            .iter()
469            .map(column_metadata_to_column_schema)
470            .collect();
471
472        let rows: Vec<_> = row_values
473            .iter()
474            .map(|(ts, f1, f2)| {
475                row(vec![
476                    ValueData::TimestampMillisecondValue(*ts),
477                    ValueData::F64Value(*f1),
478                    ValueData::StringValue(f2.clone()),
479                ])
480            })
481            .collect();
482        let mutation = Mutation {
483            op_type: op_type as i32,
484            sequence,
485            rows: Some(Rows {
486                schema: column_schemas,
487                rows,
488            }),
489            write_hint: None,
490        };
491        KeyValues::new(metadata, mutation).unwrap()
492    }
493
494    #[test]
495    fn test_write_and_iter() {
496        let memtable = new_test_memtable(false, MergeMode::LastRow);
497        memtable
498            .write(&build_key_values(
499                &memtable.region_metadata,
500                0,
501                &[(1, 1.0, "a".to_string())],
502                OpType::Put,
503            ))
504            .unwrap();
505        memtable
506            .write(&build_key_values(
507                &memtable.region_metadata,
508                1,
509                &[(2, 2.0, "b".to_string())],
510                OpType::Put,
511            ))
512            .unwrap();
513
514        let mut iter = memtable
515            .ranges(None, RangesOptions::default())
516            .unwrap()
517            .build(None)
518            .unwrap();
519        let batch = iter.next().unwrap().unwrap();
520        assert_eq!(2, batch.num_rows());
521        assert_eq!(2, batch.fields().len());
522        let ts_v = batch
523            .timestamps()
524            .as_any()
525            .downcast_ref::<TimestampMillisecondVector>()
526            .unwrap();
527        assert_eq!(Value::Timestamp(Timestamp::new_millisecond(1)), ts_v.get(0));
528        assert_eq!(Value::Timestamp(Timestamp::new_millisecond(2)), ts_v.get(1));
529    }
530
531    #[test]
532    fn test_projection() {
533        let memtable = new_test_memtable(false, MergeMode::LastRow);
534        memtable
535            .write(&build_key_values(
536                &memtable.region_metadata,
537                0,
538                &[(1, 1.0, "a".to_string())],
539                OpType::Put,
540            ))
541            .unwrap();
542
543        let mut iter = memtable
544            .ranges(None, RangesOptions::default())
545            .unwrap()
546            .build(None)
547            .unwrap();
548        let batch = iter.next().unwrap().unwrap();
549        assert_eq!(1, batch.num_rows());
550        assert_eq!(2, batch.fields().len());
551
552        let ts_v = batch
553            .timestamps()
554            .as_any()
555            .downcast_ref::<TimestampMillisecondVector>()
556            .unwrap();
557        assert_eq!(Value::Timestamp(Timestamp::new_millisecond(1)), ts_v.get(0));
558
559        // Only project column 2 (f1)
560        let projection = vec![2];
561        let mut iter = memtable
562            .ranges(Some(&projection), RangesOptions::default())
563            .unwrap()
564            .build(None)
565            .unwrap();
566        let batch = iter.next().unwrap().unwrap();
567
568        assert_eq!(1, batch.num_rows());
569        assert_eq!(1, batch.fields().len()); // only f1
570        assert_eq!(2, batch.fields()[0].column_id);
571    }
572
573    #[test]
574    fn test_dedup() {
575        let memtable = new_test_memtable(true, MergeMode::LastRow);
576        memtable
577            .write(&build_key_values(
578                &memtable.region_metadata,
579                0,
580                &[(1, 1.0, "a".to_string())],
581                OpType::Put,
582            ))
583            .unwrap();
584        memtable
585            .write(&build_key_values(
586                &memtable.region_metadata,
587                1,
588                &[(1, 2.0, "b".to_string())],
589                OpType::Put,
590            ))
591            .unwrap();
592        let mut iter = memtable
593            .ranges(None, RangesOptions::default())
594            .unwrap()
595            .build(None)
596            .unwrap();
597        let batch = iter.next().unwrap().unwrap();
598
599        assert_eq!(1, batch.num_rows()); // deduped to 1 row
600        assert_eq!(2.0, batch.fields()[0].data.get(0).as_f64_lossy().unwrap()); // last write wins
601    }
602
603    #[test]
604    fn test_write_one() {
605        let memtable = new_test_memtable(false, MergeMode::LastRow);
606        let kvs = build_key_values(
607            &memtable.region_metadata,
608            0,
609            &[(1, 1.0, "a".to_string())],
610            OpType::Put,
611        );
612        let kv = kvs.iter().next().unwrap();
613        memtable.write_one(kv).unwrap();
614
615        let mut iter = memtable
616            .ranges(None, RangesOptions::default())
617            .unwrap()
618            .build(None)
619            .unwrap();
620        let batch = iter.next().unwrap().unwrap();
621        assert_eq!(1, batch.num_rows());
622    }
623
624    #[tokio::test]
625    async fn test_write_dedup() {
626        let memtable = new_test_memtable(true, MergeMode::LastRow);
627        let kvs = build_key_values(
628            &memtable.region_metadata,
629            0,
630            &[(1, 1.0, "a".to_string())],
631            OpType::Put,
632        );
633        let kv = kvs.iter().next().unwrap();
634        memtable.write_one(kv).unwrap();
635        memtable.freeze().unwrap();
636
637        let kvs = build_key_values(
638            &memtable.region_metadata,
639            1,
640            &[(1, 1.0, "a".to_string())],
641            OpType::Delete,
642        );
643        let kv = kvs.iter().next().unwrap();
644        memtable.write_one(kv).unwrap();
645
646        let ranges = memtable.ranges(None, RangesOptions::default()).unwrap();
647        let mut source = vec![];
648        for r in ranges.ranges.values() {
649            source.push(Source::Iter(r.build_iter().unwrap()));
650        }
651
652        let reader = MergeReaderBuilder::from_sources(source)
653            .build()
654            .await
655            .unwrap();
656
657        let mut reader = DedupReader::new(reader, read::dedup::LastRow::new(false), None);
658        let mut num_rows = 0;
659        while let Some(b) = reader.next_batch().await.unwrap() {
660            num_rows += b.num_rows();
661        }
662        assert_eq!(num_rows, 1);
663    }
664
665    #[tokio::test]
666    async fn test_delete_only() {
667        let memtable = new_test_memtable(true, MergeMode::LastRow);
668        let kvs = build_key_values(
669            &memtable.region_metadata,
670            0,
671            &[(1, 1.0, "a".to_string())],
672            OpType::Delete,
673        );
674        let kv = kvs.iter().next().unwrap();
675        memtable.write_one(kv).unwrap();
676        memtable.freeze().unwrap();
677
678        let ranges = memtable.ranges(None, RangesOptions::default()).unwrap();
679        let mut source = vec![];
680        for r in ranges.ranges.values() {
681            source.push(Source::Iter(r.build_iter().unwrap()));
682        }
683
684        let reader = MergeReaderBuilder::from_sources(source)
685            .build()
686            .await
687            .unwrap();
688
689        let mut reader = DedupReader::new(reader, read::dedup::LastRow::new(false), None);
690        let mut num_rows = 0;
691        while let Some(b) = reader.next_batch().await.unwrap() {
692            num_rows += b.num_rows();
693            assert_eq!(b.num_rows(), 1);
694            assert_eq!(b.op_types().get_data(0).unwrap(), OpType::Delete as u8);
695        }
696        assert_eq!(num_rows, 1);
697    }
698
699    #[tokio::test]
700    async fn test_single_range() {
701        let memtable = new_test_memtable(true, MergeMode::LastRow);
702        let kvs = build_key_values(
703            &memtable.region_metadata,
704            0,
705            &[(1, 1.0, "a".to_string())],
706            OpType::Put,
707        );
708        memtable.write_one(kvs.iter().next().unwrap()).unwrap();
709
710        let kvs = build_key_values(
711            &memtable.region_metadata,
712            1,
713            &[(1, 2.0, "b".to_string())],
714            OpType::Put,
715        );
716        memtable.write_one(kvs.iter().next().unwrap()).unwrap();
717        memtable.freeze().unwrap();
718
719        let ranges = memtable.ranges(None, RangesOptions::default()).unwrap();
720        assert_eq!(ranges.ranges.len(), 1);
721        let range = ranges.ranges.into_values().next().unwrap();
722        let mut reader = range.context.builder.build(None).unwrap();
723
724        let mut num_rows = 0;
725        while let Some(b) = reader.next().transpose().unwrap() {
726            num_rows += b.num_rows();
727            assert_eq!(b.fields()[1].data.get(0).as_string(), Some("b".to_string()));
728        }
729        assert_eq!(num_rows, 1);
730    }
731
732    #[test]
733    fn test_write_bulk() {
734        let memtable = new_test_memtable(false, MergeMode::LastRow);
735        let arrow_schema = memtable.schema().schema.arrow_schema().clone();
736        let arrays = vec![
737            Arc::new(TimestampMillisecondArray::from(vec![1, 2])) as ArrayRef,
738            Arc::new(Float64Array::from(vec![1.0, 2.0])) as ArrayRef,
739            Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef,
740        ];
741        let rb = DfRecordBatch::try_new(arrow_schema, arrays).unwrap();
742
743        let part = BulkPart {
744            batch: rb,
745            sequence: 1,
746            min_timestamp: 1,
747            max_timestamp: 2,
748            timestamp_index: 0,
749            raw_data: None,
750        };
751        memtable.write_bulk(part).unwrap();
752
753        let mut iter = memtable
754            .ranges(None, RangesOptions::default())
755            .unwrap()
756            .build(None)
757            .unwrap();
758        let batch = iter.next().unwrap().unwrap();
759        assert_eq!(2, batch.num_rows());
760
761        let stats = memtable.stats();
762        assert_eq!(1, stats.max_sequence);
763        assert_eq!(2, stats.num_rows);
764        assert_eq!(
765            Some((Timestamp::new_millisecond(1), Timestamp::new_millisecond(2))),
766            stats.time_range
767        );
768
769        let kvs = build_key_values(
770            &memtable.region_metadata,
771            2,
772            &[(3, 3.0, "c".to_string())],
773            OpType::Put,
774        );
775        memtable.write(&kvs).unwrap();
776        let mut iter = memtable
777            .ranges(None, RangesOptions::default())
778            .unwrap()
779            .build(None)
780            .unwrap();
781        let batch = iter.next().unwrap().unwrap();
782        assert_eq!(3, batch.num_rows());
783        assert_eq!(
784            vec![1, 2, 3],
785            batch
786                .timestamps()
787                .as_any()
788                .downcast_ref::<TimestampMillisecondVector>()
789                .unwrap()
790                .iter_data()
791                .map(|t| { t.unwrap().0.value() })
792                .collect::<Vec<_>>()
793        );
794    }
795
796    #[test]
797    fn test_is_empty() {
798        let memtable = new_test_memtable(false, MergeMode::LastRow);
799        assert!(memtable.is_empty());
800
801        memtable
802            .write(&build_key_values(
803                &memtable.region_metadata,
804                0,
805                &[(1, 1.0, "a".to_string())],
806                OpType::Put,
807            ))
808            .unwrap();
809        assert!(!memtable.is_empty());
810    }
811
812    #[test]
813    fn test_stats() {
814        let memtable = new_test_memtable(false, MergeMode::LastRow);
815        let stats = memtable.stats();
816        assert_eq!(0, stats.num_rows);
817        assert!(stats.time_range.is_none());
818
819        memtable
820            .write(&build_key_values(
821                &memtable.region_metadata,
822                0,
823                &[(1, 1.0, "a".to_string())],
824                OpType::Put,
825            ))
826            .unwrap();
827        let stats = memtable.stats();
828        assert_eq!(1, stats.num_rows);
829        assert!(stats.time_range.is_some());
830    }
831
832    #[test]
833    fn test_fork() {
834        let memtable = new_test_memtable(false, MergeMode::LastRow);
835        memtable
836            .write(&build_key_values(
837                &memtable.region_metadata,
838                0,
839                &[(1, 1.0, "a".to_string())],
840                OpType::Put,
841            ))
842            .unwrap();
843
844        let forked = memtable.fork(2, &memtable.region_metadata);
845        assert!(forked.is_empty());
846    }
847
848    #[test]
849    fn test_sequence_filter() {
850        let memtable = new_test_memtable(false, MergeMode::LastRow);
851        memtable
852            .write(&build_key_values(
853                &memtable.region_metadata,
854                0,
855                &[(1, 1.0, "a".to_string())],
856                OpType::Put,
857            ))
858            .unwrap();
859        memtable
860            .write(&build_key_values(
861                &memtable.region_metadata,
862                1,
863                &[(2, 2.0, "b".to_string())],
864                OpType::Put,
865            ))
866            .unwrap();
867
868        // Filter with sequence 0 should only return first write
869        let mut iter = memtable
870            .ranges(
871                None,
872                RangesOptions {
873                    sequence: Some(SequenceRange::LtEq { max: 0 }),
874                    ..Default::default()
875                },
876            )
877            .unwrap()
878            .build(None)
879            .unwrap();
880        let batch = iter.next().unwrap().unwrap();
881        assert_eq!(1, batch.num_rows());
882        assert_eq!(1.0, batch.fields()[0].data.get(0).as_f64_lossy().unwrap());
883    }
884
885    fn rb_with_large_string(
886        ts: i64,
887        string_len: i32,
888        region_meta: &RegionMetadataRef,
889    ) -> RecordBatch {
890        let schema = region_meta.schema.arrow_schema().clone();
891        RecordBatch::try_new(
892            schema,
893            vec![
894                Arc::new(StringArray::from_iter_values(
895                    ["a".repeat(string_len as usize).clone()].into_iter(),
896                )) as ArrayRef,
897                Arc::new(TimestampMillisecondArray::from_iter_values(
898                    [ts].into_iter(),
899                )) as ArrayRef,
900            ],
901        )
902        .unwrap()
903    }
904
905    #[tokio::test]
906    async fn test_write_read_large_string() {
907        let mut builder = RegionMetadataBuilder::new(RegionId::new(123, 456));
908        builder
909            .push_column_metadata(ColumnMetadata {
910                column_schema: ColumnSchema::new("k0", ConcreteDataType::string_datatype(), false),
911                semantic_type: SemanticType::Field,
912                column_id: 0,
913            })
914            .push_column_metadata(ColumnMetadata {
915                column_schema: ColumnSchema::new(
916                    "ts",
917                    ConcreteDataType::timestamp_millisecond_datatype(),
918                    false,
919                ),
920                semantic_type: SemanticType::Timestamp,
921                column_id: 1,
922            })
923            .primary_key(vec![]);
924        let region_meta = Arc::new(builder.build().unwrap());
925        let memtable =
926            SimpleBulkMemtable::new(0, region_meta.clone(), None, true, MergeMode::LastRow);
927        memtable
928            .write_bulk(BulkPart {
929                batch: rb_with_large_string(0, i32::MAX, &region_meta),
930                max_timestamp: 0,
931                min_timestamp: 0,
932                sequence: 0,
933                timestamp_index: 1,
934                raw_data: None,
935            })
936            .unwrap();
937
938        memtable.freeze().unwrap();
939        memtable
940            .write_bulk(BulkPart {
941                batch: rb_with_large_string(1, 3, &region_meta),
942                max_timestamp: 1,
943                min_timestamp: 1,
944                sequence: 1,
945                timestamp_index: 1,
946                raw_data: None,
947            })
948            .unwrap();
949        let MemtableRanges { ranges, .. } =
950            memtable.ranges(None, RangesOptions::default()).unwrap();
951        let mut source = if ranges.len() == 1 {
952            let only_range = ranges.into_values().next().unwrap();
953            Source::Iter(only_range.build_iter().unwrap())
954        } else {
955            let sources = ranges
956                .into_values()
957                .map(|r| r.build_iter().map(Source::Iter))
958                .collect::<error::Result<Vec<_>>>()
959                .unwrap();
960            let merge_reader = MergeReaderBuilder::from_sources(sources)
961                .build()
962                .await
963                .unwrap();
964            Source::Reader(Box::new(merge_reader))
965        };
966
967        let mut rows = 0;
968        while let Some(b) = source.next_batch().await.unwrap() {
969            rows += b.num_rows();
970        }
971        assert_eq!(rows, 2);
972    }
973
974    #[test]
975    fn test_build_record_batch_iter_from_memtable() {
976        let memtable = new_test_memtable(false, MergeMode::LastRow);
977
978        let kvs = build_key_values(
979            &memtable.region_metadata,
980            0,
981            &[(1, 1.0, "a".to_string()), (2, 2.0, "b".to_string())],
982            OpType::Put,
983        );
984        memtable.write(&kvs).unwrap();
985
986        let read_column_ids: Vec<ColumnId> = memtable
987            .region_metadata
988            .column_metadatas
989            .iter()
990            .map(|c| c.column_id)
991            .collect();
992        let ranges = memtable
993            .ranges(Some(&read_column_ids), RangesOptions::default())
994            .unwrap();
995        assert!(!ranges.ranges.is_empty());
996
997        let mut total_rows = 0;
998        for range in ranges.ranges.into_values() {
999            let mut iter = range.build_record_batch_iter(None, None).unwrap();
1000            while let Some(rb) = iter.next().transpose().unwrap() {
1001                total_rows += rb.num_rows();
1002                let schema = rb.schema();
1003                let column_names: Vec<_> =
1004                    schema.fields().iter().map(|f| f.name().as_str()).collect();
1005                assert_eq!(
1006                    column_names,
1007                    vec!["f1", "f2", "ts", "__primary_key", "__sequence", "__op_type"]
1008                );
1009            }
1010        }
1011        assert_eq!(2, total_rows);
1012    }
1013}