1use std::collections::HashMap;
18use std::sync::Arc;
19
20use api::v1::SemanticType;
21use datatypes::arrow::array::{
22 Array, ArrayRef, BinaryArray, BinaryBuilder, DictionaryArray, UInt32Array,
23};
24use datatypes::arrow::compute::{TakeOptions, take};
25use datatypes::arrow::datatypes::{FieldRef, Schema, SchemaRef};
26use datatypes::arrow::record_batch::RecordBatch;
27use datatypes::data_type::ConcreteDataType;
28use datatypes::prelude::DataType;
29use datatypes::value::Value;
30use datatypes::vectors::VectorRef;
31use datatypes::vectors::json::array::JsonArray;
32use mito_codec::row_converter::{
33 CompositeValues, PrimaryKeyCodec, SortField, build_primary_key_codec,
34 build_primary_key_codec_with_fields,
35};
36use snafu::{OptionExt, ResultExt, ensure};
37use store_api::codec::PrimaryKeyEncoding;
38use store_api::metadata::{RegionMetadata, RegionMetadataRef};
39use store_api::storage::ColumnId;
40
41use crate::error::{
42 CompatReaderSnafu, ComputeArrowSnafu, ConvertValueSnafu, CreateDefaultSnafu, DecodeSnafu,
43 EncodeSnafu, NewRecordBatchSnafu, Result, UnsupportedOperationSnafu,
44};
45use crate::read::flat_projection::{FlatProjectionMapper, flat_projected_columns};
46use crate::sst::parquet::flat_format::primary_key_column_index;
47use crate::sst::parquet::format::{FormatProjection, INTERNAL_COLUMN_NUM, PrimaryKeyArray};
48use crate::sst::{internal_fields, tag_maybe_to_dictionary_field};
49
50pub(crate) fn has_same_columns_and_pk_encoding(
52 left: &RegionMetadata,
53 right: &RegionMetadata,
54) -> bool {
55 if left.primary_key_encoding != right.primary_key_encoding {
56 return false;
57 }
58
59 if left.column_metadatas.len() != right.column_metadatas.len() {
60 return false;
61 }
62
63 for (left_col, right_col) in left.column_metadatas.iter().zip(&right.column_metadatas) {
64 if left_col.column_id != right_col.column_id || !left_col.is_same_datatype(right_col) {
65 return false;
66 }
67 debug_assert_eq!(
68 left_col.column_schema.data_type,
69 right_col.column_schema.data_type
70 );
71 debug_assert_eq!(left_col.semantic_type, right_col.semantic_type);
72 }
73
74 true
75}
76
77pub(crate) struct FlatCompatBatch {
79 index_or_defaults: Vec<IndexOrDefault>,
81 arrow_schema: SchemaRef,
83 compat_pk: FlatCompatPrimaryKey,
85}
86
87impl FlatCompatBatch {
88 pub(crate) fn try_new(
95 mapper: &FlatProjectionMapper,
96 actual: &RegionMetadataRef,
97 format_projection: &FormatProjection,
98 compaction: bool,
99 ) -> Result<Option<Self>> {
100 let actual_schema = flat_projected_columns(actual, format_projection);
101 let expect_schema = mapper.batch_schema();
102 if expect_schema == actual_schema {
103 return Ok(None);
106 }
107
108 if actual.primary_key_encoding == PrimaryKeyEncoding::Sparse && compaction {
109 return FlatCompatBatch::try_new_compact_sparse(mapper, actual);
111 }
112
113 let (index_or_defaults, fields) =
114 Self::compute_index_and_fields(&actual_schema, expect_schema, mapper.metadata())?;
115
116 let compat_pk = FlatCompatPrimaryKey::new(mapper.metadata(), actual)?;
117
118 Ok(Some(Self {
119 index_or_defaults,
120 arrow_schema: Arc::new(Schema::new(fields)),
121 compat_pk,
122 }))
123 }
124
125 fn compute_index_and_fields(
126 actual_schema: &[(ColumnId, ConcreteDataType)],
127 expect_schema: &[(ColumnId, ConcreteDataType)],
128 expect_metadata: &RegionMetadata,
129 ) -> Result<(Vec<IndexOrDefault>, Vec<FieldRef>)> {
130 let actual_schema_index: HashMap<_, _> = actual_schema
132 .iter()
133 .enumerate()
134 .map(|(idx, (column_id, data_type))| (*column_id, (idx, data_type)))
135 .collect();
136
137 let mut index_or_defaults = Vec::with_capacity(expect_schema.len());
138 let mut fields = Vec::with_capacity(expect_schema.len());
139 for (column_id, expect_data_type) in expect_schema {
140 let column_index = expect_metadata.column_index_by_id(*column_id).unwrap();
142 let expect_column = &expect_metadata.column_metadatas[column_index];
143 let column_field = &expect_metadata.schema.arrow_schema().fields()[column_index];
144 if expect_column.semantic_type == SemanticType::Tag {
146 fields.push(tag_maybe_to_dictionary_field(
147 &expect_column.column_schema.data_type,
148 column_field,
149 ));
150 } else {
151 fields.push(column_field.clone());
152 };
153
154 if let Some((index, actual_data_type)) = actual_schema_index.get(column_id) {
155 let mut cast_type = None;
156
157 if expect_data_type != *actual_data_type {
159 cast_type = Some(expect_data_type.clone())
160 }
161 index_or_defaults.push(IndexOrDefault::Index {
163 pos: *index,
164 cast_type,
165 });
166 } else {
167 let default_vector = expect_column
169 .column_schema
170 .create_default_vector(1)
171 .context(CreateDefaultSnafu {
172 region_id: expect_metadata.region_id,
173 column: &expect_column.column_schema.name,
174 })?
175 .with_context(|| CompatReaderSnafu {
176 region_id: expect_metadata.region_id,
177 reason: format!(
178 "column {} does not have a default value to read",
179 expect_column.column_schema.name
180 ),
181 })?;
182 index_or_defaults.push(IndexOrDefault::DefaultValue {
183 default_vector,
184 semantic_type: expect_column.semantic_type,
185 });
186 };
187 }
188 fields.extend_from_slice(&internal_fields());
189
190 Ok((index_or_defaults, fields))
191 }
192
193 fn try_new_compact_sparse(
194 mapper: &FlatProjectionMapper,
195 actual: &RegionMetadataRef,
196 ) -> Result<Option<Self>> {
197 ensure!(
200 mapper.metadata().primary_key_encoding == PrimaryKeyEncoding::Sparse,
201 UnsupportedOperationSnafu {
202 err_msg: "Flat format doesn't support converting sparse encoding back to dense encoding"
203 }
204 );
205
206 let actual_schema: Vec<_> = actual
209 .field_columns()
210 .chain([actual.time_index_column()])
211 .map(|col| (col.column_id, col.column_schema.data_type.clone()))
212 .collect();
213 let expect_schema: Vec<_> = mapper
214 .metadata()
215 .field_columns()
216 .chain([mapper.metadata().time_index_column()])
217 .map(|col| (col.column_id, col.column_schema.data_type.clone()))
218 .collect();
219
220 let (index_or_defaults, fields) =
221 Self::compute_index_and_fields(&actual_schema, &expect_schema, mapper.metadata())?;
222
223 let compat_pk = FlatCompatPrimaryKey::default();
224
225 Ok(Some(Self {
226 index_or_defaults,
227 arrow_schema: Arc::new(Schema::new(fields)),
228 compat_pk,
229 }))
230 }
231
232 pub(crate) fn compat(&self, batch: RecordBatch) -> Result<RecordBatch> {
234 let len = batch.num_rows();
235 let columns = self
236 .index_or_defaults
237 .iter()
238 .map(|index_or_default| match index_or_default {
239 IndexOrDefault::Index { pos, cast_type } => {
240 let old_column = batch.column(*pos);
241
242 if let Some(ty) = cast_type {
243 let casted = if let Some(json_type) = ty.as_json()
244 && json_type.is_json2()
245 {
246 JsonArray::from(old_column)
247 .try_align(&json_type.as_arrow_type())
248 .context(ConvertValueSnafu)?
249 } else {
250 datatypes::arrow::compute::cast(old_column, &ty.as_arrow_type())
251 .context(ComputeArrowSnafu)?
252 };
253 Ok(casted)
254 } else {
255 Ok(old_column.clone())
256 }
257 }
258 IndexOrDefault::DefaultValue {
259 default_vector,
260 semantic_type,
261 } => repeat_vector(default_vector, len, *semantic_type == SemanticType::Tag),
262 })
263 .chain(
264 batch.columns()[batch.num_columns() - INTERNAL_COLUMN_NUM..]
266 .iter()
267 .map(|col| Ok(col.clone())),
268 )
269 .collect::<Result<Vec<_>>>()?;
270
271 let compat_batch = RecordBatch::try_new(self.arrow_schema.clone(), columns)
272 .context(NewRecordBatchSnafu)?;
273
274 self.compat_pk.compat(compat_batch)
276 }
277}
278
279fn repeat_vector(vector: &VectorRef, to_len: usize, is_tag: bool) -> Result<ArrayRef> {
281 assert_eq!(1, vector.len());
282 let data_type = vector.data_type();
283 if is_tag && data_type.is_string() {
284 let values = vector.to_arrow_array();
285 if values.is_null(0) {
286 let keys = UInt32Array::new_null(to_len);
288 Ok(Arc::new(DictionaryArray::new(keys, values.slice(0, 0))))
289 } else {
290 let keys = UInt32Array::from_value(0, to_len);
291 Ok(Arc::new(DictionaryArray::new(keys, values)))
292 }
293 } else {
294 let keys = UInt32Array::from_value(0, to_len);
295 take(
296 &vector.to_arrow_array(),
297 &keys,
298 Some(TakeOptions {
299 check_bounds: false,
300 }),
301 )
302 .context(ComputeArrowSnafu)
303 }
304}
305
306fn is_primary_key_same(expect: &RegionMetadata, actual: &RegionMetadata) -> Result<bool> {
308 ensure!(
309 actual.primary_key.len() <= expect.primary_key.len(),
310 CompatReaderSnafu {
311 region_id: expect.region_id,
312 reason: format!(
313 "primary key has more columns {} than expect {}",
314 actual.primary_key.len(),
315 expect.primary_key.len()
316 ),
317 }
318 );
319 ensure!(
320 actual.primary_key == expect.primary_key[..actual.primary_key.len()],
321 CompatReaderSnafu {
322 region_id: expect.region_id,
323 reason: format!(
324 "primary key has different prefix, expect: {:?}, actual: {:?}",
325 expect.primary_key, actual.primary_key
326 ),
327 }
328 );
329
330 Ok(actual.primary_key.len() == expect.primary_key.len())
331}
332
333#[derive(Debug)]
335enum IndexOrDefault {
336 Index {
338 pos: usize,
339 cast_type: Option<ConcreteDataType>,
340 },
341 DefaultValue {
343 default_vector: VectorRef,
345 semantic_type: SemanticType,
347 },
348}
349
350struct FlatRewritePrimaryKey {
352 codec: Arc<dyn PrimaryKeyCodec>,
354 metadata: RegionMetadataRef,
356 old_codec: Arc<dyn PrimaryKeyCodec>,
359}
360
361impl FlatRewritePrimaryKey {
362 fn new(
363 expect: &RegionMetadataRef,
364 actual: &RegionMetadataRef,
365 ) -> Option<FlatRewritePrimaryKey> {
366 if expect.primary_key_encoding == actual.primary_key_encoding {
367 return None;
368 }
369 let codec = build_primary_key_codec(expect);
370 let old_codec = build_primary_key_codec(actual);
371
372 Some(FlatRewritePrimaryKey {
373 codec,
374 metadata: expect.clone(),
375 old_codec,
376 })
377 }
378
379 fn rewrite_key(
382 &self,
383 append_values: &[(ColumnId, Value)],
384 batch: RecordBatch,
385 ) -> Result<RecordBatch> {
386 let old_pk_dict_array = batch
387 .column(primary_key_column_index(batch.num_columns()))
388 .as_any()
389 .downcast_ref::<PrimaryKeyArray>()
390 .unwrap();
391 let old_pk_values_array = old_pk_dict_array
392 .values()
393 .as_any()
394 .downcast_ref::<BinaryArray>()
395 .unwrap();
396 let mut builder = BinaryBuilder::with_capacity(
397 old_pk_values_array.len(),
398 old_pk_values_array.value_data().len(),
399 );
400
401 let mut buffer = Vec::with_capacity(
403 old_pk_values_array.value_data().len() / old_pk_values_array.len().max(1),
404 );
405 let mut column_id_values = Vec::new();
406 for value in old_pk_values_array.iter() {
408 let Some(old_pk) = value else {
409 builder.append_null();
410 continue;
411 };
412 let mut pk_values = self.old_codec.decode(old_pk).context(DecodeSnafu)?;
414 pk_values.extend(append_values);
415
416 buffer.clear();
417 column_id_values.clear();
418 match pk_values {
420 CompositeValues::Dense(dense_values) => {
421 self.codec
422 .encode_values(dense_values.as_slice(), &mut buffer)
423 .context(EncodeSnafu)?;
424 }
425 CompositeValues::Sparse(sparse_values) => {
426 for id in &self.metadata.primary_key {
427 let value = sparse_values.get_or_null(*id);
428 column_id_values.push((*id, value.clone()));
429 }
430 self.codec
431 .encode_values(&column_id_values, &mut buffer)
432 .context(EncodeSnafu)?;
433 }
434 }
435 builder.append_value(&buffer);
436 }
437 let new_pk_values_array = Arc::new(builder.finish());
438 let new_pk_dict_array =
439 PrimaryKeyArray::new(old_pk_dict_array.keys().clone(), new_pk_values_array);
440
441 let mut columns = batch.columns().to_vec();
442 columns[primary_key_column_index(batch.num_columns())] = Arc::new(new_pk_dict_array);
443
444 RecordBatch::try_new(batch.schema(), columns).context(NewRecordBatchSnafu)
445 }
446}
447
448#[derive(Default)]
450struct FlatCompatPrimaryKey {
451 rewriter: Option<FlatRewritePrimaryKey>,
453 converter: Option<Arc<dyn PrimaryKeyCodec>>,
455 values: Vec<(ColumnId, Value)>,
457}
458
459impl FlatCompatPrimaryKey {
460 fn new(expect: &RegionMetadataRef, actual: &RegionMetadataRef) -> Result<Self> {
461 let rewriter = FlatRewritePrimaryKey::new(expect, actual);
462
463 if is_primary_key_same(expect, actual)? {
464 return Ok(Self {
465 rewriter,
466 converter: None,
467 values: Vec::new(),
468 });
469 }
470
471 let to_add = &expect.primary_key[actual.primary_key.len()..];
473 let mut values = Vec::with_capacity(to_add.len());
474 let mut fields = Vec::with_capacity(to_add.len());
475 for column_id in to_add {
476 let column = expect.column_by_id(*column_id).unwrap();
478 fields.push((
479 *column_id,
480 SortField::new(column.column_schema.data_type.clone()),
481 ));
482 let default_value = column
483 .column_schema
484 .create_default()
485 .context(CreateDefaultSnafu {
486 region_id: expect.region_id,
487 column: &column.column_schema.name,
488 })?
489 .with_context(|| CompatReaderSnafu {
490 region_id: expect.region_id,
491 reason: format!(
492 "key column {} does not have a default value to read",
493 column.column_schema.name
494 ),
495 })?;
496 values.push((*column_id, default_value));
497 }
498 debug_assert!(!fields.is_empty());
500
501 let converter = Some(build_primary_key_codec_with_fields(
503 expect.primary_key_encoding,
504 fields.into_iter(),
505 ));
506
507 Ok(Self {
508 rewriter,
509 converter,
510 values,
511 })
512 }
513
514 fn compat(&self, batch: RecordBatch) -> Result<RecordBatch> {
518 if let Some(rewriter) = &self.rewriter {
519 return rewriter.rewrite_key(&self.values, batch);
521 }
522
523 self.append_key(batch)
524 }
525
526 fn append_key(&self, batch: RecordBatch) -> Result<RecordBatch> {
528 let Some(converter) = &self.converter else {
529 return Ok(batch);
530 };
531
532 let old_pk_dict_array = batch
533 .column(primary_key_column_index(batch.num_columns()))
534 .as_any()
535 .downcast_ref::<PrimaryKeyArray>()
536 .unwrap();
537 let old_pk_values_array = old_pk_dict_array
538 .values()
539 .as_any()
540 .downcast_ref::<BinaryArray>()
541 .unwrap();
542 let mut builder = BinaryBuilder::with_capacity(
543 old_pk_values_array.len(),
544 old_pk_values_array.value_data().len()
545 + converter.estimated_size().unwrap_or_default() * old_pk_values_array.len(),
546 );
547
548 let mut buffer = Vec::with_capacity(
550 old_pk_values_array.value_data().len() / old_pk_values_array.len().max(1)
551 + converter.estimated_size().unwrap_or_default(),
552 );
553
554 for value in old_pk_values_array.iter() {
556 let Some(old_pk) = value else {
557 builder.append_null();
558 continue;
559 };
560
561 buffer.clear();
562 buffer.extend_from_slice(old_pk);
563 converter
564 .encode_values(&self.values, &mut buffer)
565 .context(EncodeSnafu)?;
566
567 builder.append_value(&buffer);
568 }
569
570 let new_pk_values_array = Arc::new(builder.finish());
571 let new_pk_dict_array =
572 PrimaryKeyArray::new(old_pk_dict_array.keys().clone(), new_pk_values_array);
573
574 let mut columns = batch.columns().to_vec();
576 columns[primary_key_column_index(batch.num_columns())] = Arc::new(new_pk_dict_array);
577
578 RecordBatch::try_new(batch.schema(), columns).context(NewRecordBatchSnafu)
579 }
580}
581
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use api::v1::{OpType, SemanticType};
    use datatypes::arrow::array::{
        ArrayRef, BinaryDictionaryBuilder, Int64Array, StringDictionaryBuilder,
        TimestampMillisecondArray, UInt8Array, UInt64Array,
    };
    use datatypes::arrow::datatypes::UInt32Type;
    use datatypes::arrow::record_batch::RecordBatch;
    use datatypes::prelude::ConcreteDataType;
    use datatypes::schema::ColumnSchema;
    use datatypes::value::ValueRef;
    use mito_codec::row_converter::{
        DensePrimaryKeyCodec, PrimaryKeyCodecExt, SparsePrimaryKeyCodec,
    };
    use store_api::codec::PrimaryKeyEncoding;
    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
    use store_api::storage::RegionId;

    use super::*;
    use crate::read::flat_projection::FlatProjectionMapper;
    use crate::sst::parquet::flat_format::FlatReadFormat;
    use crate::sst::{FlatSchemaOptions, to_flat_sst_arrow_schema};

    /// Column definition accepted by [`new_metadata`].
    type ColumnDef = (ColumnId, SemanticType, ConcreteDataType);

    /// Defines a millisecond timestamp column.
    fn ts_column(id: ColumnId) -> ColumnDef {
        (
            id,
            SemanticType::Timestamp,
            ConcreteDataType::timestamp_millisecond_datatype(),
        )
    }

    /// Defines a string tag column.
    fn string_tag(id: ColumnId) -> ColumnDef {
        (id, SemanticType::Tag, ConcreteDataType::string_datatype())
    }

    /// Defines an int64 field column.
    fn int64_field(id: ColumnId) -> ColumnDef {
        (id, SemanticType::Field, ConcreteDataType::int64_datatype())
    }

    /// Creates a new [RegionMetadata] with the given columns and primary key.
    ///
    /// Tags are named `tag_{id}`, fields `field_{id}` and the time index `ts`.
    fn new_metadata(
        semantic_types: &[(ColumnId, SemanticType, ConcreteDataType)],
        primary_key: &[ColumnId],
    ) -> RegionMetadata {
        let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
        for (id, semantic_type, data_type) in semantic_types {
            let (name, nullable) = match semantic_type {
                SemanticType::Tag => (format!("tag_{id}"), true),
                SemanticType::Field => (format!("field_{id}"), true),
                SemanticType::Timestamp => ("ts".to_string(), false),
            };

            builder.push_column_metadata(ColumnMetadata {
                column_schema: ColumnSchema::new(name, data_type.clone(), nullable),
                semantic_type: *semantic_type,
                column_id: *id,
            });
        }
        builder.primary_key(primary_key.to_vec());
        builder.build().unwrap()
    }

    /// Encodes string keys with the dense primary key codec.
    fn encode_key(keys: &[Option<&str>]) -> Vec<u8> {
        let fields = keys
            .iter()
            .map(|_| (0, SortField::new(ConcreteDataType::string_datatype())))
            .collect();
        let converter = DensePrimaryKeyCodec::with_fields(fields);
        let row = keys.iter().map(|key| match *key {
            Some(v) => ValueRef::String(v),
            None => ValueRef::Null,
        });

        converter.encode(row).unwrap()
    }

    /// Encodes string keys with the sparse primary key codec.
    fn encode_sparse_key(keys: &[(ColumnId, Option<&str>)]) -> Vec<u8> {
        let fields = keys
            .iter()
            .map(|_| (1, SortField::new(ConcreteDataType::string_datatype())))
            .collect();
        let converter = SparsePrimaryKeyCodec::with_fields(fields);
        let row: Vec<_> = keys
            .iter()
            .map(|(id, key)| match *key {
                Some(v) => (*id, ValueRef::String(v)),
                None => (*id, ValueRef::Null),
            })
            .collect();

        let mut buffer = vec![];
        converter.encode_value_refs(&row, &mut buffer).unwrap();
        buffer
    }

    /// Builds a binary dictionary array from the encoded primary keys.
    fn build_flat_test_pk_array(primary_keys: &[&[u8]]) -> ArrayRef {
        let mut builder = BinaryDictionaryBuilder::<UInt32Type>::new();
        for pk in primary_keys.iter().copied() {
            builder.append(pk).unwrap();
        }
        Arc::new(builder.finish())
    }

    /// Builds a string dictionary array with two `"tag1"` rows.
    fn tag1_dict_array() -> ArrayRef {
        let mut builder = StringDictionaryBuilder::<UInt32Type>::new();
        builder.append_value("tag1");
        builder.append_value("tag1");
        Arc::new(builder.finish())
    }

    /// Builds the columns every test batch ends with: the time index followed
    /// by the primary key, sequence and op type columns of two `Put` rows that
    /// share the encoded primary key `pk`.
    fn time_index_and_internal_columns(pk: &[u8]) -> Vec<ArrayRef> {
        let columns: Vec<ArrayRef> = vec![
            Arc::new(TimestampMillisecondArray::from_iter_values([1000, 2000])),
            build_flat_test_pk_array(&[pk, pk]),
            Arc::new(UInt64Array::from_iter_values([1, 2])),
            Arc::new(UInt8Array::from_iter_values([
                OpType::Put as u8,
                OpType::Put as u8,
            ])),
        ];
        columns
    }

    /// Builds a record batch with the flat SST schema of `metadata`.
    fn new_flat_batch(metadata: &RegionMetadataRef, columns: Vec<ArrayRef>) -> RecordBatch {
        let schema = to_flat_sst_arrow_schema(metadata, &FlatSchemaOptions::default());
        RecordBatch::try_new(schema, columns).unwrap()
    }

    #[test]
    fn test_flat_compat_batch_with_missing_columns() {
        let actual_metadata = Arc::new(new_metadata(
            &[ts_column(0), string_tag(1), int64_field(2)],
            &[1],
        ));
        // The expected metadata has an additional field (column 3).
        let expected_metadata = Arc::new(new_metadata(
            &[ts_column(0), string_tag(1), int64_field(2), int64_field(3)],
            &[1],
        ));

        let mapper = FlatProjectionMapper::all(&expected_metadata).unwrap();
        let read_format = FlatReadFormat::new(
            actual_metadata.clone(),
            [0, 1, 2, 3].into_iter(),
            None,
            "test",
            false,
        )
        .unwrap();
        let compat_batch = FlatCompatBatch::try_new(
            &mapper,
            &actual_metadata,
            read_format.format_projection(),
            false,
        )
        .unwrap()
        .unwrap();

        let tag_column = tag1_dict_array();
        let k1 = encode_key(&[Some("tag1")]);

        let mut input_columns: Vec<ArrayRef> = vec![
            tag_column.clone(),
            Arc::new(Int64Array::from(vec![100, 200])),
        ];
        input_columns.extend(time_index_and_internal_columns(&k1));
        let input_batch = new_flat_batch(&actual_metadata, input_columns);

        let result = compat_batch.compat(input_batch).unwrap();

        // The missing field is filled with nulls.
        let mut expected_columns: Vec<ArrayRef> = vec![
            tag_column,
            Arc::new(Int64Array::from(vec![100, 200])),
            Arc::new(Int64Array::from(vec![None::<i64>, None::<i64>])),
        ];
        expected_columns.extend(time_index_and_internal_columns(&k1));
        let expected_batch = new_flat_batch(&expected_metadata, expected_columns);

        assert_eq!(expected_batch, result);
    }

    #[test]
    fn test_flat_compat_batch_with_read_projection_superset() {
        let actual_metadata = Arc::new(new_metadata(
            &[ts_column(0), string_tag(1), int64_field(2)],
            &[1],
        ));
        // The expected metadata has an additional field (column 3).
        let expected_metadata = Arc::new(new_metadata(
            &[ts_column(0), string_tag(1), int64_field(2), int64_field(3)],
            &[1],
        ));

        // The mapper is given [1, 2] and the larger column list [1, 2, 3].
        let mapper = FlatProjectionMapper::new_with_read_columns(
            &expected_metadata,
            vec![1, 2],
            vec![1, 2, 3],
        )
        .unwrap();
        let read_format = FlatReadFormat::new(
            actual_metadata.clone(),
            [1, 2, 3].into_iter(),
            None,
            "test",
            false,
        )
        .unwrap();
        let compat_batch = FlatCompatBatch::try_new(
            &mapper,
            &actual_metadata,
            read_format.format_projection(),
            false,
        )
        .unwrap()
        .unwrap();

        let tag_column = tag1_dict_array();
        let k1 = encode_key(&[Some("tag1")]);

        let mut input_columns: Vec<ArrayRef> = vec![
            tag_column.clone(),
            Arc::new(Int64Array::from(vec![100, 200])),
        ];
        input_columns.extend(time_index_and_internal_columns(&k1));
        let input_batch = new_flat_batch(&actual_metadata, input_columns);

        let result = compat_batch.compat(input_batch).unwrap();

        // The missing field is filled with nulls.
        let mut expected_columns: Vec<ArrayRef> = vec![
            tag_column,
            Arc::new(Int64Array::from(vec![100, 200])),
            Arc::new(Int64Array::from(vec![None::<i64>, None::<i64>])),
        ];
        expected_columns.extend(time_index_and_internal_columns(&k1));
        let expected_batch = new_flat_batch(&expected_metadata, expected_columns);

        assert_eq!(expected_batch, result);
    }

    #[test]
    fn test_flat_compat_batch_with_different_pk_encoding() {
        let mut actual_metadata = new_metadata(
            &[ts_column(0), string_tag(1), int64_field(2)],
            &[1],
        );
        actual_metadata.primary_key_encoding = PrimaryKeyEncoding::Dense;
        let actual_metadata = Arc::new(actual_metadata);

        // The expected metadata has one more tag and uses the sparse encoding.
        let mut expected_metadata = new_metadata(
            &[ts_column(0), string_tag(1), int64_field(2), string_tag(3)],
            &[1, 3],
        );
        expected_metadata.primary_key_encoding = PrimaryKeyEncoding::Sparse;
        let expected_metadata = Arc::new(expected_metadata);

        let mapper = FlatProjectionMapper::all(&expected_metadata).unwrap();
        let read_format = FlatReadFormat::new(
            actual_metadata.clone(),
            [0, 1, 2, 3].into_iter(),
            None,
            "test",
            false,
        )
        .unwrap();
        let compat_batch = FlatCompatBatch::try_new(
            &mapper,
            &actual_metadata,
            read_format.format_projection(),
            false,
        )
        .unwrap()
        .unwrap();

        let tag1_column = tag1_dict_array();
        let k1 = encode_key(&[Some("tag1")]);

        let mut input_columns: Vec<ArrayRef> = vec![
            tag1_column.clone(),
            Arc::new(Int64Array::from(vec![100, 200])),
        ];
        input_columns.extend(time_index_and_internal_columns(&k1));
        let input_batch = new_flat_batch(&actual_metadata, input_columns);

        let result = compat_batch.compat(input_batch).unwrap();

        // The key is encoded again with the sparse codec and the new tag is null.
        let sparse_k1 = encode_sparse_key(&[(1, Some("tag1")), (3, None)]);
        let mut null_tag_builder = StringDictionaryBuilder::<UInt32Type>::new();
        null_tag_builder.append_nulls(2);
        let null_tag_column: ArrayRef = Arc::new(null_tag_builder.finish());

        let mut expected_columns: Vec<ArrayRef> = vec![
            tag1_column,
            null_tag_column,
            Arc::new(Int64Array::from(vec![100, 200])),
        ];
        expected_columns.extend(time_index_and_internal_columns(&sparse_k1));
        let expected_batch = new_flat_batch(&expected_metadata, expected_columns);

        assert_eq!(expected_batch, result);
    }

    #[test]
    fn test_flat_compat_batch_compact_sparse() {
        let mut actual_metadata = new_metadata(&[ts_column(0), int64_field(2)], &[]);
        actual_metadata.primary_key_encoding = PrimaryKeyEncoding::Sparse;
        let actual_metadata = Arc::new(actual_metadata);

        // The expected metadata has an additional field (column 3).
        let mut expected_metadata =
            new_metadata(&[ts_column(0), int64_field(2), int64_field(3)], &[]);
        expected_metadata.primary_key_encoding = PrimaryKeyEncoding::Sparse;
        let expected_metadata = Arc::new(expected_metadata);

        let mapper = FlatProjectionMapper::all(&expected_metadata).unwrap();
        let read_format = FlatReadFormat::new(
            actual_metadata.clone(),
            [0, 2, 3].into_iter(),
            None,
            "test",
            true,
        )
        .unwrap();
        let compat_batch = FlatCompatBatch::try_new(
            &mapper,
            &actual_metadata,
            read_format.format_projection(),
            true,
        )
        .unwrap()
        .unwrap();

        let sparse_k1 = encode_sparse_key(&[]);

        let mut input_columns: Vec<ArrayRef> = vec![Arc::new(Int64Array::from(vec![100, 200]))];
        input_columns.extend(time_index_and_internal_columns(&sparse_k1));
        let input_batch = new_flat_batch(&actual_metadata, input_columns);

        let result = compat_batch.compat(input_batch).unwrap();

        // The missing field is filled with nulls and the key is unchanged.
        let mut expected_columns: Vec<ArrayRef> = vec![
            Arc::new(Int64Array::from(vec![100, 200])),
            Arc::new(Int64Array::from(vec![None::<i64>, None::<i64>])),
        ];
        expected_columns.extend(time_index_and_internal_columns(&sparse_k1));
        let expected_batch = new_flat_batch(&expected_metadata, expected_columns);

        assert_eq!(expected_batch, result);
    }
}