Skip to main content

mito_codec/row_converter/
sparse.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{HashMap, HashSet};
16use std::sync::Arc;
17
18use bytes::BufMut;
19use common_recordbatch::filter::SimpleFilterEvaluator;
20use datatypes::prelude::ConcreteDataType;
21use datatypes::value::{Value, ValueRef};
22use memcomparable::{Deserializer, Serializer};
23use serde::{Deserialize, Serialize};
24use snafu::ResultExt;
25use store_api::codec::PrimaryKeyEncoding;
26use store_api::metadata::RegionMetadataRef;
27use store_api::storage::ColumnId;
28use store_api::storage::consts::ReservedColumnId;
29
30use crate::error::{DeserializeFieldSnafu, Result, SerializeFieldSnafu, UnsupportedOperationSnafu};
31use crate::key_values::KeyValue;
32use crate::primary_key_filter::SparsePrimaryKeyFilter;
33use crate::row_converter::dense::SortField;
34use crate::row_converter::{CompositeValues, PrimaryKeyCodec, PrimaryKeyFilter};
35
/// A codec for the sparse primary key of metrics.
///
/// It requires that the input primary key columns are sorted by column name in
/// lexicographical order. Each encoded value is prefixed with the column id of
/// the physical region.
///
/// The codec state lives behind an [`Arc`], so clones share it.
#[derive(Clone, Debug)]
pub struct SparsePrimaryKeyCodec {
    inner: Arc<SparsePrimaryKeyCodecInner>,
}
43
/// Shared state of [`SparsePrimaryKeyCodec`].
#[derive(Debug)]
struct SparsePrimaryKeyCodecInner {
    // Internal field: sort field used for the reserved table id column.
    table_id_field: SortField,
    // Internal field: sort field used for the reserved tsid column.
    tsid_field: SortField,
    // User-defined label field: sort field used for every non-reserved column.
    label_field: SortField,
    // Columns in the primary key.
    //
    // `None` means every column is treated as part of the primary key
    // (non-reserved columns are handled with `Self::label_field`).
    columns: Option<HashSet<ColumnId>>,
}
57
58/// Sparse values representation.
59///
60/// A map of [`ColumnId`] to [`Value`].
61#[derive(Debug, Clone, PartialEq, Eq)]
62pub struct SparseValues {
63    values: HashMap<ColumnId, Value>,
64}
65
66impl SparseValues {
67    /// Creates a new [`SparseValues`] instance.
68    pub fn new(values: HashMap<ColumnId, Value>) -> Self {
69        Self { values }
70    }
71
72    /// Returns the value of the given column, or [`Value::Null`] if the column is not present.
73    pub fn get_or_null(&self, column_id: ColumnId) -> &Value {
74        self.values.get(&column_id).unwrap_or(&Value::Null)
75    }
76
77    /// Returns the value of the given column, or [`None`] if the column is not present.
78    pub fn get(&self, column_id: &ColumnId) -> Option<&Value> {
79        self.values.get(column_id)
80    }
81
82    /// Inserts a new value into the [`SparseValues`].
83    pub fn insert(&mut self, column_id: ColumnId, value: Value) {
84        self.values.insert(column_id, value);
85    }
86
87    /// Returns an iterator over all stored column id/value pairs.
88    pub fn iter(&self) -> impl Iterator<Item = (&ColumnId, &Value)> {
89        self.values.iter()
90    }
91}
92
/// The column id of the reserved tsid column.
pub const RESERVED_COLUMN_ID_TSID: ColumnId = ReservedColumnId::tsid();
/// The column id of the reserved table id column.
pub const RESERVED_COLUMN_ID_TABLE_ID: ColumnId = ReservedColumnId::table_id();
/// The size, in bytes, of an encoded column id in the sparse row.
pub const COLUMN_ID_ENCODE_SIZE: usize = 4;
99
100impl SparsePrimaryKeyCodec {
101    /// Creates a new [`SparsePrimaryKeyCodec`] instance.
102    pub fn from_columns(columns_ids: impl Iterator<Item = ColumnId>) -> Self {
103        let columns = columns_ids.collect();
104        Self {
105            inner: Arc::new(SparsePrimaryKeyCodecInner {
106                table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
107                tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
108                label_field: SortField::new(ConcreteDataType::string_datatype()),
109                columns: Some(columns),
110            }),
111        }
112    }
113
114    /// Creates a new [`SparsePrimaryKeyCodec`] instance.
115    pub fn new(region_metadata: &RegionMetadataRef) -> Self {
116        Self::from_columns(region_metadata.primary_key_columns().map(|c| c.column_id))
117    }
118
119    /// Returns a new [`SparsePrimaryKeyCodec`] instance.
120    ///
121    /// It treats all unknown columns as primary key(label field).
122    pub fn schemaless() -> Self {
123        Self {
124            inner: Arc::new(SparsePrimaryKeyCodecInner {
125                table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
126                tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
127                label_field: SortField::new(ConcreteDataType::string_datatype()),
128                columns: None,
129            }),
130        }
131    }
132
133    /// Creates a new [`SparsePrimaryKeyCodec`] instance with additional label `fields`.
134    pub fn with_fields(fields: Vec<(ColumnId, SortField)>) -> Self {
135        Self {
136            inner: Arc::new(SparsePrimaryKeyCodecInner {
137                columns: Some(fields.iter().map(|f| f.0).collect()),
138                table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
139                tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
140                label_field: SortField::new(ConcreteDataType::string_datatype()),
141            }),
142        }
143    }
144
145    /// Returns the field of the given column id.
146    fn get_field(&self, column_id: ColumnId) -> Option<&SortField> {
147        // if the `columns` is not specified, all unknown columns is primary key(label field).
148        if let Some(columns) = &self.inner.columns
149            && !columns.contains(&column_id)
150        {
151            return None;
152        }
153
154        match column_id {
155            RESERVED_COLUMN_ID_TABLE_ID => Some(&self.inner.table_id_field),
156            RESERVED_COLUMN_ID_TSID => Some(&self.inner.tsid_field),
157            _ => Some(&self.inner.label_field),
158        }
159    }
160
161    /// Encodes the given bytes into a [`SparseValues`].
162    pub fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
163    where
164        I: Iterator<Item = (ColumnId, ValueRef<'a>)>,
165    {
166        let mut serializer = Serializer::new(buffer);
167        for (column_id, value) in row {
168            if value.is_null() {
169                continue;
170            }
171
172            if let Some(field) = self.get_field(column_id) {
173                column_id
174                    .serialize(&mut serializer)
175                    .context(SerializeFieldSnafu)?;
176                field.serialize(&mut serializer, &value)?;
177            } else {
178                // TODO(weny): handle the error.
179                common_telemetry::warn!("Column {} is not in primary key, skipping", column_id);
180            }
181        }
182        Ok(())
183    }
184
185    pub fn encode_raw_tag_value<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
186    where
187        I: Iterator<Item = (ColumnId, &'a [u8])>,
188    {
189        for (tag_column_id, tag_value) in row {
190            let value_len = tag_value.len();
191            buffer.reserve(6 + value_len / 8 * 9);
192            buffer.put_u32(tag_column_id);
193            buffer.put_u8(1);
194            buffer.put_u8(!tag_value.is_empty() as u8);
195
196            // Manual implementation of memcomparable::ser::Serializer::serialize_bytes
197            // to avoid byte-by-byte put.
198            let mut len = 0;
199            let num_chucks = value_len / 8;
200            let remainder = value_len % 8;
201
202            for idx in 0..num_chucks {
203                buffer.extend_from_slice(&tag_value[idx * 8..idx * 8 + 8]);
204                len += 8;
205                // append an extra byte that signals the number of significant bytes in this chunk
206                // 1-8: many bytes were significant and this group is the last group
207                // 9: all 8 bytes were significant and there is more data to come
208                let extra = if len == value_len { 8 } else { 9 };
209                buffer.put_u8(extra);
210            }
211
212            if remainder != 0 {
213                buffer.extend_from_slice(&tag_value[len..value_len]);
214                buffer.put_bytes(0, 8 - remainder);
215                buffer.put_u8(remainder as u8);
216            }
217        }
218        Ok(())
219    }
220
221    /// Encodes the given bytes into a [`SparseValues`].
222    pub fn encode_internal(&self, table_id: u32, tsid: u64, buffer: &mut Vec<u8>) -> Result<()> {
223        buffer.reserve_exact(22);
224        buffer.put_u32(RESERVED_COLUMN_ID_TABLE_ID);
225        buffer.put_u8(1);
226        buffer.put_u32(table_id);
227        buffer.put_u32(RESERVED_COLUMN_ID_TSID);
228        buffer.put_u8(1);
229        buffer.put_u64(tsid);
230        Ok(())
231    }
232
233    /// Decodes the given bytes into a [`SparseValues`].
234    fn decode_sparse(&self, bytes: &[u8]) -> Result<SparseValues> {
235        let mut deserializer = Deserializer::new(bytes);
236        let mut values = SparseValues::new(HashMap::new());
237
238        let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
239        let value = self.inner.table_id_field.deserialize(&mut deserializer)?;
240        values.insert(column_id, value);
241
242        let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
243        let value = self.inner.tsid_field.deserialize(&mut deserializer)?;
244        values.insert(column_id, value);
245        while deserializer.has_remaining() {
246            let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
247            let value = self.inner.label_field.deserialize(&mut deserializer)?;
248            values.insert(column_id, value);
249        }
250
251        Ok(values)
252    }
253
254    /// Decodes the given bytes into a [`Value`].
255    fn decode_leftmost(&self, bytes: &[u8]) -> Result<Option<Value>> {
256        let mut deserializer = Deserializer::new(bytes);
257        // Skip the column id.
258        deserializer.advance(COLUMN_ID_ENCODE_SIZE);
259        let value = self.inner.table_id_field.deserialize(&mut deserializer)?;
260        Ok(Some(value))
261    }
262
263    /// Returns the offset of the given column id in the given primary key.
264    pub fn has_column(
265        &self,
266        pk: &[u8],
267        offsets_map: &mut HashMap<u32, usize>,
268        column_id: ColumnId,
269    ) -> Option<usize> {
270        if offsets_map.is_empty() {
271            let mut deserializer = Deserializer::new(pk);
272            let mut offset = 0;
273            while deserializer.has_remaining() {
274                let column_id = u32::deserialize(&mut deserializer).unwrap();
275                offset += 4;
276                offsets_map.insert(column_id, offset);
277                let Some(field) = self.get_field(column_id) else {
278                    break;
279                };
280
281                let skip = field.skip_deserialize(pk, &mut deserializer).unwrap();
282                offset += skip;
283            }
284
285            offsets_map.get(&column_id).copied()
286        } else {
287            offsets_map.get(&column_id).copied()
288        }
289    }
290
291    /// Decode value at `offset` in `pk`.
292    pub fn decode_value_at(&self, pk: &[u8], offset: usize, column_id: ColumnId) -> Result<Value> {
293        let mut deserializer = Deserializer::new(pk);
294        deserializer.advance(offset);
295        // Safety: checked by `has_column`
296        let field = self.get_field(column_id).unwrap();
297        field.deserialize(&mut deserializer)
298    }
299
300    /// Returns the encoded bytes of the given `column_id` in `pk`.
301    ///
302    /// Returns `Ok(None)` if the `column_id` is missing in `pk`.
303    pub fn encoded_value_for_column<'a>(
304        &self,
305        pk: &'a [u8],
306        offsets_map: &mut HashMap<u32, usize>,
307        column_id: ColumnId,
308    ) -> Result<Option<&'a [u8]>> {
309        let Some(offset) = self.has_column(pk, offsets_map, column_id) else {
310            return Ok(None);
311        };
312
313        let Some(field) = self.get_field(column_id) else {
314            return Ok(None);
315        };
316
317        let mut deserializer = Deserializer::new(pk);
318        deserializer.advance(offset);
319        let len = field.skip_deserialize(pk, &mut deserializer)?;
320        Ok(Some(&pk[offset..offset + len]))
321    }
322}
323
324impl PrimaryKeyCodec for SparsePrimaryKeyCodec {
325    fn encode_key_value(&self, _key_value: &KeyValue, _buffer: &mut Vec<u8>) -> Result<()> {
326        UnsupportedOperationSnafu {
327            err_msg: "The encode_key_value method is not supported in SparsePrimaryKeyCodec.",
328        }
329        .fail()
330    }
331
332    fn encode_values(&self, values: &[(ColumnId, Value)], buffer: &mut Vec<u8>) -> Result<()> {
333        self.encode_to_vec(values.iter().map(|v| (v.0, v.1.as_value_ref())), buffer)
334    }
335
336    fn encode_value_refs(
337        &self,
338        values: &[(ColumnId, ValueRef)],
339        buffer: &mut Vec<u8>,
340    ) -> Result<()> {
341        self.encode_to_vec(values.iter().map(|v| (v.0, v.1.clone())), buffer)
342    }
343
344    fn estimated_size(&self) -> Option<usize> {
345        None
346    }
347
348    fn num_fields(&self) -> Option<usize> {
349        None
350    }
351
352    fn encoding(&self) -> PrimaryKeyEncoding {
353        PrimaryKeyEncoding::Sparse
354    }
355
356    fn primary_key_filter(
357        &self,
358        metadata: &RegionMetadataRef,
359        filters: Arc<Vec<SimpleFilterEvaluator>>,
360        skip_partition_column: bool,
361    ) -> Box<dyn PrimaryKeyFilter> {
362        Box::new(SparsePrimaryKeyFilter::new(
363            metadata.clone(),
364            filters,
365            self.clone(),
366            skip_partition_column,
367        ))
368    }
369
370    fn decode(&self, bytes: &[u8]) -> Result<CompositeValues> {
371        Ok(CompositeValues::Sparse(self.decode_sparse(bytes)?))
372    }
373
374    fn decode_leftmost(&self, bytes: &[u8]) -> Result<Option<Value>> {
375        self.decode_leftmost(bytes)
376    }
377}
378
/// Field with column id.
///
/// Pairs the [`SortField`] used to serialize a column's value with the column id that
/// [`SparseEncoder`] writes in front of that value.
pub struct FieldWithId {
    /// Sort field used to serialize the column's value.
    pub field: SortField,
    /// Id of the column.
    pub column_id: ColumnId,
}
384
385/// A special encoder for memtable.
386pub struct SparseEncoder {
387    fields: Vec<FieldWithId>,
388}
389
390impl SparseEncoder {
391    pub fn new(fields: Vec<FieldWithId>) -> Self {
392        Self { fields }
393    }
394
395    pub fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
396    where
397        I: Iterator<Item = ValueRef<'a>>,
398    {
399        let mut serializer = Serializer::new(buffer);
400        for (value, field) in row.zip(self.fields.iter()) {
401            if !value.is_null() {
402                field
403                    .column_id
404                    .serialize(&mut serializer)
405                    .context(SerializeFieldSnafu)?;
406                field.field.serialize(&mut serializer, &value)?;
407            }
408        }
409        Ok(())
410    }
411}
412
413#[cfg(test)]
414mod tests {
415    use std::sync::Arc;
416
417    use api::v1::SemanticType;
418    use common_query::prelude::{greptime_timestamp, greptime_value};
419    use common_time::Timestamp;
420    use common_time::timestamp::TimeUnit;
421    use datatypes::schema::ColumnSchema;
422    use datatypes::value::{OrderedFloat, Value};
423    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
424    use store_api::metric_engine_consts::{
425        DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME,
426    };
427    use store_api::storage::{ColumnId, RegionId};
428
429    use super::*;
430
431    fn test_region_metadata() -> RegionMetadataRef {
432        let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
433        builder
434            .push_column_metadata(ColumnMetadata {
435                column_schema: ColumnSchema::new(
436                    DATA_SCHEMA_TABLE_ID_COLUMN_NAME,
437                    ConcreteDataType::uint32_datatype(),
438                    false,
439                ),
440                semantic_type: SemanticType::Tag,
441                column_id: ReservedColumnId::table_id(),
442            })
443            .push_column_metadata(ColumnMetadata {
444                column_schema: ColumnSchema::new(
445                    DATA_SCHEMA_TSID_COLUMN_NAME,
446                    ConcreteDataType::uint64_datatype(),
447                    false,
448                ),
449                semantic_type: SemanticType::Tag,
450                column_id: ReservedColumnId::tsid(),
451            })
452            .push_column_metadata(ColumnMetadata {
453                column_schema: ColumnSchema::new("pod", ConcreteDataType::string_datatype(), true),
454                semantic_type: SemanticType::Tag,
455                column_id: 1,
456            })
457            .push_column_metadata(ColumnMetadata {
458                column_schema: ColumnSchema::new(
459                    "namespace",
460                    ConcreteDataType::string_datatype(),
461                    true,
462                ),
463                semantic_type: SemanticType::Tag,
464                column_id: 2,
465            })
466            .push_column_metadata(ColumnMetadata {
467                column_schema: ColumnSchema::new(
468                    "container",
469                    ConcreteDataType::string_datatype(),
470                    true,
471                ),
472                semantic_type: SemanticType::Tag,
473                column_id: 3,
474            })
475            .push_column_metadata(ColumnMetadata {
476                column_schema: ColumnSchema::new(
477                    "pod_name",
478                    ConcreteDataType::string_datatype(),
479                    true,
480                ),
481                semantic_type: SemanticType::Tag,
482                column_id: 4,
483            })
484            .push_column_metadata(ColumnMetadata {
485                column_schema: ColumnSchema::new(
486                    "pod_ip",
487                    ConcreteDataType::string_datatype(),
488                    true,
489                ),
490                semantic_type: SemanticType::Tag,
491                column_id: 5,
492            })
493            .push_column_metadata(ColumnMetadata {
494                column_schema: ColumnSchema::new(
495                    greptime_value(),
496                    ConcreteDataType::float64_datatype(),
497                    false,
498                ),
499                semantic_type: SemanticType::Field,
500                column_id: 6,
501            })
502            .push_column_metadata(ColumnMetadata {
503                column_schema: ColumnSchema::new(
504                    greptime_timestamp(),
505                    ConcreteDataType::timestamp_nanosecond_datatype(),
506                    false,
507                ),
508                semantic_type: SemanticType::Timestamp,
509                column_id: 7,
510            })
511            .primary_key(vec![
512                ReservedColumnId::table_id(),
513                ReservedColumnId::tsid(),
514                1,
515                2,
516                3,
517                4,
518                5,
519            ]);
520        let metadata = builder.build().unwrap();
521        Arc::new(metadata)
522    }
523
524    #[test]
525    fn test_sparse_value_new_and_get_or_null() {
526        let mut values = HashMap::new();
527        values.insert(1, Value::Int32(42));
528        let sparse_value = SparseValues::new(values);
529
530        assert_eq!(sparse_value.get_or_null(1), &Value::Int32(42));
531        assert_eq!(sparse_value.get_or_null(2), &Value::Null);
532    }
533
534    #[test]
535    fn test_sparse_value_insert() {
536        let mut sparse_value = SparseValues::new(HashMap::new());
537        sparse_value.insert(1, Value::Int32(42));
538
539        assert_eq!(sparse_value.get_or_null(1), &Value::Int32(42));
540    }
541
    /// Returns one sample row as `(column id, value)` pairs: the two reserved internal
    /// columns, five string labels (ids 1-5), one float field (id 6) and one timestamp
    /// (id 7).
    fn test_row() -> Vec<(ColumnId, ValueRef<'static>)> {
        vec![
            (RESERVED_COLUMN_ID_TABLE_ID, ValueRef::UInt32(42)),
            (
                RESERVED_COLUMN_ID_TSID,
                ValueRef::UInt64(123843349035232323),
            ),
            // label: pod
            (1, ValueRef::String("greptime-frontend-6989d9899-22222")),
            // label: namespace
            (2, ValueRef::String("greptime-cluster")),
            // label: container
            (3, ValueRef::String("greptime-frontend-6989d9899-22222")),
            // label: pod_name
            (4, ValueRef::String("greptime-frontend-6989d9899-22222")),
            // label: pod_ip
            (5, ValueRef::String("10.10.10.10")),
            // field: greptime_value
            (6, ValueRef::Float64(OrderedFloat(1.0))),
            // timestamp: greptime_timestamp
            (
                7,
                ValueRef::Timestamp(Timestamp::new(1618876800000000000, TimeUnit::Nanosecond)),
            ),
        ]
    }
568
569    #[test]
570    fn test_encode_by_short_cuts() {
571        let region_metadata = test_region_metadata();
572        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
573        let mut buffer = Vec::new();
574        let internal_columns = [
575            (RESERVED_COLUMN_ID_TABLE_ID, ValueRef::UInt32(1024)),
576            (RESERVED_COLUMN_ID_TSID, ValueRef::UInt64(42)),
577        ];
578        let tags = [
579            (1, "greptime-frontend-6989d9899-22222"),
580            (2, "greptime-cluster"),
581            (3, "greptime-frontend-6989d9899-22222"),
582            (4, "greptime-frontend-6989d9899-22222"),
583            (5, "10.10.10.10"),
584        ];
585        codec
586            .encode_to_vec(internal_columns.into_iter(), &mut buffer)
587            .unwrap();
588        codec
589            .encode_to_vec(
590                tags.iter()
591                    .map(|(col_id, tag_value)| (*col_id, ValueRef::String(tag_value))),
592                &mut buffer,
593            )
594            .unwrap();
595
596        let mut buffer_by_raw_encoding = Vec::new();
597        codec
598            .encode_internal(1024, 42, &mut buffer_by_raw_encoding)
599            .unwrap();
600        let tags: Vec<_> = tags
601            .into_iter()
602            .map(|(col_id, tag_value)| (col_id, tag_value.as_bytes()))
603            .collect();
604        codec
605            .encode_raw_tag_value(
606                tags.iter().map(|(c, b)| (*c, *b)),
607                &mut buffer_by_raw_encoding,
608            )
609            .unwrap();
610        assert_eq!(buffer, buffer_by_raw_encoding);
611    }
612
613    #[test]
614    fn test_encode_to_vec() {
615        let region_metadata = test_region_metadata();
616        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
617        let mut buffer = Vec::new();
618
619        let row = test_row();
620        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
621        assert!(!buffer.is_empty());
622        let sparse_value = codec.decode_sparse(&buffer).unwrap();
623        assert_eq!(
624            sparse_value.get_or_null(RESERVED_COLUMN_ID_TABLE_ID),
625            &Value::UInt32(42)
626        );
627        assert_eq!(
628            sparse_value.get_or_null(1),
629            &Value::String("greptime-frontend-6989d9899-22222".into())
630        );
631        assert_eq!(
632            sparse_value.get_or_null(2),
633            &Value::String("greptime-cluster".into())
634        );
635        assert_eq!(
636            sparse_value.get_or_null(3),
637            &Value::String("greptime-frontend-6989d9899-22222".into())
638        );
639        assert_eq!(
640            sparse_value.get_or_null(4),
641            &Value::String("greptime-frontend-6989d9899-22222".into())
642        );
643        assert_eq!(
644            sparse_value.get_or_null(5),
645            &Value::String("10.10.10.10".into())
646        );
647    }
648
649    #[test]
650    fn test_decode_leftmost() {
651        let region_metadata = test_region_metadata();
652        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
653        let mut buffer = Vec::new();
654        let row = test_row();
655        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
656        assert!(!buffer.is_empty());
657        let result = codec.decode_leftmost(&buffer).unwrap().unwrap();
658        assert_eq!(result, Value::UInt32(42));
659    }
660
661    #[test]
662    fn test_has_column() {
663        let region_metadata = test_region_metadata();
664        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
665        let mut buffer = Vec::new();
666        let row = test_row();
667        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
668        assert!(!buffer.is_empty());
669
670        let mut offsets_map = HashMap::new();
671        for column_id in [
672            RESERVED_COLUMN_ID_TABLE_ID,
673            RESERVED_COLUMN_ID_TSID,
674            1,
675            2,
676            3,
677            4,
678            5,
679        ] {
680            let offset = codec.has_column(&buffer, &mut offsets_map, column_id);
681            assert!(offset.is_some());
682        }
683
684        let offset = codec.has_column(&buffer, &mut offsets_map, 6);
685        assert!(offset.is_none());
686    }
687
688    #[test]
689    fn test_decode_value_at() {
690        let region_metadata = test_region_metadata();
691        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
692        let mut buffer = Vec::new();
693        let row = test_row();
694        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
695        assert!(!buffer.is_empty());
696
697        let row = test_row();
698        let mut offsets_map = HashMap::new();
699        for column_id in [
700            RESERVED_COLUMN_ID_TABLE_ID,
701            RESERVED_COLUMN_ID_TSID,
702            1,
703            2,
704            3,
705            4,
706            5,
707        ] {
708            let offset = codec
709                .has_column(&buffer, &mut offsets_map, column_id)
710                .unwrap();
711            let value = codec.decode_value_at(&buffer, offset, column_id).unwrap();
712            let expected_value = row
713                .iter()
714                .find(|(id, _)| *id == column_id)
715                .unwrap()
716                .1
717                .clone();
718            assert_eq!(value.as_value_ref(), expected_value);
719        }
720    }
721
722    #[test]
723    fn test_encoded_value_for_column() {
724        let region_metadata = test_region_metadata();
725        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
726        let mut buffer = Vec::new();
727        let row = test_row();
728        codec
729            .encode_to_vec(row.clone().into_iter(), &mut buffer)
730            .unwrap();
731        assert!(!buffer.is_empty());
732
733        let mut offsets_map = HashMap::new();
734        for column_id in [
735            RESERVED_COLUMN_ID_TABLE_ID,
736            RESERVED_COLUMN_ID_TSID,
737            1,
738            2,
739            3,
740            4,
741            5,
742        ] {
743            let encoded_value = codec
744                .encoded_value_for_column(&buffer, &mut offsets_map, column_id)
745                .unwrap()
746                .unwrap();
747            let expected_value = row
748                .iter()
749                .find(|(id, _)| *id == column_id)
750                .unwrap()
751                .1
752                .clone();
753            let data_type = match column_id {
754                RESERVED_COLUMN_ID_TABLE_ID => ConcreteDataType::uint32_datatype(),
755                RESERVED_COLUMN_ID_TSID => ConcreteDataType::uint64_datatype(),
756                _ => ConcreteDataType::string_datatype(),
757            };
758            let field = SortField::new(data_type);
759            let mut expected_encoded = Vec::new();
760            let mut serializer = Serializer::new(&mut expected_encoded);
761            field.serialize(&mut serializer, &expected_value).unwrap();
762            assert_eq!(encoded_value, expected_encoded.as_slice());
763        }
764
765        for column_id in [6_u32, 7_u32, 999_u32] {
766            let encoded_value = codec
767                .encoded_value_for_column(&buffer, &mut offsets_map, column_id)
768                .unwrap();
769            assert!(encoded_value.is_none());
770        }
771    }
772}