Skip to main content

mito_codec/row_converter/
sparse.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{HashMap, HashSet};
16use std::sync::Arc;
17
18use bytes::BufMut;
19use common_recordbatch::filter::SimpleFilterEvaluator;
20use datatypes::prelude::ConcreteDataType;
21use datatypes::value::{Value, ValueRef};
22use memcomparable::{Deserializer, Serializer};
23use serde::{Deserialize, Serialize};
24use snafu::ResultExt;
25use store_api::codec::PrimaryKeyEncoding;
26use store_api::metadata::RegionMetadataRef;
27use store_api::storage::ColumnId;
28use store_api::storage::consts::ReservedColumnId;
29
30use crate::error::{DeserializeFieldSnafu, Result, SerializeFieldSnafu, UnsupportedOperationSnafu};
31use crate::key_values::KeyValue;
32use crate::primary_key_filter::SparsePrimaryKeyFilter;
33use crate::row_converter::dense::SortField;
34use crate::row_converter::{CompositeValues, PrimaryKeyCodec, PrimaryKeyFilter};
35
/// A codec for the sparse primary key of metrics.
///
/// ## Encoding format
/// Each primary key is encoded as a sequence of `(column_id, value)` pairs:
/// - The first two fields are always the reserved `table_id` (uint32) and `tsid` (uint64).
/// - User-defined labels follow, sorted by **column name** in lexicographical order.
/// - Null values are omitted (not encoded).
///
/// The `column_id` is encoded as a 4-byte big-endian integer, and the value is encoded
/// using memcomparable serialization.
///
/// `decode_leftmost` always decodes the first value from the encoded bytes (i.e., the
/// `table_id` field).
///
/// ## Requirements
/// The input primary key columns must be sorted by column name in lexicographical order.
/// The encoded column ids are the column ids of the physical region.
///
/// The codec state lives behind an [`Arc`], so cloning a codec only clones the pointer.
#[derive(Clone, Debug)]
pub struct SparsePrimaryKeyCodec {
    inner: Arc<SparsePrimaryKeyCodecInner>,
}
57
/// Shared state of a [`SparsePrimaryKeyCodec`].
#[derive(Debug)]
struct SparsePrimaryKeyCodecInner {
    /// Internal field: sort field used for the reserved `table_id` column.
    table_id_field: SortField,
    /// Internal field: sort field used for the reserved `tsid` column.
    tsid_field: SortField,
    /// Sort field shared by all user defined label columns.
    label_field: SortField,
    /// Columns in the primary key.
    ///
    /// `None` means every unknown column is treated as a primary key column
    /// (encoded with `Self::label_field`).
    columns: Option<HashSet<ColumnId>>,
}
71
/// Sparse values representation.
///
/// A map of [`ColumnId`] to [`Value`]; columns without an entry are treated as absent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SparseValues {
    /// The stored values, keyed by column id.
    values: HashMap<ColumnId, Value>,
}
79
80impl SparseValues {
81    /// Creates a new [`SparseValues`] instance.
82    pub fn new(values: HashMap<ColumnId, Value>) -> Self {
83        Self { values }
84    }
85
86    /// Returns the value of the given column, or [`Value::Null`] if the column is not present.
87    pub fn get_or_null(&self, column_id: ColumnId) -> &Value {
88        self.values.get(&column_id).unwrap_or(&Value::Null)
89    }
90
91    /// Returns the value of the given column, or [`None`] if the column is not present.
92    pub fn get(&self, column_id: &ColumnId) -> Option<&Value> {
93        self.values.get(column_id)
94    }
95
96    /// Inserts a new value into the [`SparseValues`].
97    pub fn insert(&mut self, column_id: ColumnId, value: Value) {
98        self.values.insert(column_id, value);
99    }
100
101    /// Returns an iterator over all stored column id/value pairs.
102    pub fn iter(&self) -> impl Iterator<Item = (&ColumnId, &Value)> {
103        self.values.iter()
104    }
105}
106
/// The reserved column id of the `tsid` column (see [`ReservedColumnId::tsid`]).
pub const RESERVED_COLUMN_ID_TSID: ColumnId = ReservedColumnId::tsid();
/// The reserved column id of the `table_id` column (see [`ReservedColumnId::table_id`]).
pub const RESERVED_COLUMN_ID_TABLE_ID: ColumnId = ReservedColumnId::table_id();
/// The number of bytes an encoded column id occupies in the encoded sparse row.
pub const COLUMN_ID_ENCODE_SIZE: usize = 4;
113
114impl SparsePrimaryKeyCodec {
115    /// Creates a new [`SparsePrimaryKeyCodec`] instance.
116    pub fn from_columns(columns_ids: impl Iterator<Item = ColumnId>) -> Self {
117        let columns = columns_ids.collect();
118        Self {
119            inner: Arc::new(SparsePrimaryKeyCodecInner {
120                table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
121                tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
122                label_field: SortField::new(ConcreteDataType::string_datatype()),
123                columns: Some(columns),
124            }),
125        }
126    }
127
128    /// Creates a new [`SparsePrimaryKeyCodec`] instance.
129    pub fn new(region_metadata: &RegionMetadataRef) -> Self {
130        Self::from_columns(region_metadata.primary_key_columns().map(|c| c.column_id))
131    }
132
133    /// Returns a new [`SparsePrimaryKeyCodec`] instance.
134    ///
135    /// It treats all unknown columns as primary key(label field).
136    pub fn schemaless() -> Self {
137        Self {
138            inner: Arc::new(SparsePrimaryKeyCodecInner {
139                table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
140                tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
141                label_field: SortField::new(ConcreteDataType::string_datatype()),
142                columns: None,
143            }),
144        }
145    }
146
147    /// Creates a new [`SparsePrimaryKeyCodec`] instance with additional label `fields`.
148    pub fn with_fields(fields: Vec<(ColumnId, SortField)>) -> Self {
149        Self {
150            inner: Arc::new(SparsePrimaryKeyCodecInner {
151                columns: Some(fields.iter().map(|f| f.0).collect()),
152                table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
153                tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
154                label_field: SortField::new(ConcreteDataType::string_datatype()),
155            }),
156        }
157    }
158
159    /// Returns the field of the given column id.
160    fn get_field(&self, column_id: ColumnId) -> Option<&SortField> {
161        // if the `columns` is not specified, all unknown columns is primary key(label field).
162        if let Some(columns) = &self.inner.columns
163            && !columns.contains(&column_id)
164        {
165            return None;
166        }
167
168        match column_id {
169            RESERVED_COLUMN_ID_TABLE_ID => Some(&self.inner.table_id_field),
170            RESERVED_COLUMN_ID_TSID => Some(&self.inner.tsid_field),
171            _ => Some(&self.inner.label_field),
172        }
173    }
174
175    /// Encodes the given bytes into a [`SparseValues`].
176    pub fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
177    where
178        I: Iterator<Item = (ColumnId, ValueRef<'a>)>,
179    {
180        let mut serializer = Serializer::new(buffer);
181        for (column_id, value) in row {
182            if value.is_null() {
183                continue;
184            }
185
186            if let Some(field) = self.get_field(column_id) {
187                column_id
188                    .serialize(&mut serializer)
189                    .context(SerializeFieldSnafu)?;
190                field.serialize(&mut serializer, &value)?;
191            } else {
192                // TODO(weny): handle the error.
193                common_telemetry::warn!("Column {} is not in primary key, skipping", column_id);
194            }
195        }
196        Ok(())
197    }
198
199    pub fn encode_raw_tag_value<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
200    where
201        I: Iterator<Item = (ColumnId, &'a [u8])>,
202    {
203        for (tag_column_id, tag_value) in row {
204            let value_len = tag_value.len();
205            buffer.reserve(6 + value_len / 8 * 9);
206            buffer.put_u32(tag_column_id);
207            buffer.put_u8(1);
208            buffer.put_u8(!tag_value.is_empty() as u8);
209
210            // Manual implementation of memcomparable::ser::Serializer::serialize_bytes
211            // to avoid byte-by-byte put.
212            let mut len = 0;
213            let num_chucks = value_len / 8;
214            let remainder = value_len % 8;
215
216            for idx in 0..num_chucks {
217                buffer.extend_from_slice(&tag_value[idx * 8..idx * 8 + 8]);
218                len += 8;
219                // append an extra byte that signals the number of significant bytes in this chunk
220                // 1-8: many bytes were significant and this group is the last group
221                // 9: all 8 bytes were significant and there is more data to come
222                let extra = if len == value_len { 8 } else { 9 };
223                buffer.put_u8(extra);
224            }
225
226            if remainder != 0 {
227                buffer.extend_from_slice(&tag_value[len..value_len]);
228                buffer.put_bytes(0, 8 - remainder);
229                buffer.put_u8(remainder as u8);
230            }
231        }
232        Ok(())
233    }
234
235    /// Encodes the given bytes into a [`SparseValues`].
236    pub fn encode_internal(&self, table_id: u32, tsid: u64, buffer: &mut Vec<u8>) -> Result<()> {
237        buffer.reserve_exact(22);
238        buffer.put_u32(RESERVED_COLUMN_ID_TABLE_ID);
239        buffer.put_u8(1);
240        buffer.put_u32(table_id);
241        buffer.put_u32(RESERVED_COLUMN_ID_TSID);
242        buffer.put_u8(1);
243        buffer.put_u64(tsid);
244        Ok(())
245    }
246
247    /// Decodes the given bytes into a [`SparseValues`].
248    fn decode_sparse(&self, bytes: &[u8]) -> Result<SparseValues> {
249        let mut deserializer = Deserializer::new(bytes);
250        let mut values = SparseValues::new(HashMap::new());
251
252        let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
253        let value = self.inner.table_id_field.deserialize(&mut deserializer)?;
254        values.insert(column_id, value);
255
256        let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
257        let value = self.inner.tsid_field.deserialize(&mut deserializer)?;
258        values.insert(column_id, value);
259        while deserializer.has_remaining() {
260            let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
261            let value = self.inner.label_field.deserialize(&mut deserializer)?;
262            values.insert(column_id, value);
263        }
264
265        Ok(values)
266    }
267
268    /// Decodes the given bytes into a [`Value`].
269    fn decode_leftmost(&self, bytes: &[u8]) -> Result<Option<Value>> {
270        let mut deserializer = Deserializer::new(bytes);
271        // Skip the column id.
272        deserializer.advance(COLUMN_ID_ENCODE_SIZE);
273        let value = self.inner.table_id_field.deserialize(&mut deserializer)?;
274        Ok(Some(value))
275    }
276
277    /// Returns the offset of the given column id in the given primary key.
278    pub fn has_column(
279        &self,
280        pk: &[u8],
281        offsets_map: &mut HashMap<u32, usize>,
282        column_id: ColumnId,
283    ) -> Option<usize> {
284        if offsets_map.is_empty() {
285            let mut deserializer = Deserializer::new(pk);
286            let mut offset = 0;
287            while deserializer.has_remaining() {
288                let column_id = u32::deserialize(&mut deserializer).unwrap();
289                offset += 4;
290                offsets_map.insert(column_id, offset);
291                let Some(field) = self.get_field(column_id) else {
292                    break;
293                };
294
295                let skip = field.skip_deserialize(pk, &mut deserializer).unwrap();
296                offset += skip;
297            }
298
299            offsets_map.get(&column_id).copied()
300        } else {
301            offsets_map.get(&column_id).copied()
302        }
303    }
304
305    /// Decode value at `offset` in `pk`.
306    pub fn decode_value_at(&self, pk: &[u8], offset: usize, column_id: ColumnId) -> Result<Value> {
307        let mut deserializer = Deserializer::new(pk);
308        deserializer.advance(offset);
309        // Safety: checked by `has_column`
310        let field = self.get_field(column_id).unwrap();
311        field.deserialize(&mut deserializer)
312    }
313
314    /// Returns the encoded bytes of the given `column_id` in `pk`.
315    ///
316    /// Returns `Ok(None)` if the `column_id` is missing in `pk`.
317    pub fn encoded_value_for_column<'a>(
318        &self,
319        pk: &'a [u8],
320        offsets_map: &mut HashMap<u32, usize>,
321        column_id: ColumnId,
322    ) -> Result<Option<&'a [u8]>> {
323        let Some(offset) = self.has_column(pk, offsets_map, column_id) else {
324            return Ok(None);
325        };
326
327        let Some(field) = self.get_field(column_id) else {
328            return Ok(None);
329        };
330
331        let mut deserializer = Deserializer::new(pk);
332        deserializer.advance(offset);
333        let len = field.skip_deserialize(pk, &mut deserializer)?;
334        Ok(Some(&pk[offset..offset + len]))
335    }
336}
337
338impl PrimaryKeyCodec for SparsePrimaryKeyCodec {
339    fn encode_key_value(&self, _key_value: &KeyValue, _buffer: &mut Vec<u8>) -> Result<()> {
340        UnsupportedOperationSnafu {
341            err_msg: "The encode_key_value method is not supported in SparsePrimaryKeyCodec.",
342        }
343        .fail()
344    }
345
346    fn encode_values(&self, values: &[(ColumnId, Value)], buffer: &mut Vec<u8>) -> Result<()> {
347        self.encode_to_vec(values.iter().map(|v| (v.0, v.1.as_value_ref())), buffer)
348    }
349
350    fn encode_value_refs(
351        &self,
352        values: &[(ColumnId, ValueRef)],
353        buffer: &mut Vec<u8>,
354    ) -> Result<()> {
355        self.encode_to_vec(values.iter().map(|v| (v.0, v.1.clone())), buffer)
356    }
357
358    fn estimated_size(&self) -> Option<usize> {
359        None
360    }
361
362    fn num_fields(&self) -> Option<usize> {
363        None
364    }
365
366    fn encoding(&self) -> PrimaryKeyEncoding {
367        PrimaryKeyEncoding::Sparse
368    }
369
370    fn primary_key_filter(
371        &self,
372        metadata: &RegionMetadataRef,
373        filters: Arc<Vec<SimpleFilterEvaluator>>,
374        skip_partition_column: bool,
375    ) -> Box<dyn PrimaryKeyFilter> {
376        Box::new(SparsePrimaryKeyFilter::new(
377            metadata.clone(),
378            filters,
379            self.clone(),
380            skip_partition_column,
381        ))
382    }
383
384    fn decode(&self, bytes: &[u8]) -> Result<CompositeValues> {
385        Ok(CompositeValues::Sparse(self.decode_sparse(bytes)?))
386    }
387
388    fn decode_leftmost(&self, bytes: &[u8]) -> Result<Option<Value>> {
389        self.decode_leftmost(bytes)
390    }
391}
392
/// A [`SortField`] paired with the id of the column it encodes.
pub struct FieldWithId {
    /// The sort field used to serialize the column's values.
    pub field: SortField,
    /// The column id written in front of each encoded value.
    pub column_id: ColumnId,
}
398
/// A special encoder for memtable.
///
/// Encodes rows positionally: the n-th value of a row is encoded with the n-th
/// entry of `fields`.
pub struct SparseEncoder {
    /// The fields (with their column ids), in the order row values are supplied.
    fields: Vec<FieldWithId>,
}
403
404impl SparseEncoder {
405    pub fn new(fields: Vec<FieldWithId>) -> Self {
406        Self { fields }
407    }
408
409    pub fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
410    where
411        I: Iterator<Item = ValueRef<'a>>,
412    {
413        let mut serializer = Serializer::new(buffer);
414        for (value, field) in row.zip(self.fields.iter()) {
415            if !value.is_null() {
416                field
417                    .column_id
418                    .serialize(&mut serializer)
419                    .context(SerializeFieldSnafu)?;
420                field.field.serialize(&mut serializer, &value)?;
421            }
422        }
423        Ok(())
424    }
425}
426
427#[cfg(test)]
428mod tests {
429    use std::sync::Arc;
430
431    use api::v1::SemanticType;
432    use common_query::prelude::{greptime_timestamp, greptime_value};
433    use common_time::Timestamp;
434    use common_time::timestamp::TimeUnit;
435    use datatypes::schema::ColumnSchema;
436    use datatypes::value::{OrderedFloat, Value};
437    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
438    use store_api::metric_engine_consts::{
439        DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME,
440    };
441    use store_api::storage::{ColumnId, RegionId};
442
443    use super::*;
444
445    fn test_region_metadata() -> RegionMetadataRef {
446        let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
447        builder
448            .push_column_metadata(ColumnMetadata {
449                column_schema: ColumnSchema::new(
450                    DATA_SCHEMA_TABLE_ID_COLUMN_NAME,
451                    ConcreteDataType::uint32_datatype(),
452                    false,
453                ),
454                semantic_type: SemanticType::Tag,
455                column_id: ReservedColumnId::table_id(),
456            })
457            .push_column_metadata(ColumnMetadata {
458                column_schema: ColumnSchema::new(
459                    DATA_SCHEMA_TSID_COLUMN_NAME,
460                    ConcreteDataType::uint64_datatype(),
461                    false,
462                ),
463                semantic_type: SemanticType::Tag,
464                column_id: ReservedColumnId::tsid(),
465            })
466            .push_column_metadata(ColumnMetadata {
467                column_schema: ColumnSchema::new("pod", ConcreteDataType::string_datatype(), true),
468                semantic_type: SemanticType::Tag,
469                column_id: 1,
470            })
471            .push_column_metadata(ColumnMetadata {
472                column_schema: ColumnSchema::new(
473                    "namespace",
474                    ConcreteDataType::string_datatype(),
475                    true,
476                ),
477                semantic_type: SemanticType::Tag,
478                column_id: 2,
479            })
480            .push_column_metadata(ColumnMetadata {
481                column_schema: ColumnSchema::new(
482                    "container",
483                    ConcreteDataType::string_datatype(),
484                    true,
485                ),
486                semantic_type: SemanticType::Tag,
487                column_id: 3,
488            })
489            .push_column_metadata(ColumnMetadata {
490                column_schema: ColumnSchema::new(
491                    "pod_name",
492                    ConcreteDataType::string_datatype(),
493                    true,
494                ),
495                semantic_type: SemanticType::Tag,
496                column_id: 4,
497            })
498            .push_column_metadata(ColumnMetadata {
499                column_schema: ColumnSchema::new(
500                    "pod_ip",
501                    ConcreteDataType::string_datatype(),
502                    true,
503                ),
504                semantic_type: SemanticType::Tag,
505                column_id: 5,
506            })
507            .push_column_metadata(ColumnMetadata {
508                column_schema: ColumnSchema::new(
509                    greptime_value(),
510                    ConcreteDataType::float64_datatype(),
511                    false,
512                ),
513                semantic_type: SemanticType::Field,
514                column_id: 6,
515            })
516            .push_column_metadata(ColumnMetadata {
517                column_schema: ColumnSchema::new(
518                    greptime_timestamp(),
519                    ConcreteDataType::timestamp_nanosecond_datatype(),
520                    false,
521                ),
522                semantic_type: SemanticType::Timestamp,
523                column_id: 7,
524            })
525            .primary_key(vec![
526                ReservedColumnId::table_id(),
527                ReservedColumnId::tsid(),
528                1,
529                2,
530                3,
531                4,
532                5,
533            ]);
534        let metadata = builder.build().unwrap();
535        Arc::new(metadata)
536    }
537
538    #[test]
539    fn test_sparse_value_new_and_get_or_null() {
540        let mut values = HashMap::new();
541        values.insert(1, Value::Int32(42));
542        let sparse_value = SparseValues::new(values);
543
544        assert_eq!(sparse_value.get_or_null(1), &Value::Int32(42));
545        assert_eq!(sparse_value.get_or_null(2), &Value::Null);
546    }
547
548    #[test]
549    fn test_sparse_value_insert() {
550        let mut sparse_value = SparseValues::new(HashMap::new());
551        sparse_value.insert(1, Value::Int32(42));
552
553        assert_eq!(sparse_value.get_or_null(1), &Value::Int32(42));
554    }
555
556    fn test_row() -> Vec<(ColumnId, ValueRef<'static>)> {
557        vec![
558            (RESERVED_COLUMN_ID_TABLE_ID, ValueRef::UInt32(42)),
559            (
560                RESERVED_COLUMN_ID_TSID,
561                ValueRef::UInt64(123843349035232323),
562            ),
563            // label: pod
564            (1, ValueRef::String("greptime-frontend-6989d9899-22222")),
565            // label: namespace
566            (2, ValueRef::String("greptime-cluster")),
567            // label: container
568            (3, ValueRef::String("greptime-frontend-6989d9899-22222")),
569            // label: pod_name
570            (4, ValueRef::String("greptime-frontend-6989d9899-22222")),
571            // label: pod_ip
572            (5, ValueRef::String("10.10.10.10")),
573            // field: greptime_value
574            (6, ValueRef::Float64(OrderedFloat(1.0))),
575            // field: greptime_timestamp
576            (
577                7,
578                ValueRef::Timestamp(Timestamp::new(1618876800000000000, TimeUnit::Nanosecond)),
579            ),
580        ]
581    }
582
583    #[test]
584    fn test_encode_by_short_cuts() {
585        let region_metadata = test_region_metadata();
586        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
587        let mut buffer = Vec::new();
588        let internal_columns = [
589            (RESERVED_COLUMN_ID_TABLE_ID, ValueRef::UInt32(1024)),
590            (RESERVED_COLUMN_ID_TSID, ValueRef::UInt64(42)),
591        ];
592        let tags = [
593            (1, "greptime-frontend-6989d9899-22222"),
594            (2, "greptime-cluster"),
595            (3, "greptime-frontend-6989d9899-22222"),
596            (4, "greptime-frontend-6989d9899-22222"),
597            (5, "10.10.10.10"),
598        ];
599        codec
600            .encode_to_vec(internal_columns.into_iter(), &mut buffer)
601            .unwrap();
602        codec
603            .encode_to_vec(
604                tags.iter()
605                    .map(|(col_id, tag_value)| (*col_id, ValueRef::String(tag_value))),
606                &mut buffer,
607            )
608            .unwrap();
609
610        let mut buffer_by_raw_encoding = Vec::new();
611        codec
612            .encode_internal(1024, 42, &mut buffer_by_raw_encoding)
613            .unwrap();
614        let tags: Vec<_> = tags
615            .into_iter()
616            .map(|(col_id, tag_value)| (col_id, tag_value.as_bytes()))
617            .collect();
618        codec
619            .encode_raw_tag_value(
620                tags.iter().map(|(c, b)| (*c, *b)),
621                &mut buffer_by_raw_encoding,
622            )
623            .unwrap();
624        assert_eq!(buffer, buffer_by_raw_encoding);
625    }
626
627    #[test]
628    fn test_encode_to_vec() {
629        let region_metadata = test_region_metadata();
630        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
631        let mut buffer = Vec::new();
632
633        let row = test_row();
634        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
635        assert!(!buffer.is_empty());
636        let sparse_value = codec.decode_sparse(&buffer).unwrap();
637        assert_eq!(
638            sparse_value.get_or_null(RESERVED_COLUMN_ID_TABLE_ID),
639            &Value::UInt32(42)
640        );
641        assert_eq!(
642            sparse_value.get_or_null(1),
643            &Value::String("greptime-frontend-6989d9899-22222".into())
644        );
645        assert_eq!(
646            sparse_value.get_or_null(2),
647            &Value::String("greptime-cluster".into())
648        );
649        assert_eq!(
650            sparse_value.get_or_null(3),
651            &Value::String("greptime-frontend-6989d9899-22222".into())
652        );
653        assert_eq!(
654            sparse_value.get_or_null(4),
655            &Value::String("greptime-frontend-6989d9899-22222".into())
656        );
657        assert_eq!(
658            sparse_value.get_or_null(5),
659            &Value::String("10.10.10.10".into())
660        );
661    }
662
663    #[test]
664    fn test_decode_leftmost() {
665        let region_metadata = test_region_metadata();
666        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
667        let mut buffer = Vec::new();
668        let row = test_row();
669        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
670        assert!(!buffer.is_empty());
671        let result = codec.decode_leftmost(&buffer).unwrap().unwrap();
672        assert_eq!(result, Value::UInt32(42));
673    }
674
675    #[test]
676    fn test_has_column() {
677        let region_metadata = test_region_metadata();
678        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
679        let mut buffer = Vec::new();
680        let row = test_row();
681        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
682        assert!(!buffer.is_empty());
683
684        let mut offsets_map = HashMap::new();
685        for column_id in [
686            RESERVED_COLUMN_ID_TABLE_ID,
687            RESERVED_COLUMN_ID_TSID,
688            1,
689            2,
690            3,
691            4,
692            5,
693        ] {
694            let offset = codec.has_column(&buffer, &mut offsets_map, column_id);
695            assert!(offset.is_some());
696        }
697
698        let offset = codec.has_column(&buffer, &mut offsets_map, 6);
699        assert!(offset.is_none());
700    }
701
702    #[test]
703    fn test_decode_value_at() {
704        let region_metadata = test_region_metadata();
705        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
706        let mut buffer = Vec::new();
707        let row = test_row();
708        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
709        assert!(!buffer.is_empty());
710
711        let row = test_row();
712        let mut offsets_map = HashMap::new();
713        for column_id in [
714            RESERVED_COLUMN_ID_TABLE_ID,
715            RESERVED_COLUMN_ID_TSID,
716            1,
717            2,
718            3,
719            4,
720            5,
721        ] {
722            let offset = codec
723                .has_column(&buffer, &mut offsets_map, column_id)
724                .unwrap();
725            let value = codec.decode_value_at(&buffer, offset, column_id).unwrap();
726            let expected_value = row
727                .iter()
728                .find(|(id, _)| *id == column_id)
729                .unwrap()
730                .1
731                .clone();
732            assert_eq!(value.as_value_ref(), expected_value);
733        }
734    }
735
736    #[test]
737    fn test_encoded_value_for_column() {
738        let region_metadata = test_region_metadata();
739        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
740        let mut buffer = Vec::new();
741        let row = test_row();
742        codec
743            .encode_to_vec(row.clone().into_iter(), &mut buffer)
744            .unwrap();
745        assert!(!buffer.is_empty());
746
747        let mut offsets_map = HashMap::new();
748        for column_id in [
749            RESERVED_COLUMN_ID_TABLE_ID,
750            RESERVED_COLUMN_ID_TSID,
751            1,
752            2,
753            3,
754            4,
755            5,
756        ] {
757            let encoded_value = codec
758                .encoded_value_for_column(&buffer, &mut offsets_map, column_id)
759                .unwrap()
760                .unwrap();
761            let expected_value = row
762                .iter()
763                .find(|(id, _)| *id == column_id)
764                .unwrap()
765                .1
766                .clone();
767            let data_type = match column_id {
768                RESERVED_COLUMN_ID_TABLE_ID => ConcreteDataType::uint32_datatype(),
769                RESERVED_COLUMN_ID_TSID => ConcreteDataType::uint64_datatype(),
770                _ => ConcreteDataType::string_datatype(),
771            };
772            let field = SortField::new(data_type);
773            let mut expected_encoded = Vec::new();
774            let mut serializer = Serializer::new(&mut expected_encoded);
775            field.serialize(&mut serializer, &expected_value).unwrap();
776            assert_eq!(encoded_value, expected_encoded.as_slice());
777        }
778
779        for column_id in [6_u32, 7_u32, 999_u32] {
780            let encoded_value = codec
781                .encoded_value_for_column(&buffer, &mut offsets_map, column_id)
782                .unwrap();
783            assert!(encoded_value.is_none());
784        }
785    }
786}