Skip to main content

mito_codec/row_converter/
sparse.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{HashMap, HashSet};
16use std::sync::Arc;
17
18use bytes::BufMut;
19use common_recordbatch::filter::SimpleFilterEvaluator;
20use datatypes::prelude::ConcreteDataType;
21use datatypes::value::{Value, ValueRef};
22use memcomparable::{Deserializer, Serializer};
23use serde::{Deserialize, Serialize};
24use snafu::ResultExt;
25use store_api::codec::PrimaryKeyEncoding;
26use store_api::metadata::RegionMetadataRef;
27use store_api::storage::ColumnId;
28use store_api::storage::consts::ReservedColumnId;
29
30use crate::error::{DeserializeFieldSnafu, Result, SerializeFieldSnafu, UnsupportedOperationSnafu};
31use crate::key_values::KeyValue;
32use crate::primary_key_filter::SparsePrimaryKeyFilter;
33use crate::row_converter::dense::SortField;
34use crate::row_converter::{CompositeValues, PrimaryKeyCodec, PrimaryKeyFilter};
35
/// A codec for sparse key of metrics.
///
/// ## Encoding format
/// Each primary key is encoded as a sequence of `(column_id, value)` pairs:
/// - The first two fields are always the reserved `table_id` (uint32) and `tsid` (uint64).
/// - User-defined labels follow, sorted by **column name** in lexicographical order.
/// - Null values are omitted (not encoded).
///
/// The `column_id` is encoded as a 4-byte big-endian integer, and the value is encoded
/// using memcomparable serialization.
///
/// `decode_leftmost` always decodes the first value from the encoded bytes (i.e., the
/// `table_id` field).
///
/// ## Requirements
/// - The input primary key columns must already be sorted by column name in
///   lexicographical order; the codec writes the pairs in the order it receives them.
/// - The encoded column ids are the column ids of the physical region.
#[derive(Clone, Debug)]
pub struct SparsePrimaryKeyCodec {
    // Shared codec state; cloning the codec clones the `Arc`, not the state.
    inner: Arc<SparsePrimaryKeyCodecInner>,
}
57
/// State shared by all clones of a [`SparsePrimaryKeyCodec`].
#[derive(Debug)]
struct SparsePrimaryKeyCodecInner {
    // Field used to (de)serialize the reserved `table_id` column.
    table_id_field: SortField,
    // Field used to (de)serialize the reserved `tsid` column.
    tsid_field: SortField,
    // Field used to (de)serialize every user-defined label column.
    label_field: SortField,
    // Ids of the columns in the primary key.
    //
    // `None` means no column id is rejected: every non-reserved column id is
    // treated as a primary key label (`Self::label_field`).
    columns: Option<HashSet<ColumnId>>,
}
71
72/// Sparse values representation.
73///
74/// Callers must not insert a column id that is already present; otherwise
75/// the existing entry will shadow the newly inserted value on lookup.
76#[derive(Debug, Clone, PartialEq, Eq, Default)]
77pub struct SparseValues {
78    values: Vec<(ColumnId, Value)>,
79}
80
81impl SparseValues {
82    /// Creates an empty [`SparseValues`].
83    pub fn new() -> Self {
84        Self { values: Vec::new() }
85    }
86
87    /// Creates an empty [`SparseValues`] with space reserved for `cap` entries.
88    pub fn with_capacity(cap: usize) -> Self {
89        Self {
90            values: Vec::with_capacity(cap),
91        }
92    }
93
94    /// Returns the value of the given column, or [`Value::Null`] if the column is not present.
95    pub fn get_or_null(&self, column_id: ColumnId) -> &Value {
96        for (id, value) in &self.values {
97            if *id == column_id {
98                return value;
99            }
100        }
101        &Value::Null
102    }
103
104    /// Returns the value of the given column, or [`None`] if the column is not present.
105    pub fn get(&self, column_id: &ColumnId) -> Option<&Value> {
106        for (id, value) in &self.values {
107            if id == column_id {
108                return Some(value);
109            }
110        }
111        None
112    }
113
114    /// Appends a new `(column_id, value)` pair.
115    ///
116    /// Append-only: the caller must ensure `column_id` is not already present.
117    pub fn insert(&mut self, column_id: ColumnId, value: Value) {
118        self.values.push((column_id, value));
119    }
120
121    /// Returns an iterator over all stored column id/value pairs.
122    pub fn iter(&self) -> impl Iterator<Item = (&ColumnId, &Value)> {
123        self.values.iter().map(|(id, value)| (id, value))
124    }
125}
126
/// The column id of the tsid.
pub const RESERVED_COLUMN_ID_TSID: ColumnId = ReservedColumnId::tsid();
/// The column id of the table id.
pub const RESERVED_COLUMN_ID_TABLE_ID: ColumnId = ReservedColumnId::table_id();
/// The size in bytes of an encoded column id in the sparse row.
pub const COLUMN_ID_ENCODE_SIZE: usize = 4;

// Fixed byte offsets for reserved columns in the sparse encoding.
// Layout: [table_id_col_id: 4B][marker: 1B][table_id: 4B][tsid_col_id: 4B][marker: 1B][tsid: 8B]
//
// A "value" offset is the position right after a column id, i.e. it points at
// the 1-byte marker that precedes the raw value. In the sums below, `5` is the
// size of the encoded table_id value (marker + 4 bytes) and `9` is the size of
// the encoded tsid value (marker + 8 bytes).
/// Byte offset to the table_id value (after its 4-byte column id).
const TABLE_ID_VALUE_OFFSET: usize = COLUMN_ID_ENCODE_SIZE;
/// Byte offset to the tsid value (after 9-byte table_id entry + 4-byte tsid column id).
const TSID_VALUE_OFFSET: usize = COLUMN_ID_ENCODE_SIZE + 5 + COLUMN_ID_ENCODE_SIZE;
/// Byte offset where tag columns start (after 9-byte table_id + 13-byte tsid entries).
const TAGS_START_OFFSET: usize = COLUMN_ID_ENCODE_SIZE + 5 + COLUMN_ID_ENCODE_SIZE + 9;

/// Inline capacity for the small-vec fast path of [`SparseOffsetsCache`].
///
/// Most sparse primary keys carry only a handful of tags, so a linear scan
/// over a short `Vec` beats a `HashMap` lookup. Tags beyond this capacity
/// spill into the overflow `HashMap`.
const SPARSE_OFFSETS_INLINE_CAP: usize = 32;
149
150/// A lazily populated cache of tag column offsets inside a sparse primary key.
151#[derive(Debug, Clone)]
152pub struct SparseOffsetsCache {
153    /// Small-vec fast path. Reserves [`SPARSE_OFFSETS_INLINE_CAP`] slots on
154    /// the first insert.
155    inline: Vec<(ColumnId, usize)>,
156    /// Overflow for columns beyond the inline capacity. Lazily allocated.
157    overflow: HashMap<ColumnId, usize>,
158    /// Next byte position in the pk to resume parsing from.
159    cursor: usize,
160    /// True once the decoder has walked past the last tag column (or stopped
161    /// on an unknown column id); no further offsets can be discovered.
162    finished: bool,
163}
164
165impl Default for SparseOffsetsCache {
166    fn default() -> Self {
167        Self::new()
168    }
169}
170
171impl SparseOffsetsCache {
172    pub fn new() -> Self {
173        Self {
174            inline: Vec::new(),
175            overflow: HashMap::new(),
176            cursor: TAGS_START_OFFSET,
177            finished: false,
178        }
179    }
180
181    pub fn clear(&mut self) {
182        self.inline.clear();
183        self.overflow.clear();
184        self.cursor = TAGS_START_OFFSET;
185        self.finished = false;
186    }
187
188    /// Returns the cached offset for `column_id`, if any.
189    fn get(&self, column_id: ColumnId) -> Option<usize> {
190        for entry in &self.inline {
191            if entry.0 == column_id {
192                return Some(entry.1);
193            }
194        }
195        self.overflow.get(&column_id).copied()
196    }
197
198    /// Records a new `(column_id, offset)` entry.
199    fn insert(&mut self, column_id: ColumnId, offset: usize) {
200        if self.inline.len() < SPARSE_OFFSETS_INLINE_CAP {
201            if self.inline.capacity() == 0 {
202                self.inline.reserve_exact(SPARSE_OFFSETS_INLINE_CAP);
203            }
204            self.inline.push((column_id, offset));
205        } else {
206            self.overflow.insert(column_id, offset);
207        }
208    }
209
210    #[cfg(test)]
211    fn contains(&self, column_id: ColumnId) -> bool {
212        self.get(column_id).is_some()
213    }
214}
215
216impl SparsePrimaryKeyCodec {
217    /// Creates a new [`SparsePrimaryKeyCodec`] instance.
218    pub fn from_columns(columns_ids: impl Iterator<Item = ColumnId>) -> Self {
219        let columns = columns_ids.collect();
220        Self {
221            inner: Arc::new(SparsePrimaryKeyCodecInner {
222                table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
223                tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
224                label_field: SortField::new(ConcreteDataType::string_datatype()),
225                columns: Some(columns),
226            }),
227        }
228    }
229
230    /// Creates a new [`SparsePrimaryKeyCodec`] instance.
231    pub fn new(region_metadata: &RegionMetadataRef) -> Self {
232        Self::from_columns(region_metadata.primary_key_columns().map(|c| c.column_id))
233    }
234
235    /// Returns a new [`SparsePrimaryKeyCodec`] instance.
236    ///
237    /// It treats all unknown columns as primary key(label field).
238    pub fn schemaless() -> Self {
239        Self {
240            inner: Arc::new(SparsePrimaryKeyCodecInner {
241                table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
242                tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
243                label_field: SortField::new(ConcreteDataType::string_datatype()),
244                columns: None,
245            }),
246        }
247    }
248
249    /// Creates a new [`SparsePrimaryKeyCodec`] instance with additional label `fields`.
250    pub fn with_fields(fields: Vec<(ColumnId, SortField)>) -> Self {
251        Self {
252            inner: Arc::new(SparsePrimaryKeyCodecInner {
253                columns: Some(fields.iter().map(|f| f.0).collect()),
254                table_id_field: SortField::new(ConcreteDataType::uint32_datatype()),
255                tsid_field: SortField::new(ConcreteDataType::uint64_datatype()),
256                label_field: SortField::new(ConcreteDataType::string_datatype()),
257            }),
258        }
259    }
260
261    /// Returns the field of the given column id.
262    fn get_field(&self, column_id: ColumnId) -> Option<&SortField> {
263        // if the `columns` is not specified, all unknown columns is primary key(label field).
264        if let Some(columns) = &self.inner.columns
265            && !columns.contains(&column_id)
266        {
267            return None;
268        }
269
270        match column_id {
271            RESERVED_COLUMN_ID_TABLE_ID => Some(&self.inner.table_id_field),
272            RESERVED_COLUMN_ID_TSID => Some(&self.inner.tsid_field),
273            _ => Some(&self.inner.label_field),
274        }
275    }
276
277    /// Encodes the given bytes into a [`SparseValues`].
278    pub fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
279    where
280        I: Iterator<Item = (ColumnId, ValueRef<'a>)>,
281    {
282        let mut serializer = Serializer::new(buffer);
283        for (column_id, value) in row {
284            if value.is_null() {
285                continue;
286            }
287
288            if let Some(field) = self.get_field(column_id) {
289                column_id
290                    .serialize(&mut serializer)
291                    .context(SerializeFieldSnafu)?;
292                field.serialize(&mut serializer, &value)?;
293            } else {
294                // TODO(weny): handle the error.
295                common_telemetry::warn!("Column {} is not in primary key, skipping", column_id);
296            }
297        }
298        Ok(())
299    }
300
301    pub fn encode_raw_tag_value<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
302    where
303        I: Iterator<Item = (ColumnId, &'a [u8])>,
304    {
305        for (tag_column_id, tag_value) in row {
306            let value_len = tag_value.len();
307            buffer.reserve(6 + value_len / 8 * 9);
308            buffer.put_u32(tag_column_id);
309            buffer.put_u8(1);
310            buffer.put_u8(!tag_value.is_empty() as u8);
311
312            // Manual implementation of memcomparable::ser::Serializer::serialize_bytes
313            // to avoid byte-by-byte put.
314            let mut len = 0;
315            let num_chucks = value_len / 8;
316            let remainder = value_len % 8;
317
318            for idx in 0..num_chucks {
319                buffer.extend_from_slice(&tag_value[idx * 8..idx * 8 + 8]);
320                len += 8;
321                // append an extra byte that signals the number of significant bytes in this chunk
322                // 1-8: many bytes were significant and this group is the last group
323                // 9: all 8 bytes were significant and there is more data to come
324                let extra = if len == value_len { 8 } else { 9 };
325                buffer.put_u8(extra);
326            }
327
328            if remainder != 0 {
329                buffer.extend_from_slice(&tag_value[len..value_len]);
330                buffer.put_bytes(0, 8 - remainder);
331                buffer.put_u8(remainder as u8);
332            }
333        }
334        Ok(())
335    }
336
337    /// Encodes the given bytes into a [`SparseValues`].
338    pub fn encode_internal(&self, table_id: u32, tsid: u64, buffer: &mut Vec<u8>) -> Result<()> {
339        buffer.reserve_exact(22);
340        buffer.put_u32(RESERVED_COLUMN_ID_TABLE_ID);
341        buffer.put_u8(1);
342        buffer.put_u32(table_id);
343        buffer.put_u32(RESERVED_COLUMN_ID_TSID);
344        buffer.put_u8(1);
345        buffer.put_u64(tsid);
346        Ok(())
347    }
348
349    /// Decodes the given bytes into a [`SparseValues`].
350    fn decode_sparse(&self, bytes: &[u8]) -> Result<SparseValues> {
351        let mut deserializer = Deserializer::new(bytes);
352        let mut values = SparseValues::with_capacity(16);
353
354        let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
355        let value = self.inner.table_id_field.deserialize(&mut deserializer)?;
356        values.insert(column_id, value);
357
358        let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
359        let value = self.inner.tsid_field.deserialize(&mut deserializer)?;
360        values.insert(column_id, value);
361        while deserializer.has_remaining() {
362            let column_id = u32::deserialize(&mut deserializer).context(DeserializeFieldSnafu)?;
363            let value = self.inner.label_field.deserialize(&mut deserializer)?;
364            values.insert(column_id, value);
365        }
366
367        Ok(values)
368    }
369
370    /// Decodes the given bytes into a [`Value`].
371    fn decode_leftmost(&self, bytes: &[u8]) -> Result<Option<Value>> {
372        let mut deserializer = Deserializer::new(bytes);
373        // Skip the column id.
374        deserializer.advance(COLUMN_ID_ENCODE_SIZE);
375        let value = self.inner.table_id_field.deserialize(&mut deserializer)?;
376        Ok(Some(value))
377    }
378
379    /// Returns the offset of the given column id in the given primary key.
380    ///
381    /// The pk must start with the table_id + tsid prefix written by
382    /// `encode_internal`.
383    ///
384    /// # Panics
385    ///
386    /// Panics if `pk` is not a well-formed sparse primary key produced by
387    /// this codec (e.g. truncated or otherwise malformed bytes).
388    pub fn has_column(
389        &self,
390        pk: &[u8],
391        cache: &mut SparseOffsetsCache,
392        column_id: ColumnId,
393    ) -> Option<usize> {
394        // Decoding is lazy: on each call we only advance the cache's cursor as
395        // far as needed to answer the query. A column that has already been
396        // seen returns immediately; a column we haven't reached yet causes the
397        // parser to resume from `cache.cursor` and stop as soon as the column
398        // is located. Once the cursor walks off the end (or hits an unknown
399        // column id) the cache is marked finished, so subsequent misses are
400        // O(1).
401        // table_id and tsid are at fixed offsets.
402        match column_id {
403            RESERVED_COLUMN_ID_TABLE_ID => return Some(TABLE_ID_VALUE_OFFSET),
404            RESERVED_COLUMN_ID_TSID => return Some(TSID_VALUE_OFFSET),
405            _ => {}
406        }
407
408        if let Some(offset) = cache.get(column_id) {
409            return Some(offset);
410        }
411        if cache.finished {
412            return None;
413        }
414
415        let mut deserializer = Deserializer::new(pk);
416        deserializer.advance(cache.cursor);
417        let mut offset = cache.cursor;
418        while deserializer.has_remaining() {
419            let col = u32::deserialize(&mut deserializer).unwrap();
420            offset += COLUMN_ID_ENCODE_SIZE;
421            let value_offset = offset;
422            cache.insert(col, value_offset);
423            let Some(field) = self.get_field(col) else {
424                cache.finished = true;
425                cache.cursor = offset;
426                return None;
427            };
428
429            let skip = field.skip_deserialize(pk, &mut deserializer).unwrap();
430            offset += skip;
431            cache.cursor = offset;
432            if col == column_id {
433                return Some(value_offset);
434            }
435        }
436
437        cache.finished = true;
438        None
439    }
440
441    /// Decode value at `offset` in `pk`.
442    pub fn decode_value_at(&self, pk: &[u8], offset: usize, column_id: ColumnId) -> Result<Value> {
443        let mut deserializer = Deserializer::new(pk);
444        deserializer.advance(offset);
445        // Safety: checked by `has_column`
446        let field = self.get_field(column_id).unwrap();
447        field.deserialize(&mut deserializer)
448    }
449
450    /// Returns the encoded bytes of the given `column_id` in `pk`.
451    ///
452    /// Returns `Ok(None)` if the `column_id` is missing in `pk`.
453    pub fn encoded_value_for_column<'a>(
454        &self,
455        pk: &'a [u8],
456        cache: &mut SparseOffsetsCache,
457        column_id: ColumnId,
458    ) -> Result<Option<&'a [u8]>> {
459        let Some(offset) = self.has_column(pk, cache, column_id) else {
460            return Ok(None);
461        };
462
463        let Some(field) = self.get_field(column_id) else {
464            return Ok(None);
465        };
466
467        let mut deserializer = Deserializer::new(pk);
468        deserializer.advance(offset);
469        let len = field.skip_deserialize(pk, &mut deserializer)?;
470        Ok(Some(&pk[offset..offset + len]))
471    }
472}
473
474impl PrimaryKeyCodec for SparsePrimaryKeyCodec {
475    fn encode_key_value(&self, _key_value: &KeyValue, _buffer: &mut Vec<u8>) -> Result<()> {
476        UnsupportedOperationSnafu {
477            err_msg: "The encode_key_value method is not supported in SparsePrimaryKeyCodec.",
478        }
479        .fail()
480    }
481
482    fn encode_values(&self, values: &[(ColumnId, Value)], buffer: &mut Vec<u8>) -> Result<()> {
483        self.encode_to_vec(values.iter().map(|v| (v.0, v.1.as_value_ref())), buffer)
484    }
485
486    fn encode_value_refs(
487        &self,
488        values: &[(ColumnId, ValueRef)],
489        buffer: &mut Vec<u8>,
490    ) -> Result<()> {
491        self.encode_to_vec(values.iter().map(|v| (v.0, v.1.clone())), buffer)
492    }
493
494    fn estimated_size(&self) -> Option<usize> {
495        None
496    }
497
498    fn num_fields(&self) -> Option<usize> {
499        None
500    }
501
502    fn encoding(&self) -> PrimaryKeyEncoding {
503        PrimaryKeyEncoding::Sparse
504    }
505
506    fn primary_key_filter(
507        &self,
508        metadata: &RegionMetadataRef,
509        filters: Arc<Vec<SimpleFilterEvaluator>>,
510    ) -> Box<dyn PrimaryKeyFilter> {
511        Box::new(SparsePrimaryKeyFilter::new(
512            metadata.clone(),
513            filters,
514            self.clone(),
515        ))
516    }
517
518    fn decode(&self, bytes: &[u8]) -> Result<CompositeValues> {
519        Ok(CompositeValues::Sparse(self.decode_sparse(bytes)?))
520    }
521
522    fn decode_leftmost(&self, bytes: &[u8]) -> Result<Option<Value>> {
523        self.decode_leftmost(bytes)
524    }
525}
526
/// Field with column id.
///
/// Describes one column of a [`SparseEncoder`]: the id written in front of the
/// value and the field used to serialize the value.
pub struct FieldWithId {
    /// Field used to serialize the value of the column.
    pub field: SortField,
    /// Column id written in front of the serialized value.
    pub column_id: ColumnId,
}
532
533/// A special encoder for memtable.
534pub struct SparseEncoder {
535    fields: Vec<FieldWithId>,
536}
537
538impl SparseEncoder {
539    pub fn new(fields: Vec<FieldWithId>) -> Self {
540        Self { fields }
541    }
542
543    pub fn encode_to_vec<'a, I>(&self, row: I, buffer: &mut Vec<u8>) -> Result<()>
544    where
545        I: Iterator<Item = ValueRef<'a>>,
546    {
547        let mut serializer = Serializer::new(buffer);
548        for (value, field) in row.zip(self.fields.iter()) {
549            if !value.is_null() {
550                field
551                    .column_id
552                    .serialize(&mut serializer)
553                    .context(SerializeFieldSnafu)?;
554                field.field.serialize(&mut serializer, &value)?;
555            }
556        }
557        Ok(())
558    }
559}
560
561#[cfg(test)]
562mod tests {
563    use std::sync::Arc;
564
565    use api::v1::SemanticType;
566    use common_query::prelude::{greptime_timestamp, greptime_value};
567    use common_time::Timestamp;
568    use common_time::timestamp::TimeUnit;
569    use datatypes::schema::ColumnSchema;
570    use datatypes::value::{OrderedFloat, Value};
571    use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
572    use store_api::metric_engine_consts::{
573        DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME,
574    };
575    use store_api::storage::{ColumnId, RegionId};
576
577    use super::*;
578
579    fn test_region_metadata() -> RegionMetadataRef {
580        let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
581        builder
582            .push_column_metadata(ColumnMetadata {
583                column_schema: ColumnSchema::new(
584                    DATA_SCHEMA_TABLE_ID_COLUMN_NAME,
585                    ConcreteDataType::uint32_datatype(),
586                    false,
587                ),
588                semantic_type: SemanticType::Tag,
589                column_id: ReservedColumnId::table_id(),
590            })
591            .push_column_metadata(ColumnMetadata {
592                column_schema: ColumnSchema::new(
593                    DATA_SCHEMA_TSID_COLUMN_NAME,
594                    ConcreteDataType::uint64_datatype(),
595                    false,
596                ),
597                semantic_type: SemanticType::Tag,
598                column_id: ReservedColumnId::tsid(),
599            })
600            .push_column_metadata(ColumnMetadata {
601                column_schema: ColumnSchema::new("pod", ConcreteDataType::string_datatype(), true),
602                semantic_type: SemanticType::Tag,
603                column_id: 1,
604            })
605            .push_column_metadata(ColumnMetadata {
606                column_schema: ColumnSchema::new(
607                    "namespace",
608                    ConcreteDataType::string_datatype(),
609                    true,
610                ),
611                semantic_type: SemanticType::Tag,
612                column_id: 2,
613            })
614            .push_column_metadata(ColumnMetadata {
615                column_schema: ColumnSchema::new(
616                    "container",
617                    ConcreteDataType::string_datatype(),
618                    true,
619                ),
620                semantic_type: SemanticType::Tag,
621                column_id: 3,
622            })
623            .push_column_metadata(ColumnMetadata {
624                column_schema: ColumnSchema::new(
625                    "pod_name",
626                    ConcreteDataType::string_datatype(),
627                    true,
628                ),
629                semantic_type: SemanticType::Tag,
630                column_id: 4,
631            })
632            .push_column_metadata(ColumnMetadata {
633                column_schema: ColumnSchema::new(
634                    "pod_ip",
635                    ConcreteDataType::string_datatype(),
636                    true,
637                ),
638                semantic_type: SemanticType::Tag,
639                column_id: 5,
640            })
641            .push_column_metadata(ColumnMetadata {
642                column_schema: ColumnSchema::new(
643                    greptime_value(),
644                    ConcreteDataType::float64_datatype(),
645                    false,
646                ),
647                semantic_type: SemanticType::Field,
648                column_id: 6,
649            })
650            .push_column_metadata(ColumnMetadata {
651                column_schema: ColumnSchema::new(
652                    greptime_timestamp(),
653                    ConcreteDataType::timestamp_nanosecond_datatype(),
654                    false,
655                ),
656                semantic_type: SemanticType::Timestamp,
657                column_id: 7,
658            })
659            .primary_key(vec![
660                ReservedColumnId::table_id(),
661                ReservedColumnId::tsid(),
662                1,
663                2,
664                3,
665                4,
666                5,
667            ]);
668        let metadata = builder.build().unwrap();
669        Arc::new(metadata)
670    }
671
672    #[test]
673    fn test_sparse_value_new_and_get_or_null() {
674        let mut sparse_value = SparseValues::new();
675        sparse_value.insert(1, Value::Int32(42));
676
677        assert_eq!(sparse_value.get_or_null(1), &Value::Int32(42));
678        assert_eq!(sparse_value.get_or_null(2), &Value::Null);
679    }
680
681    #[test]
682    fn test_sparse_value_insert() {
683        let mut sparse_value = SparseValues::new();
684        sparse_value.insert(1, Value::Int32(42));
685
686        assert_eq!(sparse_value.get_or_null(1), &Value::Int32(42));
687    }
688
689    fn test_row() -> Vec<(ColumnId, ValueRef<'static>)> {
690        vec![
691            (RESERVED_COLUMN_ID_TABLE_ID, ValueRef::UInt32(42)),
692            (
693                RESERVED_COLUMN_ID_TSID,
694                ValueRef::UInt64(123843349035232323),
695            ),
696            // label: pod
697            (1, ValueRef::String("greptime-frontend-6989d9899-22222")),
698            // label: namespace
699            (2, ValueRef::String("greptime-cluster")),
700            // label: container
701            (3, ValueRef::String("greptime-frontend-6989d9899-22222")),
702            // label: pod_name
703            (4, ValueRef::String("greptime-frontend-6989d9899-22222")),
704            // label: pod_ip
705            (5, ValueRef::String("10.10.10.10")),
706            // field: greptime_value
707            (6, ValueRef::Float64(OrderedFloat(1.0))),
708            // field: greptime_timestamp
709            (
710                7,
711                ValueRef::Timestamp(Timestamp::new(1618876800000000000, TimeUnit::Nanosecond)),
712            ),
713        ]
714    }
715
716    #[test]
717    fn test_encode_by_short_cuts() {
718        let region_metadata = test_region_metadata();
719        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
720        let mut buffer = Vec::new();
721        let internal_columns = [
722            (RESERVED_COLUMN_ID_TABLE_ID, ValueRef::UInt32(1024)),
723            (RESERVED_COLUMN_ID_TSID, ValueRef::UInt64(42)),
724        ];
725        let tags = [
726            (1, "greptime-frontend-6989d9899-22222"),
727            (2, "greptime-cluster"),
728            (3, "greptime-frontend-6989d9899-22222"),
729            (4, "greptime-frontend-6989d9899-22222"),
730            (5, "10.10.10.10"),
731        ];
732        codec
733            .encode_to_vec(internal_columns.into_iter(), &mut buffer)
734            .unwrap();
735        codec
736            .encode_to_vec(
737                tags.iter()
738                    .map(|(col_id, tag_value)| (*col_id, ValueRef::String(tag_value))),
739                &mut buffer,
740            )
741            .unwrap();
742
743        let mut buffer_by_raw_encoding = Vec::new();
744        codec
745            .encode_internal(1024, 42, &mut buffer_by_raw_encoding)
746            .unwrap();
747        let tags: Vec<_> = tags
748            .into_iter()
749            .map(|(col_id, tag_value)| (col_id, tag_value.as_bytes()))
750            .collect();
751        codec
752            .encode_raw_tag_value(
753                tags.iter().map(|(c, b)| (*c, *b)),
754                &mut buffer_by_raw_encoding,
755            )
756            .unwrap();
757        assert_eq!(buffer, buffer_by_raw_encoding);
758    }
759
760    #[test]
761    fn test_encode_to_vec() {
762        let region_metadata = test_region_metadata();
763        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
764        let mut buffer = Vec::new();
765
766        let row = test_row();
767        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
768        assert!(!buffer.is_empty());
769        let sparse_value = codec.decode_sparse(&buffer).unwrap();
770        assert_eq!(
771            sparse_value.get_or_null(RESERVED_COLUMN_ID_TABLE_ID),
772            &Value::UInt32(42)
773        );
774        assert_eq!(
775            sparse_value.get_or_null(1),
776            &Value::String("greptime-frontend-6989d9899-22222".into())
777        );
778        assert_eq!(
779            sparse_value.get_or_null(2),
780            &Value::String("greptime-cluster".into())
781        );
782        assert_eq!(
783            sparse_value.get_or_null(3),
784            &Value::String("greptime-frontend-6989d9899-22222".into())
785        );
786        assert_eq!(
787            sparse_value.get_or_null(4),
788            &Value::String("greptime-frontend-6989d9899-22222".into())
789        );
790        assert_eq!(
791            sparse_value.get_or_null(5),
792            &Value::String("10.10.10.10".into())
793        );
794    }
795
796    #[test]
797    fn test_decode_leftmost() {
798        let region_metadata = test_region_metadata();
799        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
800        let mut buffer = Vec::new();
801        let row = test_row();
802        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
803        assert!(!buffer.is_empty());
804        let result = codec.decode_leftmost(&buffer).unwrap().unwrap();
805        assert_eq!(result, Value::UInt32(42));
806    }
807
808    #[test]
809    fn test_has_column() {
810        let region_metadata = test_region_metadata();
811        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
812        let mut buffer = Vec::new();
813        let row = test_row();
814        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
815        assert!(!buffer.is_empty());
816
817        let mut offsets_map = SparseOffsetsCache::new();
818        for column_id in [
819            RESERVED_COLUMN_ID_TABLE_ID,
820            RESERVED_COLUMN_ID_TSID,
821            1,
822            2,
823            3,
824            4,
825            5,
826        ] {
827            let offset = codec.has_column(&buffer, &mut offsets_map, column_id);
828            assert!(offset.is_some());
829        }
830
831        let offset = codec.has_column(&buffer, &mut offsets_map, 6);
832        assert!(offset.is_none());
833    }
834
835    #[test]
836    fn test_has_column_lazy_resume() {
837        let region_metadata = test_region_metadata();
838        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
839        let mut buffer = Vec::new();
840        codec
841            .encode_to_vec(test_row().into_iter(), &mut buffer)
842            .unwrap();
843
844        let mut cache = SparseOffsetsCache::new();
845        // Look up an early column: only a prefix of tags is decoded.
846        assert!(codec.has_column(&buffer, &mut cache, 1).is_some());
847        assert!(!cache.finished);
848        assert!(cache.contains(1));
849        assert!(!cache.contains(5));
850
851        // A later column resumes from the cursor.
852        assert!(codec.has_column(&buffer, &mut cache, 5).is_some());
853        assert!(cache.contains(5));
854
855        // An earlier column that was already cached still resolves.
856        assert!(codec.has_column(&buffer, &mut cache, 2).is_some());
857
858        // A non-existent column walks off the end and marks the cache finished.
859        assert!(codec.has_column(&buffer, &mut cache, 999).is_none());
860        assert!(cache.finished);
861        // Further misses are O(1).
862        assert!(codec.has_column(&buffer, &mut cache, 998).is_none());
863    }
864
865    #[test]
866    fn test_decode_value_at() {
867        let region_metadata = test_region_metadata();
868        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
869        let mut buffer = Vec::new();
870        let row = test_row();
871        codec.encode_to_vec(row.into_iter(), &mut buffer).unwrap();
872        assert!(!buffer.is_empty());
873
874        let row = test_row();
875        let mut offsets_map = SparseOffsetsCache::new();
876        for column_id in [
877            RESERVED_COLUMN_ID_TABLE_ID,
878            RESERVED_COLUMN_ID_TSID,
879            1,
880            2,
881            3,
882            4,
883            5,
884        ] {
885            let offset = codec
886                .has_column(&buffer, &mut offsets_map, column_id)
887                .unwrap();
888            let value = codec.decode_value_at(&buffer, offset, column_id).unwrap();
889            let expected_value = row
890                .iter()
891                .find(|(id, _)| *id == column_id)
892                .unwrap()
893                .1
894                .clone();
895            assert_eq!(value.as_value_ref(), expected_value);
896        }
897    }
898
899    #[test]
900    fn test_encoded_value_for_column() {
901        let region_metadata = test_region_metadata();
902        let codec = SparsePrimaryKeyCodec::new(&region_metadata);
903        let mut buffer = Vec::new();
904        let row = test_row();
905        codec
906            .encode_to_vec(row.clone().into_iter(), &mut buffer)
907            .unwrap();
908        assert!(!buffer.is_empty());
909
910        let mut offsets_map = SparseOffsetsCache::new();
911        for column_id in [
912            RESERVED_COLUMN_ID_TABLE_ID,
913            RESERVED_COLUMN_ID_TSID,
914            1,
915            2,
916            3,
917            4,
918            5,
919        ] {
920            let encoded_value = codec
921                .encoded_value_for_column(&buffer, &mut offsets_map, column_id)
922                .unwrap()
923                .unwrap();
924            let expected_value = row
925                .iter()
926                .find(|(id, _)| *id == column_id)
927                .unwrap()
928                .1
929                .clone();
930            let data_type = match column_id {
931                RESERVED_COLUMN_ID_TABLE_ID => ConcreteDataType::uint32_datatype(),
932                RESERVED_COLUMN_ID_TSID => ConcreteDataType::uint64_datatype(),
933                _ => ConcreteDataType::string_datatype(),
934            };
935            let field = SortField::new(data_type);
936            let mut expected_encoded = Vec::new();
937            let mut serializer = Serializer::new(&mut expected_encoded);
938            field.serialize(&mut serializer, &expected_value).unwrap();
939            assert_eq!(encoded_value, expected_encoded.as_slice());
940        }
941
942        for column_id in [6_u32, 7_u32, 999_u32] {
943            let encoded_value = codec
944                .encoded_value_for_column(&buffer, &mut offsets_map, column_id)
945                .unwrap();
946            assert!(encoded_value.is_none());
947        }
948    }
949}