1use std::collections::{BTreeMap, HashMap};
16use std::hash::Hasher;
17
18use api::v1::value::ValueData;
19use api::v1::{ColumnDataType, ColumnSchema, Row, Rows, SemanticType, Value};
20use datatypes::value::ValueRef;
21use fxhash::FxHasher;
22use mito_codec::row_converter::SparsePrimaryKeyCodec;
23use smallvec::SmallVec;
24use snafu::ResultExt;
25use store_api::codec::PrimaryKeyEncoding;
26use store_api::metadata::ColumnMetadata;
27use store_api::metric_engine_consts::{
28 DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME,
29};
30use store_api::storage::consts::{PRIMARY_KEY_COLUMN_NAME, ReservedColumnId};
31use store_api::storage::{ColumnId, TableId};
32
33use crate::error::{EncodePrimaryKeySnafu, Result, TableIdCountMismatchSnafu};
34
/// Rewrites rows so that they carry the internal table id / tsid information,
/// either folded into a single sparse-encoded primary key column or appended
/// as two extra columns (see `RowModifier::modify_rows`).
pub struct RowModifier {
    /// Codec used to encode primary keys when the sparse encoding is requested.
    codec: SparsePrimaryKeyCodec,
}
45
46#[derive(Clone, Copy)]
48pub(crate) enum TableIdInput<'a> {
49 Single(TableId),
50 Batch(&'a [TableId]),
51}
52
53impl<'a> TableIdInput<'a> {
54 fn table_id_for_row(&self, row_idx: usize) -> TableId {
55 match self {
56 TableIdInput::Single(table_id) => *table_id,
57 TableIdInput::Batch(table_ids) => table_ids[row_idx],
58 }
59 }
60}
61
62impl Default for RowModifier {
63 fn default() -> Self {
64 Self {
65 codec: SparsePrimaryKeyCodec::schemaless(),
66 }
67 }
68}
69
70impl RowModifier {
71 pub(crate) fn modify_rows(
73 &self,
74 iter: RowsIter,
75 table_ids: TableIdInput<'_>,
76 encoding: PrimaryKeyEncoding,
77 ) -> Result<Rows> {
78 let row_count = iter.rows.rows.len();
79 Self::validate_table_id_count(table_ids, row_count)?;
80 match encoding {
81 PrimaryKeyEncoding::Sparse => self.modify_rows_sparse(iter, table_ids),
82 PrimaryKeyEncoding::Dense => self.modify_rows_dense(iter, table_ids),
83 }
84 }
85
86 fn modify_rows_sparse(&self, mut iter: RowsIter, table_ids: TableIdInput<'_>) -> Result<Rows> {
89 let num_column = iter.rows.schema.len();
90 let num_primary_key_column = iter.index.num_primary_key_column;
91 let num_output_column = num_column - num_primary_key_column + 1;
93
94 let mut buffer = vec![];
95
96 for (row_index, mut row_iter) in iter.iter_mut().enumerate() {
97 let table_id = table_ids.table_id_for_row(row_index);
98 let (table_id_value, tsid) = Self::fill_internal_columns(table_id, &row_iter);
99 let mut values = Vec::with_capacity(num_output_column);
100 buffer.clear();
101 let internal_columns = [
102 (
103 ReservedColumnId::table_id(),
104 api::helper::pb_value_to_value_ref(&table_id_value, None),
105 ),
106 (
107 ReservedColumnId::tsid(),
108 api::helper::pb_value_to_value_ref(&tsid, None),
109 ),
110 ];
111 self.codec
112 .encode_to_vec(internal_columns.into_iter(), &mut buffer)
113 .context(EncodePrimaryKeySnafu)?;
114 self.codec
115 .encode_to_vec(row_iter.primary_keys(), &mut buffer)
116 .context(EncodePrimaryKeySnafu)?;
117
118 values.push(ValueData::BinaryValue(buffer.clone()).into());
119 values.extend(row_iter.remaining());
120 *row_iter.row = Row { values };
122 }
123
124 let mut schema = Vec::with_capacity(num_output_column);
126 schema.push(ColumnSchema {
127 column_name: PRIMARY_KEY_COLUMN_NAME.to_string(),
128 datatype: ColumnDataType::Binary as i32,
129 semantic_type: SemanticType::Tag as _,
130 datatype_extension: None,
131 options: None,
132 });
133 schema.extend(iter.remaining_columns());
134 iter.rows.schema = schema;
135
136 Ok(iter.rows)
137 }
138
139 fn modify_rows_dense(&self, mut iter: RowsIter, table_ids: TableIdInput<'_>) -> Result<Rows> {
142 iter.rows.schema.push(ColumnSchema {
144 column_name: DATA_SCHEMA_TABLE_ID_COLUMN_NAME.to_string(),
145 datatype: ColumnDataType::Uint32 as i32,
146 semantic_type: SemanticType::Tag as _,
147 datatype_extension: None,
148 options: None,
149 });
150 iter.rows.schema.push(ColumnSchema {
152 column_name: DATA_SCHEMA_TSID_COLUMN_NAME.to_string(),
153 datatype: ColumnDataType::Uint64 as i32,
154 semantic_type: SemanticType::Tag as _,
155 datatype_extension: None,
156 options: None,
157 });
158 for (row_index, row_iter) in iter.iter_mut().enumerate() {
159 let table_id = table_ids.table_id_for_row(row_index);
160 let (table_id_value, tsid) = Self::fill_internal_columns(table_id, &row_iter);
161 row_iter.row.values.push(table_id_value);
162 row_iter.row.values.push(tsid);
163 }
164
165 Ok(iter.rows)
166 }
167
168 fn validate_table_id_count(table_ids: TableIdInput<'_>, row_count: usize) -> Result<()> {
169 if let TableIdInput::Batch(table_ids) = table_ids
170 && table_ids.len() != row_count
171 {
172 return TableIdCountMismatchSnafu {
173 expected: row_count,
174 actual: table_ids.len(),
175 }
176 .fail();
177 }
178 Ok(())
179 }
180
181 pub fn fill_internal_columns(table_id: TableId, iter: &RowIter<'_>) -> (Value, Value) {
183 let ts_id = if !iter.has_null_labels() {
184 let mut ts_id_gen = TsidGenerator::new(iter.index.label_name_hash);
186 for (_, value) in iter.primary_keys_with_name() {
187 if let Some(ValueData::StringValue(string)) = &value.value_data {
189 ts_id_gen.write_str(string);
190 } else {
191 unreachable!(
192 "Should not contain null or non-string value: {:?}, table id: {}",
193 value, table_id
194 );
195 }
196 }
197 ts_id_gen.finish()
198 } else {
199 let mut hasher = TsidGenerator::default();
201 for (name, value) in iter.primary_keys_with_name() {
203 if let Some(ValueData::StringValue(_)) = &value.value_data {
205 hasher.write_str(name);
206 }
207 }
208 let label_name_hash = hasher.finish();
209
210 let mut final_hasher = TsidGenerator::new(label_name_hash);
212 for (_, value) in iter.primary_keys_with_name() {
213 if let Some(ValueData::StringValue(value)) = &value.value_data {
214 final_hasher.write_str(value);
215 }
216 }
217 final_hasher.finish()
218 };
219
220 (
221 ValueData::U32Value(table_id).into(),
222 ValueData::U64Value(ts_id).into(),
223 )
224 }
225}
226
227#[derive(Default)]
229pub struct TsidGenerator {
230 hasher: FxHasher,
231}
232
233impl TsidGenerator {
234 pub fn new(label_name_hash: u64) -> Self {
235 let mut hasher = FxHasher::default();
236 hasher.write_u64(label_name_hash);
237 Self { hasher }
238 }
239
240 pub fn write_str(&mut self, value: &str) {
242 self.hasher.write(value.as_bytes());
243 self.hasher.write_u8(0xff);
244 }
245
246 pub fn finish(&mut self) -> u64 {
248 self.hasher.finish()
249 }
250}
251
/// Pairs a column id with the position of that column.
#[derive(Debug, Clone, Copy)]
struct ValueIndex {
    /// Id of the column.
    column_id: ColumnId,
    /// Position used to index both the row schema and a row's values.
    index: usize,
}
258
259struct IterIndex {
261 indices: Vec<ValueIndex>,
262 num_primary_key_column: usize,
263 label_name_hash: u64,
265}
266
267impl IterIndex {
268 fn new(
269 row_schema: &[ColumnSchema],
270 physical_columns: &HashMap<String, ColumnMetadata>,
271 ) -> Self {
272 let mut reserved_indices = SmallVec::<[ValueIndex; 2]>::new();
273 let mut primary_key_indices = BTreeMap::new();
275 let mut field_indices = SmallVec::<[ValueIndex; 1]>::new();
276 let mut ts_index = None;
277 for (idx, col) in row_schema.iter().enumerate() {
278 match col.semantic_type() {
279 SemanticType::Tag => match col.column_name.as_str() {
280 DATA_SCHEMA_TABLE_ID_COLUMN_NAME => {
281 reserved_indices.push(ValueIndex {
282 column_id: ReservedColumnId::table_id(),
283 index: idx,
284 });
285 }
286 DATA_SCHEMA_TSID_COLUMN_NAME => {
287 reserved_indices.push(ValueIndex {
288 column_id: ReservedColumnId::tsid(),
289 index: idx,
290 });
291 }
292 _ => {
293 primary_key_indices.insert(
295 col.column_name.as_str(),
296 ValueIndex {
297 column_id: physical_columns
298 .get(&col.column_name)
299 .unwrap()
300 .column_id,
301 index: idx,
302 },
303 );
304 }
305 },
306 SemanticType::Field => {
307 field_indices.push(ValueIndex {
308 column_id: physical_columns.get(&col.column_name).unwrap().column_id,
309 index: idx,
310 });
311 }
312 SemanticType::Timestamp => {
313 ts_index = Some(ValueIndex {
314 column_id: physical_columns.get(&col.column_name).unwrap().column_id,
315 index: idx,
316 });
317 }
318 }
319 }
320 let num_primary_key_column = primary_key_indices.len() + reserved_indices.len();
321 let mut indices = Vec::with_capacity(num_primary_key_column + 2);
322 indices.extend(reserved_indices);
323 let mut label_name_hasher = TsidGenerator::default();
324 for (pk_name, pk_index) in primary_key_indices {
325 label_name_hasher.write_str(pk_name);
327 indices.push(pk_index);
328 }
329 let label_name_hash = label_name_hasher.finish();
330
331 indices.extend(ts_index);
332 indices.extend(field_indices);
333 IterIndex {
334 indices,
335 num_primary_key_column,
336 label_name_hash,
337 }
338 }
339}
340
341pub struct RowsIter {
343 rows: Rows,
344 index: IterIndex,
345}
346
347impl RowsIter {
348 pub fn new(rows: Rows, physical_columns: &HashMap<String, ColumnMetadata>) -> Self {
349 let index: IterIndex = IterIndex::new(&rows.schema, physical_columns);
350 Self { rows, index }
351 }
352
353 pub fn iter_mut(&mut self) -> impl Iterator<Item = RowIter<'_>> {
355 self.rows.rows.iter_mut().map(|row| RowIter {
356 row,
357 index: &self.index,
358 schema: &self.rows.schema,
359 })
360 }
361
362 fn remaining_columns(&mut self) -> impl Iterator<Item = ColumnSchema> + '_ {
364 self.index.indices[self.index.num_primary_key_column..]
365 .iter()
366 .map(|idx| std::mem::take(&mut self.rows.schema[idx.index]))
367 }
368}
369
370pub struct RowIter<'a> {
372 row: &'a mut Row,
373 index: &'a IterIndex,
374 schema: &'a Vec<ColumnSchema>,
375}
376
377impl RowIter<'_> {
378 fn primary_keys_with_name(&self) -> impl Iterator<Item = (&String, &Value)> {
380 self.index.indices[..self.index.num_primary_key_column]
381 .iter()
382 .map(|idx| {
383 (
384 &self.schema[idx.index].column_name,
385 &self.row.values[idx.index],
386 )
387 })
388 }
389
390 fn has_null_labels(&self) -> bool {
392 self.index.indices[..self.index.num_primary_key_column]
393 .iter()
394 .any(|idx| self.row.values[idx.index].value_data.is_none())
395 }
396
397 pub fn primary_keys(&self) -> impl Iterator<Item = (ColumnId, ValueRef<'_>)> {
399 self.index.indices[..self.index.num_primary_key_column]
400 .iter()
401 .map(|idx| {
402 (
403 idx.column_id,
404 api::helper::pb_value_to_value_ref(
405 &self.row.values[idx.index],
406 self.schema[idx.index].datatype_extension.as_ref(),
407 ),
408 )
409 })
410 }
411
412 fn remaining(&mut self) -> impl Iterator<Item = Value> + '_ {
414 self.index.indices[self.index.num_primary_key_column..]
415 .iter()
416 .map(|idx| std::mem::take(&mut self.row.values[idx.index]))
417 }
418
419 pub fn value_at(&self, idx: usize) -> &Value {
423 &self.row.values[idx]
424 }
425}
426
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use api::v1::{Row, Rows};
    use store_api::codec::PrimaryKeyEncoding;

    use super::*;
    use crate::error::Error;

    /// Table id shared by all tests.
    const TABLE_ID: TableId = 1025;
    /// TSID of the `namespace=greptimedb, host=127.0.0.1` series.
    const EXPECTED_TSID: u64 = 2721566936019240841;

    /// String tag column named `name`.
    fn string_tag(name: &str) -> ColumnSchema {
        ColumnSchema {
            column_name: name.to_string(),
            datatype: ColumnDataType::String as i32,
            semantic_type: SemanticType::Tag as _,
            datatype_extension: None,
            options: None,
        }
    }

    /// Schemas of the internal columns appended by the dense encoding.
    fn internal_columns() -> [ColumnSchema; 2] {
        [
            (DATA_SCHEMA_TABLE_ID_COLUMN_NAME, ColumnDataType::Uint32),
            (DATA_SCHEMA_TSID_COLUMN_NAME, ColumnDataType::Uint64),
        ]
        .map(|(name, datatype)| ColumnSchema {
            column_name: name.to_string(),
            datatype: datatype as i32,
            semantic_type: SemanticType::Tag as _,
            datatype_extension: None,
            options: None,
        })
    }

    /// Wraps one row and its schema into a `Rows`.
    fn single_row(schema: Vec<ColumnSchema>, row: Row) -> Rows {
        Rows {
            schema,
            rows: vec![row],
        }
    }

    fn test_schema() -> Vec<ColumnSchema> {
        create_multi_label_schema(&["namespace", "host"])
    }

    fn test_row(v1: &str, v2: &str) -> Row {
        create_row_with_values(&[v1, v2])
    }

    fn make_info(name: &str, column_id: ColumnId) -> ColumnMetadata {
        let column_schema = datatypes::schema::ColumnSchema::new(
            name.to_string(),
            datatypes::prelude::ConcreteDataType::string_datatype(),
            false,
        );
        ColumnMetadata {
            column_schema,
            semantic_type: SemanticType::Tag,
            column_id,
        }
    }

    fn test_name_to_column_id() -> HashMap<String, ColumnMetadata> {
        create_name_to_column_id(&["namespace", "host"])
    }

    #[test]
    fn test_encode_sparse() {
        let name_to_column_id = test_name_to_column_id();
        let rows = single_row(test_schema(), test_row("greptimedb", "127.0.0.1"));
        let result = RowModifier::default()
            .modify_rows(
                RowsIter::new(rows, &name_to_column_id),
                TableIdInput::Single(TABLE_ID),
                PrimaryKeyEncoding::Sparse,
            )
            .unwrap();

        let encoded_row = &result.rows[0];
        assert_eq!(encoded_row.values.len(), 1);
        let encoded_primary_key = vec![
            128, 0, 0, 4, 1, 0, 0, 4, 1, 128, 0, 0, 3, 1, 37, 196, 242, 181, 117, 224, 7, 137, 0,
            0, 0, 2, 1, 1, 49, 50, 55, 46, 48, 46, 48, 46, 9, 49, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
            1, 1, 1, 103, 114, 101, 112, 116, 105, 109, 101, 9, 100, 98, 0, 0, 0, 0, 0, 0, 2,
        ];
        assert_eq!(
            encoded_row.values[0],
            ValueData::BinaryValue(encoded_primary_key).into()
        );
        assert_eq!(result.schema, expected_sparse_schema());
    }

    fn expected_sparse_schema() -> Vec<ColumnSchema> {
        vec![ColumnSchema {
            column_name: PRIMARY_KEY_COLUMN_NAME.to_string(),
            datatype: ColumnDataType::Binary as i32,
            semantic_type: SemanticType::Tag as _,
            datatype_extension: None,
            options: None,
        }]
    }

    fn expected_dense_schema() -> Vec<ColumnSchema> {
        let mut schema = test_schema();
        schema.extend(internal_columns());
        schema
    }

    #[test]
    fn test_encode_dense() {
        let name_to_column_id = test_name_to_column_id();
        let rows = single_row(test_schema(), test_row("greptimedb", "127.0.0.1"));
        let result = RowModifier::default()
            .modify_rows(
                RowsIter::new(rows, &name_to_column_id),
                TableIdInput::Single(TABLE_ID),
                PrimaryKeyEncoding::Dense,
            )
            .unwrap();

        let values = &result.rows[0].values;
        assert_eq!(
            values[0],
            ValueData::StringValue("greptimedb".to_string()).into()
        );
        assert_eq!(
            values[1],
            ValueData::StringValue("127.0.0.1".to_string()).into()
        );
        assert_eq!(values[2], ValueData::U32Value(1025).into());
        assert_eq!(values[3], ValueData::U64Value(EXPECTED_TSID).into());
        assert_eq!(result.schema, expected_dense_schema());
    }

    #[test]
    fn test_table_id_count_mismatch() {
        let name_to_column_id = test_name_to_column_id();
        let rows = Rows {
            schema: test_schema(),
            rows: vec![test_row("a", "b"), test_row("c", "d")],
        };
        // Two rows but a single table id.
        let table_ids = [TABLE_ID];
        let err = RowModifier::default()
            .modify_rows(
                RowsIter::new(rows, &name_to_column_id),
                TableIdInput::Batch(&table_ids),
                PrimaryKeyEncoding::Dense,
            )
            .unwrap_err();
        assert!(matches!(
            err,
            Error::TableIdCountMismatch {
                expected: 2,
                actual: 1,
                ..
            }
        ));
    }

    #[test]
    fn test_fill_internal_columns() {
        let name_to_column_id = test_name_to_column_id();
        // Same series, once in the default column order and once swapped.
        let cases = [
            (test_schema(), test_row("greptimedb", "127.0.0.1")),
            (
                create_multi_label_schema(&["host", "namespace"]),
                test_row("127.0.0.1", "greptimedb"),
            ),
        ];
        for (schema, row) in cases {
            let mut rows_iter = RowsIter::new(single_row(schema, row), &name_to_column_id);
            let row_iter = rows_iter.iter_mut().next().unwrap();
            let (encoded_table_id, tsid) = RowModifier::fill_internal_columns(TABLE_ID, &row_iter);
            assert_eq!(encoded_table_id, ValueData::U32Value(1025).into());
            assert_eq!(tsid, ValueData::U64Value(EXPECTED_TSID).into());
        }
    }

    /// Schema made of one string tag column per label.
    fn create_multi_label_schema(labels: &[&str]) -> Vec<ColumnSchema> {
        labels.iter().map(|name| string_tag(name)).collect()
    }

    /// Maps each label to its metadata; column ids start at 1 in label order.
    fn create_name_to_column_id(labels: &[&str]) -> HashMap<String, ColumnMetadata> {
        let mut columns = HashMap::with_capacity(labels.len());
        for (idx, name) in labels.iter().enumerate() {
            columns.insert(name.to_string(), make_info(name, idx as ColumnId + 1));
        }
        columns
    }

    /// Row whose values are all non-null strings.
    fn create_row_with_values(values: &[&str]) -> Row {
        let values = values
            .iter()
            .map(|v| ValueData::StringValue(v.to_string()).into())
            .collect();
        Row { values }
    }

    /// Row whose values are strings or nulls (`None`).
    fn create_row_with_nulls(values: &[Option<&str>]) -> Row {
        let values = values
            .iter()
            .map(|v| match v {
                Some(s) => ValueData::StringValue(s.to_string()).into(),
                None => Value { value_data: None },
            })
            .collect();
        Row { values }
    }

    /// Computes the TSID of a single row.
    fn extract_tsid(
        schema: Vec<ColumnSchema>,
        row: Row,
        name_to_column_id: &HashMap<String, ColumnMetadata>,
        table_id: TableId,
    ) -> u64 {
        let mut rows_iter = RowsIter::new(single_row(schema, row), name_to_column_id);
        let row_iter = rows_iter.iter_mut().next().unwrap();
        let (_, tsid_value) = RowModifier::fill_internal_columns(table_id, &row_iter);
        let Some(ValueData::U64Value(tsid)) = tsid_value.value_data else {
            panic!("Expected U64Value for TSID")
        };
        tsid
    }

    /// TSID of `row` under a schema built from `schema_labels`, with column
    /// ids assigned from `id_labels`.
    fn tsid_for(schema_labels: &[&str], id_labels: &[&str], row: Row) -> u64 {
        extract_tsid(
            create_multi_label_schema(schema_labels),
            row,
            &create_name_to_column_id(id_labels),
            TABLE_ID,
        )
    }

    #[test]
    fn test_tsid_same_for_different_label_orders() {
        let id_labels = ["a", "b", "c"];
        let [tsid1, tsid2, tsid3] = [
            (["a", "b", "c"], ["A", "B", "C"]),
            (["b", "a", "c"], ["B", "A", "C"]),
            (["c", "b", "a"], ["C", "B", "A"]),
        ]
        .map(|(labels, values)| tsid_for(&labels, &id_labels, create_row_with_values(&values)));

        assert_eq!(
            tsid1, tsid2,
            "TSID should be same for different column orders"
        );
        assert_eq!(
            tsid2, tsid3,
            "TSID should be same for different column orders"
        );
    }

    #[test]
    fn test_tsid_same_with_null_labels() {
        let tsid1 = tsid_for(
            &["a", "b"],
            &["a", "b"],
            create_row_with_values(&["A", "B"]),
        );
        let tsid2 = tsid_for(
            &["a", "b", "c"],
            &["a", "b", "c"],
            create_row_with_nulls(&[Some("A"), Some("B"), None]),
        );

        assert_eq!(
            tsid1, tsid2,
            "TSID should be same when only difference is null label values"
        );
    }

    #[test]
    fn test_tsid_same_with_multiple_null_labels() {
        let tsid1 = tsid_for(
            &["a", "b"],
            &["a", "b"],
            create_row_with_values(&["A", "B"]),
        );
        let tsid2 = tsid_for(
            &["a", "b", "c", "d"],
            &["a", "b", "c", "d"],
            create_row_with_nulls(&[Some("A"), Some("B"), None, None]),
        );

        assert_eq!(
            tsid1, tsid2,
            "TSID should be same when only difference is multiple null label values"
        );
    }

    #[test]
    fn test_tsid_different_with_different_non_null_values() {
        let labels = ["a", "b"];
        let tsid1 = tsid_for(&labels, &labels, create_row_with_values(&["A", "B"]));
        let tsid2 = tsid_for(&labels, &labels, create_row_with_values(&["A", "C"]));

        assert_ne!(
            tsid1, tsid2,
            "TSID should be different when label values differ"
        );
    }

    #[test]
    fn test_tsid_fast_path_vs_slow_path_consistency() {
        // No null label: the precomputed label name hash is used.
        let tsid_fast = tsid_for(
            &["a", "b"],
            &["a", "b"],
            create_row_with_values(&["A", "B"]),
        );
        // One null label: the label name hash is recomputed for the row.
        let tsid_slow = tsid_for(
            &["a", "b", "c"],
            &["a", "b", "c"],
            create_row_with_nulls(&[Some("A"), Some("B"), None]),
        );

        assert_eq!(
            tsid_fast, tsid_slow,
            "Fast path and slow path should produce same TSID for same non-null values"
        );
    }

    #[test]
    fn test_tsid_with_null_in_middle() {
        let all_labels = ["a", "b", "c"];
        let tsid1 = tsid_for(
            &all_labels,
            &all_labels,
            create_row_with_values(&["A", "B", "C"]),
        );
        let tsid2 = tsid_for(
            &all_labels,
            &all_labels,
            create_row_with_nulls(&[Some("A"), None, Some("C")]),
        );

        assert_ne!(
            tsid1, tsid2,
            "TSID should be different when a non-null value becomes null"
        );

        let tsid3 = tsid_for(
            &["a", "c"],
            &["a", "c"],
            create_row_with_values(&["A", "C"]),
        );

        assert_eq!(
            tsid2, tsid3,
            "TSID should be same when null label is ignored"
        );
    }

    #[test]
    fn test_tsid_all_null_labels() {
        let labels = ["a", "b", "c"];
        let all_null = || create_row_with_nulls(&[None, None, None]);

        let tsid = tsid_for(&labels, &labels, all_null());
        let tsid2 = tsid_for(&labels, &labels, all_null());
        assert_eq!(
            tsid, tsid2,
            "TSID should be consistent when all label values are null"
        );
    }
}