From 931556dbd3ca651136826d4d8861ae101991b124 Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Tue, 2 Dec 2025 00:38:29 -0800 Subject: [PATCH] perf(metric-engine)!: Replace mur3 with fxhash for faster TSID generation (#7316) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat/change-tsid-gen: perf(metric-engine): replace mur3 with fxhash for faster TSID generation - Switches from mur3::Hasher128 to fxhash::FxHasher for TSID hashing - Pre-computes label-name hash when no nulls are present, avoiding redundant work - Adds fast-path for rows without nulls; falls back to slow path otherwise - Updates Cargo.toml and lockfile to reflect dependency change Signed-off-by: Lei, HUANG * feat/change-tsid-gen: fix: only check primary-key labels for null when re-using cached hash - Rename has_null() → has_null_labels() and restrict the check to the primary-key columns so that non-label NULLs do not force a full TSID re-computation. - Update expected hashes in tests to match the new logic. Signed-off-by: Lei, HUANG * feat/change-tsid-gen: test: add comprehensive TSID generation tests for label ordering and null handling Signed-off-by: Lei, HUANG * feat/change-tsid-gen: bench: add criterion benchmark for TSID generator - Compare original mur3 vs current fxhash fast/slow paths - Test 2, 5, 10 label sets plus null-value slow path - Add mur3 & criterion dev-deps; register bench target Signed-off-by: Lei, HUANG * feat/change-tsid-gen: test: stabilize metric-engine tests by fixing non-deterministic row order - Add ORDER BY to SELECTs in TTL tests to ensure consistent output - Update expected __tsid values after hash function change - Swap expected OTLP metric rows to match new ordering Signed-off-by: Lei, HUANG * feat/change-tsid-gen: refactor: simplify Default impls and remove redundant code - Replace manual Default for TsidGenerator with derive - Remove unnecessary into_iter() call - Simplify Option::unwrap_or_else to unwrap_or Signed-off-by: Lei, HUANG --------- Signed-off-by: Lei, HUANG --- Cargo.lock | 2 + src/metric-engine/Cargo.toml | 8 +- .../benches/bench_tsid_generator.rs | 273 +++++++++++++ src/metric-engine/src/engine/put.rs | 18 +- src/metric-engine/src/row_modifier.rs | 376 ++++++++++++++++-- tests-integration/tests/http.rs | 4 +- tests-integration/tests/region_migration.rs | 4 +- .../common/insert/logical_metric_table.result | 16 +- .../database_ttl_with_metric_engine.result | 12 +- .../ttl/database_ttl_with_metric_engine.sql | 6 +- .../common/ttl/metric_engine_ttl.result | 12 +- .../common/ttl/metric_engine_ttl.sql | 6 +- 12 files changed, 658 insertions(+), 79 deletions(-) create mode 100644 src/metric-engine/benches/bench_tsid_generator.rs diff --git a/Cargo.lock b/Cargo.lock index d8cafc0113..d93c7cd42e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7514,9 +7514,11 @@ dependencies = [ "common-test-util", "common-time", "common-wal", + "criterion 0.4.0", "datafusion", "datatypes", "futures-util", + "fxhash", "humantime-serde", "itertools 0.14.0", "lazy_static", diff --git a/src/metric-engine/Cargo.toml b/src/metric-engine/Cargo.toml index c601c10912..9beadade16 100644 --- a/src/metric-engine/Cargo.toml +++ b/src/metric-engine/Cargo.toml @@ -14,6 +14,7 @@ async-stream.workspace = true async-trait.workspace = true base64.workspace = true bytes.workspace = true +fxhash = "0.2" common-base.workspace = true common-error.workspace = true common-macro.workspace = true @@ -31,7 +32,6 @@ lazy_static = "1.4" mito-codec.workspace = true mito2.workspace = true moka.workspace = true -mur3 = "0.1" object-store.workspace = true prometheus.workspace = true serde.workspace = true @@ -47,6 +47,12 @@ common-meta = { workspace = true, features = ["testing"] } common-test-util.workspace = true mito2 = { workspace = true, features = ["test"] } common-wal = { workspace = true } +criterion = { version = "0.4", features = ["async", "async_tokio"] } +mur3 = "0.1" + +[[bench]] +name = "bench_tsid_generator" +harness = false [package.metadata.cargo-udeps.ignore] normal = ["aquamarine"] diff --git a/src/metric-engine/benches/bench_tsid_generator.rs b/src/metric-engine/benches/bench_tsid_generator.rs new file mode 100644 index 0000000000..2908bc67ce --- /dev/null +++ b/src/metric-engine/benches/bench_tsid_generator.rs @@ -0,0 +1,273 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::hash::Hasher; + +use criterion::{Criterion, black_box, criterion_group, criterion_main}; +use fxhash::FxHasher; +use mur3::Hasher128; + +// A random number (from original implementation) +const TSID_HASH_SEED: u32 = 846793005; + +/// Original TSID generator using mur3::Hasher128 +/// Hashes both label name and value for each label pair +struct OriginalTsidGenerator { + hasher: Hasher128, +} + +impl OriginalTsidGenerator { + fn new() -> Self { + Self { + hasher: Hasher128::with_seed(TSID_HASH_SEED), + } + } + + /// Writes a label pair (name and value) to the generator. + fn write_label(&mut self, name: &str, value: &str) { + use std::hash::Hash; + name.hash(&mut self.hasher); + value.hash(&mut self.hasher); + } + + /// Generates a new TSID. + fn finish(&mut self) -> u64 { + // TSID is 64 bits, simply truncate the 128 bits hash + let (hash, _) = self.hasher.finish128(); + hash + } +} + +/// Current TSID generator using fxhash::FxHasher +/// Fast path: pre-computes label name hash, only hashes values +struct CurrentTsidGenerator { + hasher: FxHasher, +} + +impl CurrentTsidGenerator { + fn new() -> Self { + Self { + hasher: FxHasher::default(), + } + } + + fn new_with_label_name_hash(label_name_hash: u64) -> Self { + let mut hasher = FxHasher::default(); + hasher.write_u64(label_name_hash); + Self { hasher } + } + + /// Writes a label value to the generator. + fn write_str(&mut self, value: &str) { + self.hasher.write(value.as_bytes()); + self.hasher.write_u8(0xff); + } + + /// Generates a new TSID. + fn finish(&mut self) -> u64 { + self.hasher.finish() + } +} + +/// Pre-computes label name hash (used in fast path) +fn compute_label_name_hash(labels: &[(&str, &str)]) -> u64 { + let mut hasher = FxHasher::default(); + for (name, _) in labels { + hasher.write(name.as_bytes()); + hasher.write_u8(0xff); + } + hasher.finish() +} + +fn bench_tsid_generator_small(c: &mut Criterion) { + let labels = vec![("namespace", "greptimedb"), ("host", "127.0.0.1")]; + + let mut group = c.benchmark_group("tsid_generator_small_2_labels"); + group.bench_function("original_mur3", |b| { + b.iter(|| { + let mut tsid_gen = OriginalTsidGenerator::new(); + for (name, value) in &labels { + tsid_gen.write_label(black_box(name), black_box(value)); + } + black_box(tsid_gen.finish()) + }) + }); + + let label_name_hash = compute_label_name_hash(&labels); + group.bench_function("current_fxhash_fast_path", |b| { + b.iter(|| { + let mut tsid_gen = + CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash)); + for (_, value) in &labels { + tsid_gen.write_str(black_box(value)); + } + black_box(tsid_gen.finish()) + }) + }); + + group.finish(); +} + +fn bench_tsid_generator_medium(c: &mut Criterion) { + let labels = vec![ + ("namespace", "greptimedb"), + ("host", "127.0.0.1"), + ("region", "us-west-2"), + ("env", "production"), + ("service", "api"), + ]; + + let mut group = c.benchmark_group("tsid_generator_medium_5_labels"); + group.bench_function("original_mur3", |b| { + b.iter(|| { + let mut tsid_gen = OriginalTsidGenerator::new(); + for (name, value) in &labels { + tsid_gen.write_label(black_box(name), black_box(value)); + } + black_box(tsid_gen.finish()) + }) + }); + + let label_name_hash = compute_label_name_hash(&labels); + group.bench_function("current_fxhash_fast_path", |b| { + b.iter(|| { + let mut tsid_gen = + CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash)); + for (_, value) in &labels { + tsid_gen.write_str(black_box(value)); + } + black_box(tsid_gen.finish()) + }) + }); + + group.finish(); +} + +fn bench_tsid_generator_large(c: &mut Criterion) { + let labels = vec![ + ("namespace", "greptimedb"), + ("host", "127.0.0.1"), + ("region", "us-west-2"), + ("env", "production"), + ("service", "api"), + ("version", "v1.0.0"), + ("cluster", "cluster-1"), + ("dc", "dc1"), + ("rack", "rack-1"), + ("pod", "pod-123"), + ]; + + let mut group = c.benchmark_group("tsid_generator_large_10_labels"); + group.bench_function("original_mur3", |b| { + b.iter(|| { + let mut tsid_gen = OriginalTsidGenerator::new(); + for (name, value) in &labels { + tsid_gen.write_label(black_box(name), black_box(value)); + } + black_box(tsid_gen.finish()) + }) + }); + + let label_name_hash = compute_label_name_hash(&labels); + group.bench_function("current_fxhash_fast_path", |b| { + b.iter(|| { + let mut tsid_gen = + CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash)); + for (_, value) in &labels { + tsid_gen.write_str(black_box(value)); + } + black_box(tsid_gen.finish()) + }) + }); + + group.finish(); +} + +fn bench_tsid_generator_slow_path(c: &mut Criterion) { + // Simulate slow path: some labels have null values (empty strings) + let labels_with_nulls = vec![ + ("namespace", "greptimedb"), + ("host", "127.0.0.1"), + ("region", ""), // null + ("env", "production"), + ]; + + let labels_all_non_null = vec![ + ("namespace", "greptimedb"), + ("host", "127.0.0.1"), + ("env", "production"), + ]; + + let mut group = c.benchmark_group("tsid_generator_slow_path_with_nulls"); + + // Original: always hashes name and value + group.bench_function("original_mur3_with_nulls", |b| { + b.iter(|| { + let mut tsid_gen = OriginalTsidGenerator::new(); + for (name, value) in &labels_with_nulls { + if !value.is_empty() { + tsid_gen.write_label(black_box(name), black_box(value)); + } + } + black_box(tsid_gen.finish()) + }) + }); + + // Current slow path: recomputes label name hash + group.bench_function("current_fxhash_slow_path", |b| { + b.iter(|| { + // Step 1: Compute label name hash for non-null labels + let mut name_hasher = CurrentTsidGenerator::new(); + for (name, value) in &labels_with_nulls { + if !value.is_empty() { + name_hasher.write_str(black_box(name)); + } + } + let label_name_hash = name_hasher.finish(); + + // Step 2: Use label name hash and hash values + let mut tsid_gen = CurrentTsidGenerator::new_with_label_name_hash(label_name_hash); + for (_, value) in &labels_with_nulls { + if !value.is_empty() { + tsid_gen.write_str(black_box(value)); + } + } + black_box(tsid_gen.finish()) + }) + }); + + // Current fast path: pre-computed (for comparison) + let label_name_hash = compute_label_name_hash(&labels_all_non_null); + group.bench_function("current_fxhash_fast_path_no_nulls", |b| { + b.iter(|| { + let mut tsid_gen = + CurrentTsidGenerator::new_with_label_name_hash(black_box(label_name_hash)); + for (_, value) in &labels_all_non_null { + tsid_gen.write_str(black_box(value)); + } + black_box(tsid_gen.finish()) + }) + }); + + group.finish(); +} + +criterion_group!( + benches, + bench_tsid_generator_small, + bench_tsid_generator_medium, + bench_tsid_generator_large, + bench_tsid_generator_slow_path +); +criterion_main!(benches); diff --git a/src/metric-engine/src/engine/put.rs b/src/metric-engine/src/engine/put.rs index 0d4693ee42..c8c513f48c 100644 --- a/src/metric-engine/src/engine/put.rs +++ b/src/metric-engine/src/engine/put.rs @@ -272,15 +272,15 @@ mod tests { .unwrap(); let batches = RecordBatches::try_collect(stream).await.unwrap(); let expected = "\ -+-------------------------+----------------+------------+----------------------+-------+ -| greptime_timestamp | greptime_value | __table_id | __tsid | job | -+-------------------------+----------------+------------+----------------------+-------+ -| 1970-01-01T00:00:00 | 0.0 | 3 | 12881218023286672757 | tag_0 | -| 1970-01-01T00:00:00.001 | 1.0 | 3 | 12881218023286672757 | tag_0 | -| 1970-01-01T00:00:00.002 | 2.0 | 3 | 12881218023286672757 | tag_0 | -| 1970-01-01T00:00:00.003 | 3.0 | 3 | 12881218023286672757 | tag_0 | -| 1970-01-01T00:00:00.004 | 4.0 | 3 | 12881218023286672757 | tag_0 | -+-------------------------+----------------+------------+----------------------+-------+"; ++-------------------------+----------------+------------+---------------------+-------+ +| greptime_timestamp | greptime_value | __table_id | __tsid | job | ++-------------------------+----------------+------------+---------------------+-------+ +| 1970-01-01T00:00:00 | 0.0 | 3 | 2955007454552897459 | tag_0 | +| 1970-01-01T00:00:00.001 | 1.0 | 3 | 2955007454552897459 | tag_0 | +| 1970-01-01T00:00:00.002 | 2.0 | 3 | 2955007454552897459 | tag_0 | +| 1970-01-01T00:00:00.003 | 3.0 | 3 | 2955007454552897459 | tag_0 | +| 1970-01-01T00:00:00.004 | 4.0 | 3 | 2955007454552897459 | tag_0 | ++-------------------------+----------------+------------+---------------------+-------+"; assert_eq!(expected, batches.pretty_print().unwrap(), "physical region"); // read data from logical region diff --git a/src/metric-engine/src/row_modifier.rs b/src/metric-engine/src/row_modifier.rs index 4759a76215..0732359c39 100644 --- a/src/metric-engine/src/row_modifier.rs +++ b/src/metric-engine/src/row_modifier.rs @@ -13,11 +13,12 @@ // limitations under the License. use std::collections::{BTreeMap, HashMap}; -use std::hash::Hash; +use std::hash::Hasher; use api::v1::value::ValueData; use api::v1::{ColumnDataType, ColumnSchema, Row, Rows, SemanticType, Value}; use datatypes::value::ValueRef; +use fxhash::FxHasher; use mito_codec::row_converter::SparsePrimaryKeyCodec; use smallvec::SmallVec; use snafu::ResultExt; @@ -30,9 +31,6 @@ use store_api::storage::{ColumnId, TableId}; use crate::error::{EncodePrimaryKeySnafu, Result}; -// A random number -const TSID_HASH_SEED: u32 = 846793005; - /// A row modifier modifies [`Rows`]. /// /// - For [`PrimaryKeyEncoding::Sparse`] encoding, @@ -75,6 +73,7 @@ impl RowModifier { let num_output_column = num_column - num_primary_key_column + 1; let mut buffer = vec![]; + for mut iter in iter.iter_mut() { let (table_id, tsid) = Self::fill_internal_columns(table_id, &iter); let mut values = Vec::with_capacity(num_output_column); @@ -147,47 +146,72 @@ impl RowModifier { /// Fills internal columns of a row with table name and a hash of tag values. pub fn fill_internal_columns(table_id: TableId, iter: &RowIter<'_>) -> (Value, Value) { - let mut hasher = TsidGenerator::default(); - for (name, value) in iter.primary_keys_with_name() { - // The type is checked before. So only null is ignored. - if let Some(ValueData::StringValue(string)) = &value.value_data { - hasher.write_label(name, string); + let ts_id = if !iter.has_null_labels() { + // No null labels in row, we can safely reuse the precomputed label name hash. + let mut ts_id_gen = TsidGenerator::new(iter.index.label_name_hash); + for (_, value) in iter.primary_keys_with_name() { + // The type is checked before. So only null is ignored. + if let Some(ValueData::StringValue(string)) = &value.value_data { + ts_id_gen.write_str(string); + } else { + unreachable!( + "Should not contain null or non-string value: {:?}, table id: {}", + value, table_id + ); + } } - } - let hash = hasher.finish(); + ts_id_gen.finish() + } else { + // Slow path: row contains null, recompute label hash + let mut hasher = TsidGenerator::default(); + // 1. Find out label names with non-null values and get the hash. + for (name, value) in iter.primary_keys_with_name() { + // The type is checked before. So only null is ignored. + if let Some(ValueData::StringValue(_)) = &value.value_data { + hasher.write_str(name); + } + } + let label_name_hash = hasher.finish(); + + // 2. Use label name hash as seed and continue with label values. + let mut final_hasher = TsidGenerator::new(label_name_hash); + for (_, value) in iter.primary_keys_with_name() { + if let Some(ValueData::StringValue(value)) = &value.value_data { + final_hasher.write_str(value); + } + } + final_hasher.finish() + }; ( ValueData::U32Value(table_id).into(), - ValueData::U64Value(hash).into(), + ValueData::U64Value(ts_id).into(), ) } } /// Tsid generator. +#[derive(Default)] pub struct TsidGenerator { - hasher: mur3::Hasher128, -} - -impl Default for TsidGenerator { - fn default() -> Self { - Self { - hasher: mur3::Hasher128::with_seed(TSID_HASH_SEED), - } - } + hasher: FxHasher, } impl TsidGenerator { + pub fn new(label_name_hash: u64) -> Self { + let mut hasher = FxHasher::default(); + hasher.write_u64(label_name_hash); + Self { hasher } + } + /// Writes a label pair to the generator. - pub fn write_label(&mut self, name: &str, value: &str) { - name.hash(&mut self.hasher); - value.hash(&mut self.hasher); + pub fn write_str(&mut self, value: &str) { + self.hasher.write(value.as_bytes()); + self.hasher.write_u8(0xff); } /// Generates a new TSID. pub fn finish(&mut self) -> u64 { - // TSID is 64 bits, simply truncate the 128 bits hash - let (hash, _) = self.hasher.finish128(); - hash + self.hasher.finish() } } @@ -202,6 +226,8 @@ struct ValueIndex { struct IterIndex { indices: Vec, num_primary_key_column: usize, + /// Precomputed hash for label names. + label_name_hash: u64, } impl IterIndex { @@ -252,15 +278,22 @@ impl IterIndex { } } let num_primary_key_column = primary_key_indices.len() + reserved_indices.len(); - let indices = reserved_indices - .into_iter() - .chain(primary_key_indices.values().cloned()) - .chain(ts_index) - .chain(field_indices) - .collect(); + let mut indices = Vec::with_capacity(num_primary_key_column + 2); + indices.extend(reserved_indices); + let mut label_name_hasher = TsidGenerator::default(); + for (pk_name, pk_index) in primary_key_indices { + // primary_key_indices already sorted. + label_name_hasher.write_str(pk_name); + indices.push(pk_index); + } + let label_name_hash = label_name_hasher.finish(); + + indices.extend(ts_index); + indices.extend(field_indices); IterIndex { indices, num_primary_key_column, + label_name_hash, } } } @@ -314,6 +347,13 @@ impl RowIter<'_> { }) } + /// Returns true if any label in current row is null. + fn has_null_labels(&self) -> bool { + self.index.indices[..self.index.num_primary_key_column] + .iter() + .any(|idx| self.row.values[idx.index].value_data.is_none()) + } + /// Returns the primary keys. pub fn primary_keys(&self) -> impl Iterator)> { self.index.indices[..self.index.num_primary_key_column] @@ -399,9 +439,9 @@ mod tests { let result = encoder.modify_rows_sparse(rows_iter, table_id).unwrap(); assert_eq!(result.rows[0].values.len(), 1); let encoded_primary_key = vec![ - 128, 0, 0, 4, 1, 0, 0, 4, 1, 128, 0, 0, 3, 1, 131, 9, 166, 190, 173, 37, 39, 240, 0, 0, - 0, 2, 1, 1, 49, 50, 55, 46, 48, 46, 48, 46, 9, 49, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, - 1, 1, 103, 114, 101, 112, 116, 105, 109, 101, 9, 100, 98, 0, 0, 0, 0, 0, 0, 2, + 128, 0, 0, 4, 1, 0, 0, 4, 1, 128, 0, 0, 3, 1, 37, 196, 242, 181, 117, 224, 7, 137, 0, + 0, 0, 2, 1, 1, 49, 50, 55, 46, 48, 46, 48, 46, 9, 49, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 1, 1, 1, 103, 114, 101, 112, 116, 105, 109, 101, 9, 100, 98, 0, 0, 0, 0, 0, 0, 2, ]; assert_eq!( result.rows[0].values[0], @@ -477,7 +517,7 @@ mod tests { assert_eq!(result.rows[0].values[2], ValueData::U32Value(1025).into()); assert_eq!( result.rows[0].values[3], - ValueData::U64Value(9442261431637846000).into() + ValueData::U64Value(2721566936019240841).into() ); assert_eq!(result.schema, expected_dense_schema()); } @@ -496,7 +536,7 @@ mod tests { let row_iter = rows_iter.iter_mut().next().unwrap(); let (encoded_table_id, tsid) = RowModifier::fill_internal_columns(table_id, &row_iter); assert_eq!(encoded_table_id, ValueData::U32Value(1025).into()); - assert_eq!(tsid, ValueData::U64Value(9442261431637846000).into()); + assert_eq!(tsid, ValueData::U64Value(2721566936019240841).into()); // Change the column order let schema = vec![ @@ -524,6 +564,264 @@ mod tests { let row_iter = rows_iter.iter_mut().next().unwrap(); let (encoded_table_id, tsid) = RowModifier::fill_internal_columns(table_id, &row_iter); assert_eq!(encoded_table_id, ValueData::U32Value(1025).into()); - assert_eq!(tsid, ValueData::U64Value(9442261431637846000).into()); + assert_eq!(tsid, ValueData::U64Value(2721566936019240841).into()); + } + + /// Helper function to create a schema with multiple label columns + fn create_multi_label_schema(labels: &[&str]) -> Vec { + labels + .iter() + .map(|name| ColumnSchema { + column_name: name.to_string(), + datatype: ColumnDataType::String as i32, + semantic_type: SemanticType::Tag as _, + datatype_extension: None, + options: None, + }) + .collect() + } + + /// Helper function to create a name_to_column_id map + fn create_name_to_column_id(labels: &[&str]) -> HashMap { + labels + .iter() + .enumerate() + .map(|(idx, name)| (name.to_string(), idx as ColumnId + 1)) + .collect() + } + + /// Helper function to create a row with string values + fn create_row_with_values(values: &[&str]) -> Row { + Row { + values: values + .iter() + .map(|v| ValueData::StringValue(v.to_string()).into()) + .collect(), + } + } + + /// Helper function to create a row with some null values + fn create_row_with_nulls(values: &[Option<&str>]) -> Row { + Row { + values: values + .iter() + .map(|v| { + v.map(|s| ValueData::StringValue(s.to_string()).into()) + .unwrap_or(Value { value_data: None }) + }) + .collect(), + } + } + + /// Helper function to extract TSID from a row + fn extract_tsid( + schema: Vec, + row: Row, + name_to_column_id: &HashMap, + table_id: TableId, + ) -> u64 { + let rows = Rows { + schema, + rows: vec![row], + }; + let mut rows_iter = RowsIter::new(rows, name_to_column_id); + let row_iter = rows_iter.iter_mut().next().unwrap(); + let (_, tsid_value) = RowModifier::fill_internal_columns(table_id, &row_iter); + match tsid_value.value_data { + Some(ValueData::U64Value(tsid)) => tsid, + _ => panic!("Expected U64Value for TSID"), + } + } + + #[test] + fn test_tsid_same_for_different_label_orders() { + // Test that rows with the same label name-value pairs but in different orders + // produce the same TSID + let table_id = 1025; + + // Schema 1: a, b, c + let schema1 = create_multi_label_schema(&["a", "b", "c"]); + let name_to_column_id1 = create_name_to_column_id(&["a", "b", "c"]); + let row1 = create_row_with_values(&["A", "B", "C"]); + let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id); + + // Schema 2: b, a, c (different order) + let schema2 = create_multi_label_schema(&["b", "a", "c"]); + let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c"]); + let row2 = create_row_with_values(&["B", "A", "C"]); + let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id); + + // Schema 3: c, b, a (another different order) + let schema3 = create_multi_label_schema(&["c", "b", "a"]); + let name_to_column_id3 = create_name_to_column_id(&["a", "b", "c"]); + let row3 = create_row_with_values(&["C", "B", "A"]); + let tsid3 = extract_tsid(schema3, row3, &name_to_column_id3, table_id); + + // All should have the same TSID since label names are sorted lexicographically + // and we're using the same label name-value pairs + assert_eq!( + tsid1, tsid2, + "TSID should be same for different column orders" + ); + assert_eq!( + tsid2, tsid3, + "TSID should be same for different column orders" + ); + } + + #[test] + fn test_tsid_same_with_null_labels() { + // Test that rows that differ only by null label values produce the same TSID + let table_id = 1025; + + // Row 1: a=A, b=B (no nulls, fast path) + let schema1 = create_multi_label_schema(&["a", "b"]); + let name_to_column_id1 = create_name_to_column_id(&["a", "b"]); + let row1 = create_row_with_values(&["A", "B"]); + let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id); + + // Row 2: a=A, b=B, c=null (has null, slow path) + let schema2 = create_multi_label_schema(&["a", "b", "c"]); + let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c"]); + let row2 = create_row_with_nulls(&[Some("A"), Some("B"), None]); + let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id); + + // Both should have the same TSID since null labels are ignored + assert_eq!( + tsid1, tsid2, + "TSID should be same when only difference is null label values" + ); + } + + #[test] + fn test_tsid_same_with_multiple_null_labels() { + // Test with multiple null labels + let table_id = 1025; + + // Row 1: a=A, b=B (no nulls) + let schema1 = create_multi_label_schema(&["a", "b"]); + let name_to_column_id1 = create_name_to_column_id(&["a", "b"]); + let row1 = create_row_with_values(&["A", "B"]); + let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id); + + // Row 2: a=A, b=B, c=null, d=null (multiple nulls) + let schema2 = create_multi_label_schema(&["a", "b", "c", "d"]); + let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c", "d"]); + let row2 = create_row_with_nulls(&[Some("A"), Some("B"), None, None]); + let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id); + + assert_eq!( + tsid1, tsid2, + "TSID should be same when only difference is multiple null label values" + ); + } + + #[test] + fn test_tsid_different_with_different_non_null_values() { + // Test that rows with different non-null values produce different TSIDs + let table_id = 1025; + + // Row 1: a=A, b=B + let schema1 = create_multi_label_schema(&["a", "b"]); + let name_to_column_id1 = create_name_to_column_id(&["a", "b"]); + let row1 = create_row_with_values(&["A", "B"]); + let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id); + + // Row 2: a=A, b=C (different value for b) + let schema2 = create_multi_label_schema(&["a", "b"]); + let name_to_column_id2 = create_name_to_column_id(&["a", "b"]); + let row2 = create_row_with_values(&["A", "C"]); + let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id); + + assert_ne!( + tsid1, tsid2, + "TSID should be different when label values differ" + ); + } + + #[test] + fn test_tsid_fast_path_vs_slow_path_consistency() { + // Test that fast path (no nulls) and slow path (with nulls) produce + // the same TSID for the same non-null label values + let table_id = 1025; + + // Fast path: a=A, b=B (no nulls) + let schema_fast = create_multi_label_schema(&["a", "b"]); + let name_to_column_id_fast = create_name_to_column_id(&["a", "b"]); + let row_fast = create_row_with_values(&["A", "B"]); + let tsid_fast = extract_tsid(schema_fast, row_fast, &name_to_column_id_fast, table_id); + + // Slow path: a=A, b=B, c=null (has null, triggers slow path) + let schema_slow = create_multi_label_schema(&["a", "b", "c"]); + let name_to_column_id_slow = create_name_to_column_id(&["a", "b", "c"]); + let row_slow = create_row_with_nulls(&[Some("A"), Some("B"), None]); + let tsid_slow = extract_tsid(schema_slow, row_slow, &name_to_column_id_slow, table_id); + + assert_eq!( + tsid_fast, tsid_slow, + "Fast path and slow path should produce same TSID for same non-null values" + ); + } + + #[test] + fn test_tsid_with_null_in_middle() { + // Test with null in the middle of labels + let table_id = 1025; + + // Row 1: a=A, b=B, c=C + let schema1 = create_multi_label_schema(&["a", "b", "c"]); + let name_to_column_id1 = create_name_to_column_id(&["a", "b", "c"]); + let row1 = create_row_with_values(&["A", "B", "C"]); + let tsid1 = extract_tsid(schema1, row1, &name_to_column_id1, table_id); + + // Row 2: a=A, b=null, c=C (null in middle) + let schema2 = create_multi_label_schema(&["a", "b", "c"]); + let name_to_column_id2 = create_name_to_column_id(&["a", "b", "c"]); + let row2 = create_row_with_nulls(&[Some("A"), None, Some("C")]); + let tsid2 = extract_tsid(schema2, row2, &name_to_column_id2, table_id); + + // Should be different because b is null in row2 but B in row1 + // Actually wait, let me reconsider - if b is null, it should be ignored + // So row2 should be equivalent to a=A, c=C + // But row1 is a=A, b=B, c=C, so they should be different + assert_ne!( + tsid1, tsid2, + "TSID should be different when a non-null value becomes null" + ); + + // Row 3: a=A, c=C (no b at all, equivalent to row2) + let schema3 = create_multi_label_schema(&["a", "c"]); + let name_to_column_id3 = create_name_to_column_id(&["a", "c"]); + let row3 = create_row_with_values(&["A", "C"]); + let tsid3 = extract_tsid(schema3, row3, &name_to_column_id3, table_id); + + // Row2 (a=A, b=null, c=C) should be same as row3 (a=A, c=C) + assert_eq!( + tsid2, tsid3, + "TSID should be same when null label is ignored" + ); + } + + #[test] + fn test_tsid_all_null_labels() { + // Test with all labels being null + let table_id = 1025; + + // Row with all nulls + let schema = create_multi_label_schema(&["a", "b", "c"]); + let name_to_column_id = create_name_to_column_id(&["a", "b", "c"]); + let row = create_row_with_nulls(&[None, None, None]); + let tsid = extract_tsid(schema.clone(), row, &name_to_column_id, table_id); + + // Should still produce a TSID (based on label names only when all values are null) + // This tests that the slow path handles the case where all values are null + // The TSID will be based on the label name hash only + // Test that it's consistent - same schema with all nulls should produce same TSID + let row2 = create_row_with_nulls(&[None, None, None]); + let tsid2 = extract_tsid(schema, row2, &name_to_column_id, table_id); + assert_eq!( + tsid, tsid2, + "TSID should be consistent when all label values are null" + ); } } diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index f5c1542ef8..2bafe469a5 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -4327,7 +4327,7 @@ pub async fn test_otlp_metrics_new(store_type: StorageType) { .await; // select metrics data - let expected = "[[1753780559836,0.0052544,\"arm64\",\"claude-code\",\"claude-3-5-haiku-20241022\",\"25.0.0\",\"com.anthropic.claude_code\",\"\",\"1.0.62\",\"claude-code\",\"1.0.62\",\"736525A3-F5D4-496B-933E-827AF23A5B97\",\"ghostty\",\"6DA02FD9-B5C5-4E61-9355-9FE8EC9A0CF4\"],[1753780559836,2.244618,\"arm64\",\"claude-code\",\"claude-sonnet-4-20250514\",\"25.0.0\",\"com.anthropic.claude_code\",\"\",\"1.0.62\",\"claude-code\",\"1.0.62\",\"736525A3-F5D4-496B-933E-827AF23A5B97\",\"ghostty\",\"6DA02FD9-B5C5-4E61-9355-9FE8EC9A0CF4\"]]"; + let expected = "[[1753780559836,2.244618,\"arm64\",\"claude-code\",\"claude-sonnet-4-20250514\",\"25.0.0\",\"com.anthropic.claude_code\",\"\",\"1.0.62\",\"claude-code\",\"1.0.62\",\"736525A3-F5D4-496B-933E-827AF23A5B97\",\"ghostty\",\"6DA02FD9-B5C5-4E61-9355-9FE8EC9A0CF4\"],[1753780559836,0.0052544,\"arm64\",\"claude-code\",\"claude-3-5-haiku-20241022\",\"25.0.0\",\"com.anthropic.claude_code\",\"\",\"1.0.62\",\"claude-code\",\"1.0.62\",\"736525A3-F5D4-496B-933E-827AF23A5B97\",\"ghostty\",\"6DA02FD9-B5C5-4E61-9355-9FE8EC9A0CF4\"]]"; validate_data( "otlp_metrics_all_select", &client, @@ -4399,7 +4399,7 @@ pub async fn test_otlp_metrics_new(store_type: StorageType) { .await; // select metrics data - let expected = "[[1753780559836,2.244618,\"claude-code\",\"claude-sonnet-4-20250514\",\"darwin\",\"25.0.0\",\"claude-code\",\"1.0.62\",\"736525A3-F5D4-496B-933E-827AF23A5B97\",\"ghostty\",\"6DA02FD9-B5C5-4E61-9355-9FE8EC9A0CF4\"],[1753780559836,0.0052544,\"claude-code\",\"claude-3-5-haiku-20241022\",\"darwin\",\"25.0.0\",\"claude-code\",\"1.0.62\",\"736525A3-F5D4-496B-933E-827AF23A5B97\",\"ghostty\",\"6DA02FD9-B5C5-4E61-9355-9FE8EC9A0CF4\"]]"; + let expected = "[[1753780559836,0.0052544,\"claude-code\",\"claude-3-5-haiku-20241022\",\"darwin\",\"25.0.0\",\"claude-code\",\"1.0.62\",\"736525A3-F5D4-496B-933E-827AF23A5B97\",\"ghostty\",\"6DA02FD9-B5C5-4E61-9355-9FE8EC9A0CF4\"],[1753780559836,2.244618,\"claude-code\",\"claude-sonnet-4-20250514\",\"darwin\",\"25.0.0\",\"claude-code\",\"1.0.62\",\"736525A3-F5D4-496B-933E-827AF23A5B97\",\"ghostty\",\"6DA02FD9-B5C5-4E61-9355-9FE8EC9A0CF4\"]]"; validate_data( "otlp_metrics_select", &client, diff --git a/tests-integration/tests/region_migration.rs b/tests-integration/tests/region_migration.rs index 9440b46aeb..b9f106e183 100644 --- a/tests-integration/tests/region_migration.rs +++ b/tests-integration/tests/region_migration.rs @@ -363,7 +363,7 @@ pub async fn test_metric_table_region_migration_by_sql( let result = cluster .frontend .instance - .do_query("select * from t1", query_ctx.clone()) + .do_query("select * from t1 order by host desc", query_ctx.clone()) .await .remove(0); @@ -379,7 +379,7 @@ pub async fn test_metric_table_region_migration_by_sql( let result = cluster .frontend .instance - .do_query("select * from t2", query_ctx) + .do_query("select * from t2 order by job desc", query_ctx) .await .remove(0); diff --git a/tests/cases/standalone/common/insert/logical_metric_table.result b/tests/cases/standalone/common/insert/logical_metric_table.result index ad6142050d..80e765ccd6 100644 --- a/tests/cases/standalone/common/insert/logical_metric_table.result +++ b/tests/cases/standalone/common/insert/logical_metric_table.result @@ -37,8 +37,8 @@ SELECT * from t2; +------+-------------------------+-----+ | job | ts | val | +------+-------------------------+-----+ -| job2 | 1970-01-01T00:00:00.001 | 1.0 | | job1 | 1970-01-01T00:00:00 | 0.0 | +| job2 | 1970-01-01T00:00:00.001 | 1.0 | +------+-------------------------+-----+ DROP TABLE t1; @@ -67,10 +67,10 @@ SELECT ts, val, __tsid, host, job FROM phy; +-------------------------+-----+----------------------+-------+------+ | ts | val | __tsid | host | job | +-------------------------+-----+----------------------+-------+------+ -| 1970-01-01T00:00:00.001 | 1.0 | 1128149335081630826 | host2 | | -| 1970-01-01T00:00:00 | 0.0 | 18067404594631612786 | host1 | | -| 1970-01-01T00:00:00.001 | 1.0 | 2176048834144407834 | | job2 | -| 1970-01-01T00:00:00 | 0.0 | 15980333303142110493 | | job1 | +| 1970-01-01T00:00:00.001 | 1.0 | 7947983149541006936 | host2 | | +| 1970-01-01T00:00:00 | 0.0 | 13882403126406556045 | host1 | | +| 1970-01-01T00:00:00 | 0.0 | 6248409809737953425 | | job1 | +| 1970-01-01T00:00:00.001 | 1.0 | 12867770218286207316 | | job2 | +-------------------------+-----+----------------------+-------+------+ DROP TABLE phy; @@ -123,8 +123,8 @@ SELECT * from t2; +------+-------------------------+-----+ | job | ts | val | +------+-------------------------+-----+ -| job2 | 1970-01-01T00:00:00.001 | 1.0 | | job1 | 1970-01-01T00:00:00 | 0.0 | +| job2 | 1970-01-01T00:00:00.001 | 1.0 | +------+-------------------------+-----+ ADMIN flush_table('phy'); @@ -154,10 +154,10 @@ SELECT * from t2; +------+-------------------------+-----+ | job | ts | val | +------+-------------------------+-----+ -| job2 | 1970-01-01T00:00:00.001 | 1.0 | | job3 | 1970-01-01T00:00:00 | 0.0 | -| job4 | 1970-01-01T00:00:00.001 | 1.0 | | job1 | 1970-01-01T00:00:00 | 0.0 | +| job4 | 1970-01-01T00:00:00.001 | 1.0 | +| job2 | 1970-01-01T00:00:00.001 | 1.0 | +------+-------------------------+-----+ DROP TABLE t1; diff --git a/tests/cases/standalone/common/ttl/database_ttl_with_metric_engine.result b/tests/cases/standalone/common/ttl/database_ttl_with_metric_engine.result index f326523031..ad59b7bd1a 100644 --- a/tests/cases/standalone/common/ttl/database_ttl_with_metric_engine.result +++ b/tests/cases/standalone/common/ttl/database_ttl_with_metric_engine.result @@ -22,14 +22,14 @@ INSERT INTO test_ttl(ts, val, host) VALUES Affected Rows: 3 -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; +-----+-------+ | val | host | +-----+-------+ +| 1.0 | host1 | | 2.0 | host2 | | 3.0 | host3 | -| 1.0 | host1 | +-----+-------+ -- SQLNESS SLEEP 2s @@ -83,26 +83,26 @@ ADMIN compact_table('phy'); +----------------------------+ --- should not be expired -- -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; +-----+-------+ | val | host | +-----+-------+ +| 1.0 | host1 | | 2.0 | host2 | | 3.0 | host3 | -| 1.0 | host1 | +-----+-------+ -- restart the db, ensure everything is ok -- SQLNESS ARG restart=true -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; +-----+-------+ | val | host | +-----+-------+ +| 1.0 | host1 | | 2.0 | host2 | | 3.0 | host3 | -| 1.0 | host1 | +-----+-------+ DROP TABLE test_ttl; diff --git a/tests/cases/standalone/common/ttl/database_ttl_with_metric_engine.sql b/tests/cases/standalone/common/ttl/database_ttl_with_metric_engine.sql index f4d37e7fba..d1deaf004a 100644 --- a/tests/cases/standalone/common/ttl/database_ttl_with_metric_engine.sql +++ b/tests/cases/standalone/common/ttl/database_ttl_with_metric_engine.sql @@ -13,7 +13,7 @@ INSERT INTO test_ttl(ts, val, host) VALUES (now(), 2, 'host2'), (now(), 3, 'host3'); -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; -- SQLNESS SLEEP 2s ADMIN flush_table('phy'); @@ -35,11 +35,11 @@ ADMIN flush_table('phy'); ADMIN compact_table('phy'); --- should not be expired -- -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; -- restart the db, ensure everything is ok -- SQLNESS ARG restart=true -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; DROP TABLE test_ttl; diff --git a/tests/cases/standalone/common/ttl/metric_engine_ttl.result b/tests/cases/standalone/common/ttl/metric_engine_ttl.result index badcb715f9..6152c0cd58 100644 --- a/tests/cases/standalone/common/ttl/metric_engine_ttl.result +++ b/tests/cases/standalone/common/ttl/metric_engine_ttl.result @@ -13,14 +13,14 @@ INSERT INTO test_ttl(ts, val, host) VALUES Affected Rows: 3 -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; +-----+-------+ | val | host | +-----+-------+ +| 1.0 | host1 | | 2.0 | host2 | | 3.0 | host3 | -| 1.0 | host1 | +-----+-------+ -- SQLNESS SLEEP 2s @@ -74,26 +74,26 @@ ADMIN compact_table('phy'); +----------------------------+ --- should not be expired -- -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; +-----+-------+ | val | host | +-----+-------+ +| 1.0 | host1 | | 2.0 | host2 | | 3.0 | host3 | -| 1.0 | host1 | +-----+-------+ -- restart the db, ensure everything is ok -- SQLNESS ARG restart=true -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; +-----+-------+ | val | host | +-----+-------+ +| 1.0 | host1 | | 2.0 | host2 | | 3.0 | host3 | -| 1.0 | host1 | +-----+-------+ DROP TABLE test_ttl; diff --git a/tests/cases/standalone/common/ttl/metric_engine_ttl.sql b/tests/cases/standalone/common/ttl/metric_engine_ttl.sql index a556bc1d9e..c5fce4e916 100644 --- a/tests/cases/standalone/common/ttl/metric_engine_ttl.sql +++ b/tests/cases/standalone/common/ttl/metric_engine_ttl.sql @@ -7,7 +7,7 @@ INSERT INTO test_ttl(ts, val, host) VALUES (now(), 2, 'host2'), (now(), 3, 'host3'); -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; -- SQLNESS SLEEP 2s ADMIN flush_table('phy'); @@ -29,11 +29,11 @@ ADMIN flush_table('phy'); ADMIN compact_table('phy'); --- should not be expired -- -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; -- restart the db, ensure everything is ok -- SQLNESS ARG restart=true -SELECT val, host FROM test_ttl; +SELECT val, host FROM test_ttl ORDER BY host; DROP TABLE test_ttl;