From fb2dd862d5b29f07b30511bce3e9919d0c789b8a Mon Sep 17 00:00:00 2001 From: Yao Noel Achi <43069141+ynachi@users.noreply.github.com> Date: Tue, 31 Mar 2026 03:51:09 +0200 Subject: [PATCH] fix: avoid cloning serialized view plans on resolve (#7882) fix: avoid cloning serialized view plan on resolve - Change `ViewInfoValue.view_info` from `Vec` to `common_base::bytes::Bytes` so resolving a view no longer clones the full serialized plan buffer on every decode. - To keep the change narrow, the metadata write boundary still accepts `Vec` and converts once when constructing/updating `ViewInfoValue`. The hot read path now uses a cheap clone of the stored bytes. - The benches introduced revealed up to 82% resolution time improvment. Signed-off-by: Yao ACHI --- src/catalog/src/table_source.rs | 7 +--- src/common/meta/src/key.rs | 8 ++-- src/common/meta/src/key/view_info.rs | 61 ++++++++++++++++++++++++---- 3 files changed, 59 insertions(+), 17 deletions(-) diff --git a/src/catalog/src/table_source.rs b/src/catalog/src/table_source.rs index f7ba51722f..fd78cc2573 100644 --- a/src/catalog/src/table_source.rs +++ b/src/catalog/src/table_source.rs @@ -15,7 +15,6 @@ use std::collections::HashMap; use std::sync::Arc; -use bytes::Bytes; use common_catalog::format_full_table_name; use common_query::logical_plan::{SubstraitPlanDecoderRef, rename_logical_plan_columns}; use datafusion::common::{ResolvedTableReference, TableReference}; @@ -151,11 +150,7 @@ impl DfTableSourceProvider { let catalog_list = Arc::new(DummyCatalogList::new(self.catalog_manager.clone())); let logical_plan = self .plan_decoder - .decode( - Bytes::from(view_info.view_info.clone()), - catalog_list, - false, - ) + .decode(view_info.view_info.clone().into(), catalog_list, false) .await .context(DecodePlanSnafu { name: &table.table_info().name, diff --git a/src/common/meta/src/key.rs b/src/common/meta/src/key.rs index 97b68f9b04..332c60f225 100644 --- a/src/common/meta/src/key.rs +++ b/src/common/meta/src/key.rs @@ -708,7 +708,7 @@ impl TableMetadataManager { // Creates view info let view_info_value = ViewInfoValue::new( - raw_logical_plan, + raw_logical_plan.into(), table_names, columns, plan_columns, @@ -1184,7 +1184,7 @@ impl TableMetadataManager { definition: String, ) -> Result<()> { let new_view_info_value = current_view_info_value.update( - new_view_info, + new_view_info.into(), table_names, columns, plan_columns, @@ -2752,7 +2752,7 @@ mod tests { let new_definition = "CREATE VIEW test AS SELECT * FROM b_table join c_table"; let current_view_info_value = DeserializedValueWithBytes::from_inner(ViewInfoValue::new( - logical_plan.clone(), + logical_plan.clone().into(), table_names, columns, plan_columns, @@ -2803,7 +2803,7 @@ mod tests { let wrong_definition = "wrong_definition"; let wrong_view_info_value = DeserializedValueWithBytes::from_inner(current_view_info_value.update( - wrong_view_info, + wrong_view_info.into(), new_table_names.clone(), new_columns.clone(), new_plan_columns.clone(), diff --git a/src/common/meta/src/key/view_info.rs b/src/common/meta/src/key/view_info.rs index 82be00b26d..5efc2c9bb7 100644 --- a/src/common/meta/src/key/view_info.rs +++ b/src/common/meta/src/key/view_info.rs @@ -16,6 +16,7 @@ use std::collections::{HashMap, HashSet}; use std::fmt::Display; use std::sync::Arc; +use common_base::bytes::Bytes; use serde::{Deserialize, Serialize}; use snafu::OptionExt; use table::metadata::TableId; @@ -31,9 +32,6 @@ use crate::kv_backend::KvBackendRef; use crate::kv_backend::txn::Txn; use crate::rpc::store::BatchGetRequest; -/// The VIEW logical plan encoded bytes -type RawViewLogicalPlan = Vec; - /// The key stores the metadata of the view. /// /// The layout: `__view_info/{view_id}`. @@ -86,7 +84,7 @@ impl MetadataKey<'_, ViewInfoKey> for ViewInfoKey { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct ViewInfoValue { // The encoded logical plan - pub view_info: RawViewLogicalPlan, + pub view_info: Bytes, // The resolved fully table names in logical plan pub table_names: HashSet, // The view columns @@ -100,7 +98,7 @@ pub struct ViewInfoValue { impl ViewInfoValue { pub fn new( - view_info: RawViewLogicalPlan, + view_info: Bytes, table_names: HashSet, columns: Vec, plan_columns: Vec, @@ -118,7 +116,7 @@ impl ViewInfoValue { pub(crate) fn update( &self, - new_view_info: RawViewLogicalPlan, + new_view_info: Bytes, table_names: HashSet, columns: Vec, plan_columns: Vec, @@ -305,7 +303,7 @@ mod tests { }; let value = ViewInfoValue { - view_info: vec![1, 2, 3], + view_info: Bytes::from([1, 2, 3].as_ref()), version: 1, table_names, columns: vec!["a".to_string()], @@ -316,4 +314,53 @@ mod tests { let deserialized = ViewInfoValue::try_from_raw_value(&serialized).unwrap(); assert_eq!(value, deserialized); } + + #[test] + fn test_deserialize_view_info_value_with_vec_u8() { + #[derive(Serialize)] + struct OldViewInfoValue { + view_info: Vec, + table_names: HashSet, + columns: Vec, + plan_columns: Vec, + definition: String, + version: u64, + } + + let table_names = { + let mut set = HashSet::new(); + set.insert(TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "a_table".to_string(), + }); + set.insert(TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "b_table".to_string(), + }); + set + }; + + let old_value = OldViewInfoValue { + view_info: vec![1, 2, 3], + table_names: table_names.clone(), + columns: vec!["a".to_string()], + plan_columns: vec!["number".to_string()], + definition: "CREATE VIEW test AS SELECT * FROM numbers".to_string(), + version: 1, + }; + + let serialized = serde_json::to_vec(&old_value).unwrap(); + let deserialized = ViewInfoValue::try_from_raw_value(&serialized).unwrap(); + + assert_eq!(deserialized.view_info, vec![1, 2, 3]); + assert_eq!(deserialized.table_names, table_names); + assert_eq!(deserialized.columns, vec!["a".to_string()]); + assert_eq!(deserialized.plan_columns, vec!["number".to_string()]); + assert_eq!( + deserialized.definition, + "CREATE VIEW test AS SELECT * FROM numbers" + ); + } }