Compare commits


6 Commits

Author SHA1 Message Date
JohnsonLee
a8477e4142 fix: table resolving logic related to pg_catalog (#4580)
* fix: table resolving logic related to pg_catalog

refer to
https://github.com/GreptimeTeam/greptimedb/issues/3560#issuecomment-2287794348
and #4543

* refactor: remove CatalogProtocol type

* fix: sqlness

* fix: forbid create database pg_catalog with mysql client

* refactor: use QueryContext as arguments rather than Channel

* refactor: pass None as default behaviour in information_schema

* test: fix test
2024-09-09 00:47:59 +00:00
Yiran
b950e705f5 chore: update the document link in README.md (#4690) 2024-09-07 15:27:32 +00:00
Ruihang Xia
d2d62e0c6f fix: unconditional statistics (#4694)
* fix: unconditional statistics

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more sqlness case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-07 04:28:11 +00:00
localhost
5d9f8a3be7 feat: add test pipeline api (#4667)
* chore: add test pipeline api

* chore: add test for test pipeline api

* chore: fix taplo check

* chore: change pipeline dryrun api path

* chore: add more info for pipeline dryrun api
2024-09-06 08:36:49 +00:00
jeremyhi
e88465840d feat: add extension field to HeartbeatRequest (#4688)
* feat: add extension field to HeartbeatRequest

* chore: extension to extensions

* chore: upgrade proto
2024-09-06 08:29:20 +00:00
localhost
67d95d2088 refactor!: add processor builder and transform builder (#4571)
* chore: add processor builder and transform builder

* chore: in process

* chore: intermediate state from hashmap to vector in pipeline

* chore: remove useless code and rename some struct

* chore: fix typos

* chore: format code

* chore: add error handling and optimize code readability

* chore: fix typos

* chore: remove useless code

* chore: add some doc

* chore: fix by pr commit

* chore: remove useless code and change struct name

* chore: modify the location of the find_key_index function.
2024-09-06 07:51:08 +00:00
97 changed files with 3462 additions and 2486 deletions

Cargo.lock generated
View File

@@ -3156,6 +3156,7 @@ dependencies = [
"arrow",
"arrow-array",
"arrow-schema",
"base64 0.21.7",
"common-base",
"common-decimal",
"common-error",
@@ -3164,6 +3165,7 @@ dependencies = [
"common-time",
"datafusion-common",
"enum_dispatch",
"greptime-proto",
"num",
"num-traits",
"ordered-float 3.9.2",
@@ -4300,7 +4302,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=c437b55725b7f5224fe9d46db21072b4a682ee4b#c437b55725b7f5224fe9d46db21072b4a682ee4b"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=157cfdb52709e489cf1f3ce8e3042ed4ee8a524a#157cfdb52709e489cf1f3ce8e3042ed4ee8a524a"
dependencies = [
"prost 0.12.6",
"serde",

View File

@@ -120,7 +120,7 @@ etcd-client = { version = "0.13" }
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "c437b55725b7f5224fe9d46db21072b4a682ee4b" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "157cfdb52709e489cf1f3ce8e3042ed4ee8a524a" }
humantime = "2.1"
humantime-serde = "1.1"
itertools = "0.10"

View File

@@ -74,7 +74,7 @@ Our core developers have been building time-series data platforms for years. Bas
* **Compatible with InfluxDB, Prometheus and more protocols**
Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/clients/overview).
Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/protocols/overview).
## Try GreptimeDB

View File

@@ -21,14 +21,14 @@ use greptime_proto::v1::region::RegionResponse as RegionResponseV1;
#[derive(Debug)]
pub struct RegionResponse {
pub affected_rows: AffectedRows,
pub extension: HashMap<String, Vec<u8>>,
pub extensions: HashMap<String, Vec<u8>>,
}
impl RegionResponse {
pub fn from_region_response(region_response: RegionResponseV1) -> Self {
Self {
affected_rows: region_response.affected_rows as _,
extension: region_response.extension,
extensions: region_response.extensions,
}
}
@@ -36,7 +36,7 @@ impl RegionResponse {
pub fn new(affected_rows: AffectedRows) -> Self {
Self {
affected_rows,
extension: Default::default(),
extensions: Default::default(),
}
}
}

View File

@@ -36,6 +36,7 @@ use futures_util::{StreamExt, TryStreamExt};
use meta_client::client::MetaClient;
use moka::sync::Cache;
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use session::context::{Channel, QueryContext};
use snafu::prelude::*;
use table::dist_table::DistTable;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
@@ -152,7 +153,11 @@ impl CatalogManager for KvBackendCatalogManager {
Ok(keys)
}
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
async fn schema_names(
&self,
catalog: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>> {
let stream = self
.table_metadata_manager
.schema_manager()
@@ -163,12 +168,17 @@ impl CatalogManager for KvBackendCatalogManager {
.map_err(BoxedError::new)
.context(ListSchemasSnafu { catalog })?;
keys.extend(self.system_catalog.schema_names());
keys.extend(self.system_catalog.schema_names(query_ctx));
Ok(keys.into_iter().collect())
}
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
async fn table_names(
&self,
catalog: &str,
schema: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>> {
let stream = self
.table_metadata_manager
.table_name_manager()
@@ -181,7 +191,7 @@ impl CatalogManager for KvBackendCatalogManager {
.into_iter()
.map(|(k, _)| k)
.collect::<Vec<_>>();
tables.extend_from_slice(&self.system_catalog.table_names(schema));
tables.extend_from_slice(&self.system_catalog.table_names(schema, query_ctx));
Ok(tables.into_iter().collect())
}
@@ -194,8 +204,13 @@ impl CatalogManager for KvBackendCatalogManager {
.context(TableMetadataManagerSnafu)
}
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
if self.system_catalog.schema_exists(schema) {
async fn schema_exists(
&self,
catalog: &str,
schema: &str,
query_ctx: Option<&QueryContext>,
) -> Result<bool> {
if self.system_catalog.schema_exists(schema, query_ctx) {
return Ok(true);
}
@@ -206,8 +221,14 @@ impl CatalogManager for KvBackendCatalogManager {
.context(TableMetadataManagerSnafu)
}
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
if self.system_catalog.table_exists(schema, table) {
async fn table_exists(
&self,
catalog: &str,
schema: &str,
table: &str,
query_ctx: Option<&QueryContext>,
) -> Result<bool> {
if self.system_catalog.table_exists(schema, table, query_ctx) {
return Ok(true);
}
@@ -225,10 +246,12 @@ impl CatalogManager for KvBackendCatalogManager {
catalog_name: &str,
schema_name: &str,
table_name: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Option<TableRef>> {
if let Some(table) = self
.system_catalog
.table(catalog_name, schema_name, table_name)
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
if let Some(table) =
self.system_catalog
.table(catalog_name, schema_name, table_name, query_ctx)
{
return Ok(Some(table));
}
@@ -236,23 +259,45 @@ impl CatalogManager for KvBackendCatalogManager {
let table_cache: TableCacheRef = self.cache_registry.get().context(CacheNotFoundSnafu {
name: "table_cache",
})?;
table_cache
if let Some(table) = table_cache
.get_by_ref(&TableName {
catalog_name: catalog_name.to_string(),
schema_name: schema_name.to_string(),
table_name: table_name.to_string(),
})
.await
.context(GetTableCacheSnafu)
.context(GetTableCacheSnafu)?
{
return Ok(Some(table));
}
if channel == Channel::Postgres {
// falldown to pg_catalog
if let Some(table) =
self.system_catalog
.table(catalog_name, PG_CATALOG_NAME, table_name, query_ctx)
{
return Ok(Some(table));
}
}
return Ok(None);
}
fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result<TableRef>> {
fn tables<'a>(
&'a self,
catalog: &'a str,
schema: &'a str,
query_ctx: Option<&'a QueryContext>,
) -> BoxStream<'a, Result<TableRef>> {
let sys_tables = try_stream!({
// System tables
let sys_table_names = self.system_catalog.table_names(schema);
let sys_table_names = self.system_catalog.table_names(schema, query_ctx);
for table_name in sys_table_names {
if let Some(table) = self.system_catalog.table(catalog, schema, &table_name) {
if let Some(table) =
self.system_catalog
.table(catalog, schema, &table_name, query_ctx)
{
yield table;
}
}
@@ -320,18 +365,27 @@ struct SystemCatalog {
}
impl SystemCatalog {
// TODO(j0hn50n133): remove the duplicated hard-coded table names logic
fn schema_names(&self) -> Vec<String> {
vec![
INFORMATION_SCHEMA_NAME.to_string(),
PG_CATALOG_NAME.to_string(),
]
fn schema_names(&self, query_ctx: Option<&QueryContext>) -> Vec<String> {
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
match channel {
// pg_catalog only visible under postgres protocol
Channel::Postgres => vec![
INFORMATION_SCHEMA_NAME.to_string(),
PG_CATALOG_NAME.to_string(),
],
_ => {
vec![INFORMATION_SCHEMA_NAME.to_string()]
}
}
}
fn table_names(&self, schema: &str) -> Vec<String> {
fn table_names(&self, schema: &str, query_ctx: Option<&QueryContext>) -> Vec<String> {
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
match schema {
INFORMATION_SCHEMA_NAME => self.information_schema_provider.table_names(),
PG_CATALOG_NAME => self.pg_catalog_provider.table_names(),
PG_CATALOG_NAME if channel == Channel::Postgres => {
self.pg_catalog_provider.table_names()
}
DEFAULT_SCHEMA_NAME => {
vec![NUMBERS_TABLE_NAME.to_string()]
}
@@ -339,23 +393,35 @@ impl SystemCatalog {
}
}
fn schema_exists(&self, schema: &str) -> bool {
schema == INFORMATION_SCHEMA_NAME || schema == PG_CATALOG_NAME
fn schema_exists(&self, schema: &str, query_ctx: Option<&QueryContext>) -> bool {
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
match channel {
Channel::Postgres => schema == PG_CATALOG_NAME || schema == INFORMATION_SCHEMA_NAME,
_ => schema == INFORMATION_SCHEMA_NAME,
}
}
fn table_exists(&self, schema: &str, table: &str) -> bool {
fn table_exists(&self, schema: &str, table: &str, query_ctx: Option<&QueryContext>) -> bool {
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
if schema == INFORMATION_SCHEMA_NAME {
self.information_schema_provider.table(table).is_some()
} else if schema == DEFAULT_SCHEMA_NAME {
table == NUMBERS_TABLE_NAME
} else if schema == PG_CATALOG_NAME {
} else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
self.pg_catalog_provider.table(table).is_some()
} else {
false
}
}
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Option<TableRef> {
fn table(
&self,
catalog: &str,
schema: &str,
table_name: &str,
query_ctx: Option<&QueryContext>,
) -> Option<TableRef> {
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
if schema == INFORMATION_SCHEMA_NAME {
let information_schema_provider =
self.catalog_cache.get_with_by_ref(catalog, move || {
@@ -366,7 +432,7 @@ impl SystemCatalog {
))
});
information_schema_provider.table(table_name)
} else if schema == PG_CATALOG_NAME {
} else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
if catalog == DEFAULT_CATALOG_NAME {
self.pg_catalog_provider.table(table_name)
} else {
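The rule these hunks implement, as a reduced, self-contained sketch (illustrative names only, not the crate's actual SystemCatalog/KvBackendCatalogManager API): pg_catalog is advertised only to Postgres clients, and a table lookup from a Postgres client falls back to pg_catalog when the user schema misses.

#[derive(PartialEq, Clone, Copy)]
enum Channel {
    Mysql,
    Postgres,
    Unknown,
}

const INFORMATION_SCHEMA_NAME: &str = "information_schema";
const PG_CATALOG_NAME: &str = "pg_catalog";

// System schemas visible to a client depend on the protocol it connected with:
// pg_catalog is only advertised to Postgres clients.
fn system_schema_names(channel: Channel) -> Vec<&'static str> {
    match channel {
        Channel::Postgres => vec![INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME],
        _ => vec![INFORMATION_SCHEMA_NAME],
    }
}

// Table lookup falls back to pg_catalog for Postgres clients when the table is
// not found in the user schema, mirroring the `channel == Channel::Postgres`
// branch added above.
fn resolve_table(found_in_user_schema: bool, table: &str, channel: Channel) -> Option<String> {
    if found_in_user_schema {
        return Some(table.to_string());
    }
    if channel == Channel::Postgres {
        return Some(format!("{PG_CATALOG_NAME}.{table}"));
    }
    None
}

fn main() {
    assert_eq!(system_schema_names(Channel::Mysql), vec![INFORMATION_SCHEMA_NAME]);
    assert_eq!(system_schema_names(Channel::Unknown), vec![INFORMATION_SCHEMA_NAME]);
    assert!(resolve_table(false, "pg_class", Channel::Postgres).is_some());
    assert!(resolve_table(false, "pg_class", Channel::Mysql).is_none());
}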

View File

@@ -20,8 +20,10 @@ use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use api::v1::CreateTableExpr;
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME};
use futures::future::BoxFuture;
use futures_util::stream::BoxStream;
use session::context::QueryContext;
use table::metadata::TableId;
use table::TableRef;
@@ -44,15 +46,35 @@ pub trait CatalogManager: Send + Sync {
async fn catalog_names(&self) -> Result<Vec<String>>;
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>>;
async fn schema_names(
&self,
catalog: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>>;
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>>;
async fn table_names(
&self,
catalog: &str,
schema: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>>;
async fn catalog_exists(&self, catalog: &str) -> Result<bool>;
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool>;
async fn schema_exists(
&self,
catalog: &str,
schema: &str,
query_ctx: Option<&QueryContext>,
) -> Result<bool>;
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool>;
async fn table_exists(
&self,
catalog: &str,
schema: &str,
table: &str,
query_ctx: Option<&QueryContext>,
) -> Result<bool>;
/// Returns the table by catalog, schema and table name.
async fn table(
@@ -60,10 +82,25 @@ pub trait CatalogManager: Send + Sync {
catalog: &str,
schema: &str,
table_name: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Option<TableRef>>;
/// Returns all tables with a stream by catalog and schema.
fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result<TableRef>>;
fn tables<'a>(
&'a self,
catalog: &'a str,
schema: &'a str,
query_ctx: Option<&'a QueryContext>,
) -> BoxStream<'a, Result<TableRef>>;
/// Check if `schema` is a reserved schema name
fn is_reserved_schema_name(&self, schema: &str) -> bool {
// We have to check whether a schema name is reserved before create schema.
// We need this rather than use schema_exists directly because `pg_catalog` is
// only visible via postgres protocol. So if we don't check, a mysql client may
// create a schema named `pg_catalog` which is somehow malformed.
schema == INFORMATION_SCHEMA_NAME || schema == PG_CATALOG_NAME
}
}
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
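The comment on is_reserved_schema_name is the crux of the "forbid create database pg_catalog with mysql client" change: schema_exists hides pg_catalog from non-Postgres channels, so CREATE DATABASE must consult the reserved list explicitly. A minimal sketch of that guard, with assumed names rather than the real statement executor:

const INFORMATION_SCHEMA_NAME: &str = "information_schema";
const PG_CATALOG_NAME: &str = "pg_catalog";

fn is_reserved_schema_name(schema: &str) -> bool {
    schema == INFORMATION_SCHEMA_NAME || schema == PG_CATALOG_NAME
}

// schema_exists() reports false for pg_catalog on a MySQL connection (the
// schema is hidden from that channel), so CREATE DATABASE has to check the
// reserved list before creating anything.
fn create_database(name: &str, already_exists: bool) -> Result<(), String> {
    if is_reserved_schema_name(name) {
        return Err(format!("schema {name} is reserved"));
    }
    if already_exists {
        return Err(format!("schema {name} already exists"));
    }
    // proceed with the create-database procedure
    Ok(())
}

fn main() {
    assert!(create_database("pg_catalog", false).is_err());
    assert!(create_database("my_db", false).is_ok());
}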

View File

@@ -26,6 +26,7 @@ use common_catalog::consts::{
use common_meta::key::flow::FlowMetadataManager;
use common_meta::kv_backend::memory::MemoryKvBackend;
use futures_util::stream::BoxStream;
use session::context::QueryContext;
use snafu::OptionExt;
use table::TableRef;
@@ -53,7 +54,11 @@ impl CatalogManager for MemoryCatalogManager {
Ok(self.catalogs.read().unwrap().keys().cloned().collect())
}
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
async fn schema_names(
&self,
catalog: &str,
_query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
@@ -67,7 +72,12 @@ impl CatalogManager for MemoryCatalogManager {
.collect())
}
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
async fn table_names(
&self,
catalog: &str,
schema: &str,
_query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
@@ -87,11 +97,22 @@ impl CatalogManager for MemoryCatalogManager {
self.catalog_exist_sync(catalog)
}
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
async fn schema_exists(
&self,
catalog: &str,
schema: &str,
_query_ctx: Option<&QueryContext>,
) -> Result<bool> {
self.schema_exist_sync(catalog, schema)
}
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
async fn table_exists(
&self,
catalog: &str,
schema: &str,
table: &str,
_query_ctx: Option<&QueryContext>,
) -> Result<bool> {
let catalogs = self.catalogs.read().unwrap();
Ok(catalogs
.get(catalog)
@@ -108,6 +129,7 @@ impl CatalogManager for MemoryCatalogManager {
catalog: &str,
schema: &str,
table_name: &str,
_query_ctx: Option<&QueryContext>,
) -> Result<Option<TableRef>> {
let result = try {
self.catalogs
@@ -121,7 +143,12 @@ impl CatalogManager for MemoryCatalogManager {
Ok(result)
}
fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result<TableRef>> {
fn tables<'a>(
&'a self,
catalog: &'a str,
schema: &'a str,
_query_ctx: Option<&QueryContext>,
) -> BoxStream<'a, Result<TableRef>> {
let catalogs = self.catalogs.read().unwrap();
let Some(schemas) = catalogs.get(catalog) else {
@@ -371,11 +398,12 @@ mod tests {
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
NUMBERS_TABLE_NAME,
None,
)
.await
.unwrap()
.unwrap();
let stream = catalog_list.tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
let stream = catalog_list.tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, None);
let tables = stream.try_collect::<Vec<_>>().await.unwrap();
assert_eq!(tables.len(), 1);
assert_eq!(
@@ -384,7 +412,12 @@ mod tests {
);
assert!(catalog_list
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists")
.table(
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
"not_exists",
None
)
.await
.unwrap()
.is_none());
@@ -411,7 +444,7 @@ mod tests {
};
catalog.register_table_sync(register_table_req).unwrap();
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name, None)
.await
.unwrap()
.is_some());
@@ -423,7 +456,7 @@ mod tests {
};
catalog.deregister_table_sync(deregister_table_req).unwrap();
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name, None)
.await
.unwrap()
.is_none());

View File

@@ -257,8 +257,8 @@ impl InformationSchemaColumnsBuilder {
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let keys = &table.table_info().meta.primary_key_indices;

View File

@@ -212,8 +212,8 @@ impl InformationSchemaKeyColumnUsageBuilder {
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let mut primary_constraints = vec![];

View File

@@ -240,9 +240,9 @@ impl InformationSchemaPartitionsBuilder {
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let table_info_stream = catalog_manager
.tables(&catalog_name, &schema_name)
.tables(&catalog_name, &schema_name, None)
.try_filter_map(|t| async move {
let table_info = t.table_info();
if table_info.table_type == TableType::Temporary {

View File

@@ -176,9 +176,9 @@ impl InformationSchemaRegionPeersBuilder {
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let table_id_stream = catalog_manager
.tables(&catalog_name, &schema_name)
.tables(&catalog_name, &schema_name, None)
.try_filter_map(|t| async move {
let table_info = t.table_info();
if table_info.table_type == TableType::Temporary {

View File

@@ -171,7 +171,7 @@ impl InformationSchemaSchemataBuilder {
let table_metadata_manager = utils::table_meta_manager(&self.catalog_manager)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let opts = if let Some(table_metadata_manager) = &table_metadata_manager {
table_metadata_manager
.schema_manager()

View File

@@ -176,8 +176,8 @@ impl InformationSchemaTableConstraintsBuilder {
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let keys = &table.table_info().meta.primary_key_indices;

View File

@@ -234,8 +234,8 @@ impl InformationSchemaTablesBuilder {
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let table_info = table.table_info();

View File

@@ -192,8 +192,8 @@ impl InformationSchemaViewsBuilder {
.context(CastManagerSnafu)?
.view_info_cache()?;
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let table_info = table.table_info();

View File

@@ -18,15 +18,16 @@ mod pg_namespace;
mod table_names;
use std::collections::HashMap;
use std::sync::{Arc, Weak};
use std::sync::{Arc, LazyLock, Weak};
use common_catalog::consts::{self, PG_CATALOG_NAME};
use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, PG_CATALOG_NAME};
use datatypes::schema::ColumnSchema;
use lazy_static::lazy_static;
use paste::paste;
use pg_catalog_memory_table::get_schema_columns;
use pg_class::PGClass;
use pg_namespace::PGNamespace;
use session::context::{Channel, QueryContext};
use table::TableRef;
pub use table_names::*;
@@ -142,3 +143,12 @@ impl SystemSchemaProviderInner for PGCatalogProvider {
&self.catalog_name
}
}
/// Provide query context to call the [`CatalogManager`]'s method.
static PG_QUERY_CTX: LazyLock<QueryContext> = LazyLock::new(|| {
QueryContext::with_channel(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, Channel::Postgres)
});
fn query_ctx() -> Option<&'static QueryContext> {
Some(&PG_QUERY_CTX)
}
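PG_QUERY_CTX pins the channel to Postgres so the pg_catalog tables can call back into the CatalogManager and still see their own schema. The same std::sync::LazyLock pattern in isolation, with placeholder fields instead of the real QueryContext:

use std::sync::LazyLock;

#[derive(Debug)]
struct QueryContext {
    catalog: &'static str,
    schema: &'static str,
    channel: &'static str,
}

// Built once on first use, then shared for the lifetime of the process.
static PG_QUERY_CTX: LazyLock<QueryContext> = LazyLock::new(|| QueryContext {
    catalog: "greptime",
    schema: "public",
    channel: "postgres",
});

fn query_ctx() -> Option<&'static QueryContext> {
    Some(&PG_QUERY_CTX)
}

fn main() {
    println!("{:?}", query_ctx().unwrap());
}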

View File

@@ -32,7 +32,7 @@ use store_api::storage::ScanRequest;
use table::metadata::TableType;
use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
use super::{OID_COLUMN_NAME, PG_CLASS};
use super::{query_ctx, OID_COLUMN_NAME, PG_CLASS};
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
@@ -202,8 +202,11 @@ impl PGClassBuilder {
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager
.schema_names(&catalog_name, query_ctx())
.await?
{
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, query_ctx());
while let Some(table) = stream.try_next().await? {
let table_info = table.table_info();
self.add_class(

View File

@@ -31,7 +31,7 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
use snafu::{OptionExt, ResultExt};
use store_api::storage::ScanRequest;
use super::{PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
use super::{query_ctx, PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
@@ -180,7 +180,10 @@ impl PGNamespaceBuilder {
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
for schema_name in catalog_manager
.schema_names(&catalog_name, query_ctx())
.await?
{
self.add_namespace(&predicates, &schema_name);
}
self.finish()

View File

@@ -23,7 +23,7 @@ use datafusion::datasource::view::ViewTable;
use datafusion::datasource::{provider_as_source, TableProvider};
use datafusion::logical_expr::TableSource;
use itertools::Itertools;
use session::context::QueryContext;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableType;
use table::table::adapter::DfTableProviderAdapter;
@@ -45,6 +45,7 @@ pub struct DfTableSourceProvider {
disallow_cross_catalog_query: bool,
default_catalog: String,
default_schema: String,
query_ctx: QueryContextRef,
plan_decoder: SubstraitPlanDecoderRef,
enable_ident_normalization: bool,
}
@@ -53,7 +54,7 @@ impl DfTableSourceProvider {
pub fn new(
catalog_manager: CatalogManagerRef,
disallow_cross_catalog_query: bool,
query_ctx: &QueryContext,
query_ctx: QueryContextRef,
plan_decoder: SubstraitPlanDecoderRef,
enable_ident_normalization: bool,
) -> Self {
@@ -63,6 +64,7 @@ impl DfTableSourceProvider {
resolved_tables: HashMap::new(),
default_catalog: query_ctx.current_catalog().to_owned(),
default_schema: query_ctx.current_schema(),
query_ctx,
plan_decoder,
enable_ident_normalization,
}
@@ -71,8 +73,7 @@ impl DfTableSourceProvider {
pub fn resolve_table_ref(&self, table_ref: TableReference) -> Result<ResolvedTableReference> {
if self.disallow_cross_catalog_query {
match &table_ref {
TableReference::Bare { .. } => (),
TableReference::Partial { .. } => {}
TableReference::Bare { .. } | TableReference::Partial { .. } => {}
TableReference::Full {
catalog, schema, ..
} => {
@@ -107,7 +108,7 @@ impl DfTableSourceProvider {
let table = self
.catalog_manager
.table(catalog_name, schema_name, table_name)
.table(catalog_name, schema_name, table_name, Some(&self.query_ctx))
.await?
.with_context(|| TableNotExistSnafu {
table: format_full_table_name(catalog_name, schema_name, table_name),
@@ -210,12 +211,12 @@ mod tests {
#[test]
fn test_validate_table_ref() {
let query_ctx = &QueryContext::with("greptime", "public");
let query_ctx = Arc::new(QueryContext::with("greptime", "public"));
let table_provider = DfTableSourceProvider::new(
MemoryCatalogManager::with_default_setup(),
true,
query_ctx,
query_ctx.clone(),
DummyDecoder::arc(),
true,
);
@@ -308,7 +309,7 @@ mod tests {
#[tokio::test]
async fn test_resolve_view() {
let query_ctx = &QueryContext::with("greptime", "public");
let query_ctx = Arc::new(QueryContext::with("greptime", "public"));
let backend = Arc::new(MemoryKvBackend::default());
let layered_cache_builder = LayeredCacheRegistryBuilder::default()
.add_cache_registry(CacheRegistryBuilder::default().build());
@@ -344,8 +345,13 @@ mod tests {
.await
.unwrap();
let mut table_provider =
DfTableSourceProvider::new(catalog_manager, true, query_ctx, MockDecoder::arc(), true);
let mut table_provider = DfTableSourceProvider::new(
catalog_manager,
true,
query_ctx.clone(),
MockDecoder::arc(),
true,
);
// View not found
let table_ref = TableReference::bare("not_exists_view");
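DfTableSourceProvider now stores the context and hands it to every table() lookup, which is why the constructor takes an owned QueryContextRef (an Arc) instead of a borrowed &QueryContext. A reduced sketch of that ownership change, with illustrative types:

use std::sync::Arc;

struct QueryContext {
    catalog: String,
    schema: String,
}

type QueryContextRef = Arc<QueryContext>;

struct TableSourceProvider {
    default_schema: String,
    // Owned handle: the provider keeps the context and passes it to every later
    // table lookup, so a borrowed &QueryContext would tie the provider to the
    // borrow's lifetime.
    query_ctx: QueryContextRef,
}

impl TableSourceProvider {
    fn new(query_ctx: QueryContextRef) -> Self {
        Self {
            default_schema: query_ctx.schema.clone(),
            query_ctx,
        }
    }

    // Mirrors `.table(catalog, schema, table, Some(&self.query_ctx))` above.
    fn resolve(&self, table: &str) -> String {
        format!("{}.{}.{}", self.query_ctx.catalog, self.default_schema, table)
    }
}

fn main() {
    let ctx: QueryContextRef = Arc::new(QueryContext {
        catalog: "greptime".into(),
        schema: "public".into(),
    });
    let provider = TableSourceProvider::new(ctx.clone());
    assert_eq!(provider.resolve("numbers"), "greptime.public.numbers");
}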

View File

@@ -112,7 +112,7 @@ impl SchemaProvider for DummySchemaProvider {
async fn table(&self, name: &str) -> datafusion::error::Result<Option<Arc<dyn TableProvider>>> {
let table = self
.catalog_manager
.table(&self.catalog_name, &self.schema_name, name)
.table(&self.catalog_name, &self.schema_name, name, None)
.await?
.with_context(|| TableNotExistSnafu {
table: format_full_table_name(&self.catalog_name, &self.schema_name, name),

View File

@@ -131,7 +131,7 @@ impl AlterLogicalTablesProcedure {
let phy_raw_schemas = future::join_all(alter_region_tasks)
.await
.into_iter()
.map(|res| res.map(|mut res| res.extension.remove(ALTER_PHYSICAL_EXTENSION_KEY)))
.map(|res| res.map(|mut res| res.extensions.remove(ALTER_PHYSICAL_EXTENSION_KEY)))
.collect::<Result<Vec<_>>>()?;
if phy_raw_schemas.is_empty() {

View File

@@ -157,7 +157,7 @@ impl CreateLogicalTablesProcedure {
let phy_raw_schemas = join_all(create_region_tasks)
.await
.into_iter()
.map(|res| res.map(|mut res| res.extension.remove(ALTER_PHYSICAL_EXTENSION_KEY)))
.map(|res| res.map(|mut res| res.extensions.remove(ALTER_PHYSICAL_EXTENSION_KEY)))
.collect::<Result<Vec<_>>>()?;
if phy_raw_schemas.is_empty() {

View File

@@ -324,10 +324,12 @@ impl HeartbeatTask {
region_id: stat.region_id.as_u64(),
engine: stat.engine,
role: RegionRole::from(stat.role).into(),
// TODO(jeremy): w/rcus
// TODO(weny): w/rcus
rcus: 0,
wcus: 0,
approximate_bytes: region_server.region_disk_usage(stat.region_id).unwrap_or(0),
// TODO(weny): add extensions
extensions: Default::default(),
})
.collect()
}

View File

@@ -366,10 +366,10 @@ impl RegionServerHandler for RegionServer {
// merge results by sum up affected rows and merge extensions.
let mut affected_rows = 0;
let mut extension = HashMap::new();
let mut extensions = HashMap::new();
for result in results {
affected_rows += result.affected_rows;
extension.extend(result.extension);
extensions.extend(result.extensions);
}
Ok(RegionResponseV1 {
@@ -380,7 +380,7 @@ impl RegionServerHandler for RegionServer {
}),
}),
affected_rows: affected_rows as _,
extension,
extensions,
})
}
}
@@ -708,7 +708,7 @@ impl RegionServerInner {
.await?;
Ok(RegionResponse {
affected_rows: result.affected_rows,
extension: result.extension,
extensions: result.extensions,
})
}
Err(err) => {

View File

@@ -15,6 +15,7 @@ workspace = true
arrow.workspace = true
arrow-array.workspace = true
arrow-schema.workspace = true
base64.workspace = true
common-base.workspace = true
common-decimal.workspace = true
common-error.workspace = true
@@ -23,6 +24,7 @@ common-telemetry.workspace = true
common-time.workspace = true
datafusion-common.workspace = true
enum_dispatch = "0.3"
greptime-proto.workspace = true
num = "0.4"
num-traits = "0.2"
ordered-float = { version = "3.0", features = ["serde"] }

View File

@@ -18,6 +18,8 @@ use std::sync::Arc;
use arrow::datatypes::{DataType as ArrowDataType, Field};
use arrow_array::{Array, ListArray};
use base64::engine::general_purpose::URL_SAFE;
use base64::Engine as _;
use common_base::bytes::{Bytes, StringBytes};
use common_decimal::Decimal128;
use common_telemetry::error;
@@ -28,8 +30,10 @@ use common_time::time::Time;
use common_time::timestamp::{TimeUnit, Timestamp};
use common_time::{Duration, Interval, Timezone};
use datafusion_common::ScalarValue;
use greptime_proto::v1::value::ValueData;
pub use ordered_float::OrderedFloat;
use serde::{Deserialize, Serialize, Serializer};
use serde_json::{Number, Value as JsonValue};
use snafu::{ensure, ResultExt};
use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Error, Result, TryFromValueSnafu};
@@ -1364,15 +1368,179 @@ impl<'a> ValueRef<'a> {
}
}
pub fn column_data_to_json(data: ValueData) -> JsonValue {
match data {
ValueData::BinaryValue(b) => JsonValue::String(URL_SAFE.encode(b)),
ValueData::BoolValue(b) => JsonValue::Bool(b),
ValueData::U8Value(i) => JsonValue::Number(i.into()),
ValueData::U16Value(i) => JsonValue::Number(i.into()),
ValueData::U32Value(i) => JsonValue::Number(i.into()),
ValueData::U64Value(i) => JsonValue::Number(i.into()),
ValueData::I8Value(i) => JsonValue::Number(i.into()),
ValueData::I16Value(i) => JsonValue::Number(i.into()),
ValueData::I32Value(i) => JsonValue::Number(i.into()),
ValueData::I64Value(i) => JsonValue::Number(i.into()),
ValueData::F32Value(f) => Number::from_f64(f as f64)
.map(JsonValue::Number)
.unwrap_or(JsonValue::Null),
ValueData::F64Value(f) => Number::from_f64(f)
.map(JsonValue::Number)
.unwrap_or(JsonValue::Null),
ValueData::StringValue(s) => JsonValue::String(s),
ValueData::DateValue(d) => JsonValue::String(Date::from(d).to_string()),
ValueData::DatetimeValue(d) => JsonValue::String(DateTime::from(d).to_string()),
ValueData::TimeSecondValue(d) => JsonValue::String(Time::new_second(d).to_iso8601_string()),
ValueData::TimeMillisecondValue(d) => {
JsonValue::String(Time::new_millisecond(d).to_iso8601_string())
}
ValueData::TimeMicrosecondValue(d) => {
JsonValue::String(Time::new_microsecond(d).to_iso8601_string())
}
ValueData::TimeNanosecondValue(d) => {
JsonValue::String(Time::new_nanosecond(d).to_iso8601_string())
}
ValueData::TimestampMicrosecondValue(d) => {
JsonValue::String(Timestamp::new_microsecond(d).to_iso8601_string())
}
ValueData::TimestampMillisecondValue(d) => {
JsonValue::String(Timestamp::new_millisecond(d).to_iso8601_string())
}
ValueData::TimestampNanosecondValue(d) => {
JsonValue::String(Timestamp::new_nanosecond(d).to_iso8601_string())
}
ValueData::TimestampSecondValue(d) => {
JsonValue::String(Timestamp::new_second(d).to_iso8601_string())
}
ValueData::IntervalYearMonthValue(d) => JsonValue::String(format!("interval year [{}]", d)),
ValueData::IntervalMonthDayNanoValue(d) => JsonValue::String(format!(
"interval month [{}][{}][{}]",
d.months, d.days, d.nanoseconds
)),
ValueData::IntervalDayTimeValue(d) => JsonValue::String(format!("interval day [{}]", d)),
ValueData::Decimal128Value(d) => {
JsonValue::String(format!("decimal128 [{}][{}]", d.hi, d.lo))
}
}
}
#[cfg(test)]
mod tests {
use arrow::datatypes::DataType as ArrowDataType;
use common_time::timezone::set_default_timezone;
use greptime_proto::v1::{Decimal128 as ProtoDecimal128, IntervalMonthDayNano};
use num_traits::Float;
use super::*;
use crate::vectors::ListVectorBuilder;
#[test]
fn test_column_data_to_json() {
assert_eq!(
column_data_to_json(ValueData::BinaryValue(b"hello".to_vec())),
JsonValue::String("aGVsbG8=".to_string())
);
assert_eq!(
column_data_to_json(ValueData::BoolValue(true)),
JsonValue::Bool(true)
);
assert_eq!(
column_data_to_json(ValueData::U8Value(1)),
JsonValue::Number(1.into())
);
assert_eq!(
column_data_to_json(ValueData::U16Value(2)),
JsonValue::Number(2.into())
);
assert_eq!(
column_data_to_json(ValueData::U32Value(3)),
JsonValue::Number(3.into())
);
assert_eq!(
column_data_to_json(ValueData::U64Value(4)),
JsonValue::Number(4.into())
);
assert_eq!(
column_data_to_json(ValueData::I8Value(5)),
JsonValue::Number(5.into())
);
assert_eq!(
column_data_to_json(ValueData::I16Value(6)),
JsonValue::Number(6.into())
);
assert_eq!(
column_data_to_json(ValueData::I32Value(7)),
JsonValue::Number(7.into())
);
assert_eq!(
column_data_to_json(ValueData::I64Value(8)),
JsonValue::Number(8.into())
);
assert_eq!(
column_data_to_json(ValueData::F32Value(9.0)),
JsonValue::Number(Number::from_f64(9.0_f64).unwrap())
);
assert_eq!(
column_data_to_json(ValueData::F64Value(10.0)),
JsonValue::Number(Number::from_f64(10.0_f64).unwrap())
);
assert_eq!(
column_data_to_json(ValueData::StringValue("hello".to_string())),
JsonValue::String("hello".to_string())
);
assert_eq!(
column_data_to_json(ValueData::DateValue(123)),
JsonValue::String("1970-05-04".to_string())
);
assert_eq!(
column_data_to_json(ValueData::DatetimeValue(456)),
JsonValue::String("1970-01-01 00:00:00.456+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimeSecondValue(789)),
JsonValue::String("00:13:09+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimeMillisecondValue(789)),
JsonValue::String("00:00:00.789+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimeMicrosecondValue(789)),
JsonValue::String("00:00:00.000789+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimestampMillisecondValue(1234567890)),
JsonValue::String("1970-01-15 06:56:07.890+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimestampNanosecondValue(1234567890123456789)),
JsonValue::String("2009-02-13 23:31:30.123456789+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimestampSecondValue(1234567890)),
JsonValue::String("2009-02-13 23:31:30+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::IntervalYearMonthValue(12)),
JsonValue::String("interval year [12]".to_string())
);
assert_eq!(
column_data_to_json(ValueData::IntervalMonthDayNanoValue(IntervalMonthDayNano {
months: 1,
days: 2,
nanoseconds: 3,
})),
JsonValue::String("interval month [1][2][3]".to_string())
);
assert_eq!(
column_data_to_json(ValueData::IntervalDayTimeValue(4)),
JsonValue::String("interval day [4]".to_string())
);
assert_eq!(
column_data_to_json(ValueData::Decimal128Value(ProtoDecimal128 { hi: 5, lo: 6 })),
JsonValue::String("decimal128 [5][6]".to_string())
);
}
#[test]
fn test_try_from_scalar_value() {
assert_eq!(

View File

@@ -356,9 +356,10 @@ impl SqlQueryHandler for Instance {
async fn is_valid_schema(&self, catalog: &str, schema: &str) -> Result<bool> {
self.catalog_manager
.schema_exists(catalog, schema)
.schema_exists(catalog, schema, None)
.await
.context(error::CatalogSnafu)
.map(|b| b && !self.catalog_manager.is_reserved_schema_name(schema))
}
}

View File

@@ -102,7 +102,7 @@ impl Instance {
) -> Result<Output> {
let table = self
.catalog_manager
.table(catalog_name, schema_name, table_name)
.table(catalog_name, schema_name, table_name, Some(ctx))
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {

View File

@@ -152,7 +152,12 @@ mod python {
if let Some(table) = self
.catalog_manager
.table(&expr.catalog_name, &expr.schema_name, &expr.table_name)
.table(
&expr.catalog_name,
&expr.schema_name,
&expr.table_name,
None,
)
.await
.context(CatalogSnafu)?
{
@@ -185,6 +190,7 @@ mod python {
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
None,
)
.await
.context(CatalogSnafu)?

View File

@@ -93,6 +93,7 @@ mod tests {
approximate_bytes: 0,
engine: default_engine().to_string(),
role: RegionRole::Follower,
extensions: Default::default(),
}
}
acc.stat = Some(Stat {

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use api::v1::meta::HeartbeatRequest;
use common_meta::ClusterId;
@@ -57,6 +57,8 @@ pub struct RegionStat {
pub engine: String,
/// The region role.
pub role: RegionRole,
/// The extension info of this region
pub extensions: HashMap<String, Vec<u8>>,
}
impl Stat {
@@ -142,6 +144,7 @@ impl TryFrom<api::v1::meta::RegionStat> for RegionStat {
approximate_bytes: value.approximate_bytes,
engine: value.engine.to_string(),
role: RegionRole::from(value.role()),
extensions: value.extensions,
})
}
}

View File

@@ -135,6 +135,7 @@ mod test {
wcus: 0,
approximate_bytes: 0,
engine: String::new(),
extensions: Default::default(),
}
}

View File

@@ -100,7 +100,7 @@ pub mod mock {
}),
}),
affected_rows: 0,
extension: Default::default(),
extensions: Default::default(),
})
}
}

View File

@@ -199,6 +199,7 @@ mod tests {
approximate_bytes: 1,
engine: "mito2".to_string(),
role: RegionRole::Leader,
extensions: Default::default(),
}],
..Default::default()
}
@@ -215,6 +216,7 @@ mod tests {
approximate_bytes: 1,
engine: "mito2".to_string(),
role: RegionRole::Leader,
extensions: Default::default(),
}],
..Default::default()
}
@@ -231,6 +233,7 @@ mod tests {
approximate_bytes: 1,
engine: "mito2".to_string(),
role: RegionRole::Leader,
extensions: Default::default(),
}],
..Default::default()
}

View File

@@ -162,7 +162,7 @@ impl RegionEngine for MetricEngine {
result.map_err(BoxedError::new).map(|rows| RegionResponse {
affected_rows: rows,
extension: extension_return_value,
extensions: extension_return_value,
})
}

View File

@@ -709,6 +709,10 @@ impl ScanInput {
rows_in_files + rows_in_memtables
}
pub(crate) fn predicate(&self) -> Option<Predicate> {
self.predicate.clone()
}
/// Retrieves [`PartitionRange`] from memtable and files
pub(crate) fn partition_ranges(&self) -> Vec<PartitionRange> {
let mut id = 0;

View File

@@ -515,6 +515,11 @@ impl RegionScanner for SeqScan {
self.properties.partitions = ranges;
Ok(())
}
fn has_predicate(&self) -> bool {
let predicate = self.stream_ctx.input.predicate();
predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false)
}
}
impl DisplayAs for SeqScan {

View File

@@ -228,6 +228,11 @@ impl RegionScanner for UnorderedScan {
Ok(stream)
}
fn has_predicate(&self) -> bool {
let predicate = self.stream_ctx.input.predicate();
predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false)
}
}
impl DisplayAs for UnorderedScan {
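Both has_predicate implementations above collapse Option<Predicate> plus its expression list into a bool: no predicate, or a predicate with zero expressions, counts as false. A tiny stand-alone version of that check (Predicate here is a stand-in for the store-api type):

struct Predicate {
    exprs: Vec<String>, // stand-in for the pushed-down filter expressions
}

impl Predicate {
    fn exprs(&self) -> &[String] {
        &self.exprs
    }
}

// Same shape as the two implementations above: "no predicate" and "a predicate
// with zero expressions" both count as false.
fn has_predicate(predicate: Option<&Predicate>) -> bool {
    predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false)
}

fn main() {
    assert!(!has_predicate(None));
    assert!(!has_predicate(Some(&Predicate { exprs: vec![] })));
    assert!(has_predicate(Some(&Predicate {
        exprs: vec!["host = 'a'".to_string()],
    })));
}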

View File

@@ -232,7 +232,7 @@ impl Deleter {
async fn get_table(&self, catalog: &str, schema: &str, table: &str) -> Result<TableRef> {
self.catalog_manager
.table(catalog, schema, table)
.table(catalog, schema, table, None)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {

View File

@@ -119,7 +119,7 @@ impl FlowServiceOperator {
if let Some(prev) = &mut final_result {
prev.affected_rows = res.affected_rows;
prev.affected_flows.extend(res.affected_flows);
prev.extension.extend(res.extension);
prev.extensions.extend(res.extensions);
} else {
final_result = Some(res);
}

View File

@@ -608,7 +608,7 @@ impl Inserter {
table: &str,
) -> Result<Option<TableRef>> {
self.catalog_manager
.table(catalog, schema, table)
.table(catalog, schema, table, None)
.await
.context(CatalogSnafu)
}

View File

@@ -64,7 +64,7 @@ impl<'a> RowToRegion<'a> {
let catalog_name = self.ctx.current_catalog();
let schema_name = self.ctx.current_schema();
self.catalog_manager
.table(catalog_name, &schema_name, table_name)
.table(catalog_name, &schema_name, table_name, None)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {

View File

@@ -139,7 +139,7 @@ impl<'a> StatementToRegion<'a> {
async fn get_table(&self, catalog: &str, schema: &str, table: &str) -> Result<TableRef> {
self.catalog_manager
.table(catalog, schema, table)
.table(catalog, schema, table, None)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {

View File

@@ -219,7 +219,7 @@ impl Requester {
) -> Result<Vec<PartitionInfo>> {
let table = self
.catalog_manager
.table(catalog, schema, table_name)
.table(catalog, schema, table_name, None)
.await
.context(CatalogSnafu)?;

View File

@@ -286,7 +286,7 @@ impl StatementExecutor {
let table_ref = self
.catalog_manager
.table(&catalog, &schema, &table)
.table(&catalog, &schema, &table, Some(&query_ctx))
.await
.context(CatalogSnafu)?
.context(TableNotFoundSnafu { table_name: &table })?;
@@ -313,7 +313,7 @@ impl StatementExecutor {
let catalog = query_ctx.current_catalog();
ensure!(
self.catalog_manager
.schema_exists(catalog, db.as_ref())
.schema_exists(catalog, db.as_ref(), Some(&query_ctx))
.await
.context(CatalogSnafu)?,
SchemaNotFoundSnafu { schema_info: &db }
@@ -382,7 +382,7 @@ impl StatementExecutor {
table,
} = table_ref;
self.catalog_manager
.table(catalog, schema, table)
.table(catalog, schema, table, None)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {

View File

@@ -57,7 +57,7 @@ impl StatementExecutor {
);
let table_names = self
.catalog_manager
.table_names(&req.catalog_name, &req.schema_name)
.table_names(&req.catalog_name, &req.schema_name, Some(&ctx))
.await
.context(CatalogSnafu)?;

View File

@@ -106,7 +106,7 @@ impl StatementExecutor {
.context(error::ExternalSnafu)?;
let table_ref = self
.catalog_manager
.table(&catalog, &schema, &table)
.table(&catalog, &schema, &table, Some(&ctx))
.await
.context(CatalogSnafu)?
.context(TableNotFoundSnafu { table_name: &table })?;
@@ -207,6 +207,7 @@ impl StatementExecutor {
&create_table.catalog_name,
&create_table.schema_name,
&create_table.table_name,
Some(&query_ctx),
)
.await
.context(CatalogSnafu)?
@@ -487,7 +488,12 @@ impl StatementExecutor {
// if view or table exists.
if let Some(table) = self
.catalog_manager
.table(&expr.catalog_name, &expr.schema_name, &expr.view_name)
.table(
&expr.catalog_name,
&expr.schema_name,
&expr.view_name,
Some(&ctx),
)
.await
.context(CatalogSnafu)?
{
@@ -656,7 +662,7 @@ impl StatementExecutor {
) -> Result<Output> {
let view_info = if let Some(view) = self
.catalog_manager
.table(&catalog, &schema, &view)
.table(&catalog, &schema, &view, None)
.await
.context(CatalogSnafu)?
{
@@ -766,6 +772,7 @@ impl StatementExecutor {
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
Some(&query_context),
)
.await
.context(CatalogSnafu)?
@@ -816,7 +823,7 @@ impl StatementExecutor {
if self
.catalog_manager
.schema_exists(&catalog, &schema)
.schema_exists(&catalog, &schema, None)
.await
.context(CatalogSnafu)?
{
@@ -858,6 +865,7 @@ impl StatementExecutor {
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
Some(&query_context),
)
.await
.context(CatalogSnafu)?
@@ -944,7 +952,12 @@ impl StatementExecutor {
let table = self
.catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.table(
&catalog_name,
&schema_name,
&table_name,
Some(&query_context),
)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
@@ -1167,9 +1180,10 @@ impl StatementExecutor {
if !self
.catalog_manager
.schema_exists(catalog, database)
.schema_exists(catalog, database, None)
.await
.context(CatalogSnafu)?
&& !self.catalog_manager.is_reserved_schema_name(database)
{
self.create_database_procedure(
catalog.to_string(),

View File

@@ -39,7 +39,7 @@ impl StatementExecutor {
let table = self
.catalog_manager
.table(&catalog, &schema, &table)
.table(&catalog, &schema, &table, Some(&query_ctx))
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {

View File

@@ -143,7 +143,7 @@ impl StatementExecutor {
let table_ref = self
.catalog_manager
.table(&catalog, &schema, &view)
.table(&catalog, &schema, &view, Some(&query_ctx))
.await
.context(CatalogSnafu)?
.context(ViewNotFoundSnafu { view_name: &view })?;

View File

@@ -13,27 +13,13 @@
// limitations under the License.
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use pipeline::{parse, Array, Content, GreptimeTransformer, Pipeline, Value as PipelineValue};
use pipeline::{parse, Content, GreptimeTransformer, Pipeline};
use serde_json::{Deserializer, Value};
fn processor_map(
pipeline: &Pipeline<GreptimeTransformer>,
input_values: Vec<Value>,
) -> impl IntoIterator<Item = greptime_proto::v1::Rows> {
let pipeline_data = input_values
.into_iter()
.map(|v| PipelineValue::try_from(v).unwrap())
.collect::<Vec<_>>();
pipeline.exec(PipelineValue::Array(Array {
values: pipeline_data,
}))
}
fn processor_mut(
pipeline: &Pipeline<GreptimeTransformer>,
input_values: Vec<Value>,
) -> impl IntoIterator<Item = Vec<greptime_proto::v1::Row>> {
) -> Result<Vec<greptime_proto::v1::Row>, String> {
let mut payload = pipeline.init_intermediate_state();
let mut result = Vec::with_capacity(input_values.len());
@@ -249,11 +235,10 @@ fn criterion_benchmark(c: &mut Criterion) {
let pipeline = prepare_pipeline();
let mut group = c.benchmark_group("pipeline");
group.sample_size(50);
group.bench_function("processor map", |b| {
b.iter(|| processor_map(black_box(&pipeline), black_box(input_value.clone())))
});
group.bench_function("processor mut", |b| {
b.iter(|| processor_mut(black_box(&pipeline), black_box(input_value.clone())))
b.iter(|| {
processor_mut(black_box(&pipeline), black_box(input_value.clone())).unwrap();
})
});
group.finish();
}

View File

@@ -19,92 +19,24 @@ pub mod processor;
pub mod transform;
pub mod value;
use ahash::{HashMap, HashSet};
use common_telemetry::{debug, warn};
use ahash::HashSet;
use common_telemetry::debug;
use itertools::{merge, Itertools};
use processor::Processor;
use transform::{Transformer, Transforms};
use value::{Map, Value};
use processor::{Processor, ProcessorBuilder, Processors};
use transform::{TransformBuilders, Transformer, Transforms};
use value::Value;
use yaml_rust::YamlLoader;
const DESCRIPTION: &str = "description";
const PROCESSORS: &str = "processors";
const TRANSFORM: &str = "transform";
const TRANSFORMS: &str = "transforms";
pub enum Content {
Json(String),
Yaml(String),
}
/// set the index for the processor keys
/// the index is the position of the key in the final intermediate keys
fn set_processor_keys_index(
processors: &mut processor::Processors,
final_intermediate_keys: &Vec<String>,
) -> Result<(), String> {
let final_intermediate_key_index = final_intermediate_keys
.iter()
.enumerate()
.map(|(i, k)| (k.as_str(), i))
.collect::<HashMap<_, _>>();
for processor in processors.iter_mut() {
for field in processor.fields_mut().iter_mut() {
let index = final_intermediate_key_index.get(field.input_field.name.as_str()).ok_or(format!(
"input field {} is not found in intermediate keys: {final_intermediate_keys:?} when set processor keys index",
field.input_field.name
))?;
field.set_input_index(*index);
for (k, v) in field.output_fields_index_mapping.iter_mut() {
let index = final_intermediate_key_index.get(k.as_str());
match index {
Some(index) => {
*v = *index;
}
None => {
warn!(
"output field {k} is not found in intermediate keys: {final_intermediate_keys:?} when set processor keys index"
);
}
}
}
}
}
Ok(())
}
fn set_transform_keys_index(
transforms: &mut Transforms,
final_intermediate_keys: &[String],
output_keys: &[String],
) -> Result<(), String> {
let final_intermediate_key_index = final_intermediate_keys
.iter()
.enumerate()
.map(|(i, k)| (k.as_str(), i))
.collect::<HashMap<_, _>>();
let output_key_index = output_keys
.iter()
.enumerate()
.map(|(i, k)| (k.as_str(), i))
.collect::<HashMap<_, _>>();
for transform in transforms.iter_mut() {
for field in transform.fields.iter_mut() {
let index = final_intermediate_key_index.get(field.input_field.name.as_str()).ok_or(format!(
"input field {} is not found in intermediate keys: {final_intermediate_keys:?} when set transform keys index",
field.input_field.name
))?;
field.set_input_index(*index);
for (k, v) in field.output_fields_index_mapping.iter_mut() {
let index = output_key_index.get(k.as_str()).ok_or(format!(
"output field {k} is not found in output keys: {final_intermediate_keys:?} when set transform keys index"
))?;
*v = *index;
}
}
}
Ok(())
}
pub fn parse<T>(input: &Content) -> Result<Pipeline<T>, String>
where
T: Transformer,
@@ -117,24 +49,22 @@ where
let description = doc[DESCRIPTION].as_str().map(|s| s.to_string());
let mut processors = if let Some(v) = doc[PROCESSORS].as_vec() {
let processor_builder_list = if let Some(v) = doc[PROCESSORS].as_vec() {
v.try_into()?
} else {
processor::Processors::default()
processor::ProcessorBuilderList::default()
};
let transforms = if let Some(v) = doc[TRANSFORM].as_vec() {
v.try_into()?
} else {
Transforms::default()
};
let transform_builders =
if let Some(v) = doc[TRANSFORMS].as_vec().or(doc[TRANSFORM].as_vec()) {
v.try_into()?
} else {
TransformBuilders::default()
};
let mut transformer = T::new(transforms)?;
let transforms = transformer.transforms_mut();
let processors_output_keys = processors.output_keys();
let processors_required_keys = processors.required_keys();
let processors_required_original_keys = processors.required_original_keys();
let processors_required_keys = &processor_builder_list.input_keys;
let processors_output_keys = &processor_builder_list.output_keys;
let processors_required_original_keys = &processor_builder_list.original_input_keys;
debug!(
"processors_required_original_keys: {:?}",
@@ -143,7 +73,7 @@ where
debug!("processors_required_keys: {:?}", processors_required_keys);
debug!("processors_output_keys: {:?}", processors_output_keys);
let transforms_required_keys = transforms.required_keys();
let transforms_required_keys = &transform_builders.required_keys;
let mut tr_keys = Vec::with_capacity(50);
for key in transforms_required_keys.iter() {
if !processors_output_keys.contains(key)
@@ -183,9 +113,33 @@ where
final_intermediate_keys.extend(intermediate_keys_exclude_original);
let output_keys = transforms.output_keys().clone();
set_processor_keys_index(&mut processors, &final_intermediate_keys)?;
set_transform_keys_index(transforms, &final_intermediate_keys, &output_keys)?;
let output_keys = transform_builders.output_keys.clone();
let processors_kind_list = processor_builder_list
.processor_builders
.into_iter()
.map(|builder| builder.build(&final_intermediate_keys))
.collect::<Result<Vec<_>, _>>()?;
let processors = Processors {
processors: processors_kind_list,
required_keys: processors_required_keys.clone(),
output_keys: processors_output_keys.clone(),
required_original_keys: processors_required_original_keys.clone(),
};
let transfor_list = transform_builders
.builders
.into_iter()
.map(|builder| builder.build(&final_intermediate_keys, &output_keys))
.collect::<Result<Vec<_>, String>>()?;
let transformers = Transforms {
transforms: transfor_list,
required_keys: transforms_required_keys.clone(),
output_keys: output_keys.clone(),
};
let transformer = T::new(transformers)?;
Ok(Pipeline {
description,
@@ -238,38 +192,6 @@ impl<T> Pipeline<T>
where
T: Transformer,
{
fn exec_map(&self, map: &mut Map) -> Result<(), String> {
let v = map;
for processor in self.processors.iter() {
processor.exec_map(v)?;
}
Ok(())
}
pub fn exec(&self, mut val: Value) -> Result<T::Output, String> {
let result = match val {
Value::Map(ref mut map) => {
self.exec_map(map)?;
val
}
Value::Array(arr) => arr
.values
.into_iter()
.map(|mut v| match v {
Value::Map(ref mut map) => {
self.exec_map(map)?;
Ok(v)
}
_ => Err(format!("expected a map, but got {}", v)),
})
.collect::<Result<Vec<Value>, String>>()
.map(|values| Value::Array(value::Array { values }))?,
_ => return Err(format!("expected a map or array, but got {}", val)),
};
self.transformer.transform(result)
}
pub fn exec_mut(&self, val: &mut Vec<Value>) -> Result<T::VecOutput, String> {
for processor in self.processors.iter() {
processor.exec_mut(val)?;
@@ -347,9 +269,24 @@ where
}
}
pub(crate) fn find_key_index(
intermediate_keys: &[String],
key: &str,
kind: &str,
) -> Result<usize, String> {
intermediate_keys
.iter()
.position(|k| k == key)
.ok_or(format!(
"{} processor.{} not found in intermediate keys",
kind, key
))
}
#[cfg(test)]
mod tests {
use api::v1::Rows;
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{self, ColumnDataType, SemanticType};
@@ -359,96 +296,43 @@ mod tests {
#[test]
fn test_pipeline_prepare() {
{
let input_value_str = r#"
{
"my_field": "1,2",
"foo": "bar"
}
"#;
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
let pipeline_yaml = r#"
---
description: Pipeline for Apache Tomcat
let input_value_str = r#"
{
"my_field": "1,2",
"foo": "bar"
}
"#;
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
let pipeline_yaml = r#"description: 'Pipeline for Apache Tomcat'
processors:
- csv:
field: my_field, my_field,field1, field2
field: my_field
target_fields: field1, field2
transform:
- field: field1
type: uint32
- field: field2
type: uint32
"#;
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(
&["greptime_timestamp", "my_field"].to_vec(),
pipeline.required_keys()
);
assert_eq!(
payload,
vec![
Value::Null,
Value::String("1,2".to_string()),
Value::Null,
Value::Null
]
);
let result = pipeline.exec_mut(&mut payload).unwrap();
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(&["my_field"].to_vec(), pipeline.required_keys());
assert_eq!(
payload,
vec![Value::String("1,2".to_string()), Value::Null, Value::Null]
);
let result = pipeline.exec_mut(&mut payload).unwrap();
assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1)));
assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2)));
match &result.values[2].value_data {
Some(ValueData::TimestampNanosecondValue(v)) => {
assert_ne!(*v, 0);
}
_ => panic!("expect null value"),
assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1)));
assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2)));
match &result.values[2].value_data {
Some(ValueData::TimestampNanosecondValue(v)) => {
assert_ne!(*v, 0);
}
}
{
let input_value_str = r#"
{
"reqTimeSec": "1573840000.000"
}
"#;
let pipeline_yaml = r#"
---
description: Pipeline for Demo Log
processors:
- gsub:
field: reqTimeSec
pattern: "\\."
replacement: ""
- epoch:
field: reqTimeSec
resolution: millisecond
ignore_missing: true
transform:
- field: reqTimeSec
type: epoch, millisecond
index: timestamp
"#;
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(&["reqTimeSec"].to_vec(), pipeline.required_keys());
assert_eq!(payload, vec![Value::String("1573840000.000".to_string())]);
let result = pipeline.exec_mut(&mut payload).unwrap();
assert_eq!(
result.values[0].value_data,
Some(ValueData::TimestampMillisecondValue(1573840000000))
);
_ => panic!("expect null value"),
}
}
@@ -541,21 +425,19 @@ transform:
#[test]
fn test_csv_pipeline() {
let input_value_str = r#"
{
"my_field": "1,2",
"foo": "bar"
}
"#;
{
"my_field": "1,2",
"foo": "bar"
}
"#;
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
let pipeline_yaml = r#"
---
description: Pipeline for Apache Tomcat
processors:
- csv:
field: my_field,my_field, field1, field2
field: my_field
target_fields: field1, field2
transform:
- field: field1
type: uint32
@@ -565,8 +447,22 @@ transform:
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let output = pipeline.exec(input_value.try_into().unwrap());
assert!(output.is_ok());
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(&["my_field"].to_vec(), pipeline.required_keys());
assert_eq!(
payload,
vec![Value::String("1,2".to_string()), Value::Null, Value::Null]
);
let result = pipeline.exec_mut(&mut payload).unwrap();
assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1)));
assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2)));
match &result.values[2].value_data {
Some(ValueData::TimestampNanosecondValue(v)) => {
assert_ne!(*v, 0);
}
_ => panic!("expect null value"),
}
}
#[test]
@@ -596,7 +492,14 @@ transform:
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let output = pipeline.exec(input_value.try_into().unwrap()).unwrap();
let schema = pipeline.schemas().clone();
let mut result = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut result).unwrap();
let row = pipeline.exec_mut(&mut result).unwrap();
let output = Rows {
schema,
rows: vec![row],
};
let schemas = output.schema;
assert_eq!(schemas.len(), 1);


@@ -12,69 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use std::ops::Deref;
use std::str::FromStr;
use ahash::{HashSet, HashSetExt};
use itertools::Itertools;
#[derive(Debug, Default, Clone)]
pub struct Fields(Vec<Field>);
impl Fields {
pub(crate) fn new(fields: Vec<Field>) -> Result<Self, String> {
let ff = Fields(fields);
ff.check()
}
pub(crate) fn one(field: Field) -> Self {
Fields(vec![field])
}
pub(crate) fn get_target_fields(&self) -> Vec<&str> {
self.0.iter().map(|f| f.get_target_field()).collect()
}
fn check(self) -> Result<Self, String> {
if self.0.is_empty() {
return Err("fields must not be empty".to_string());
}
let mut set = HashSet::new();
for f in self.0.iter() {
if set.contains(&f.input_field.name) {
return Err(format!(
"field name must be unique, but got duplicated: {}",
f.input_field.name
));
}
set.insert(&f.input_field.name);
}
Ok(self)
}
}
impl std::fmt::Display for Fields {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
let s = self.0.iter().map(|f| f.to_string()).join(";");
write!(f, "{s}")
}
}
impl std::ops::Deref for Fields {
type Target = Vec<Field>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl std::ops::DerefMut for Fields {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
use crate::etl::find_key_index;
/// Information about the input field including the name and index in intermediate keys.
#[derive(Debug, Default, Clone)]
pub struct InputFieldInfo {
pub(crate) name: String,
@@ -82,132 +25,202 @@ pub struct InputFieldInfo {
}
impl InputFieldInfo {
/// Create a new input field info with the given field name and index.
pub(crate) fn new(field: impl Into<String>, index: usize) -> Self {
InputFieldInfo {
name: field.into(),
index,
}
}
}
pub(crate) fn name(field: impl Into<String>) -> Self {
InputFieldInfo {
name: field.into(),
index: 0,
/// Information about a field that has one input and one output.
#[derive(Debug, Default, Clone)]
pub struct OneInputOneOutputField {
input: InputFieldInfo,
output: Option<(String, usize)>,
}
impl OneInputOneOutputField {
/// Create a new field with the given input and output.
pub(crate) fn new(input: InputFieldInfo, output: (String, usize)) -> Self {
OneInputOneOutputField {
input,
output: Some(output),
}
}
/// Build a new field with the given processor kind, intermediate keys, input field, and target field.
pub(crate) fn build(
processor_kind: &str,
intermediate_keys: &[String],
input_field: &str,
target_field: &str,
) -> Result<Self, String> {
let input_index = find_key_index(intermediate_keys, input_field, processor_kind)?;
let input_field_info = InputFieldInfo::new(input_field, input_index);
let output_index = find_key_index(intermediate_keys, target_field, processor_kind)?;
Ok(OneInputOneOutputField::new(
input_field_info,
(target_field.to_string(), output_index),
))
}
/// Get the input field information.
pub(crate) fn input(&self) -> &InputFieldInfo {
&self.input
}
/// Get the index of the input field.
pub(crate) fn input_index(&self) -> usize {
self.input.index
}
/// Get the name of the input field.
pub(crate) fn input_name(&self) -> &str {
&self.input.name
}
/// Get the index of the output field.
pub(crate) fn output_index(&self) -> usize {
*self.output().1
}
/// Get the name of the output field.
pub(crate) fn output_name(&self) -> &str {
self.output().0
}
/// Get the output field information.
pub(crate) fn output(&self) -> (&String, &usize) {
if let Some((name, index)) = &self.output {
(name, index)
} else {
(&self.input.name, &self.input.index)
}
}
}
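// A minimal sketch of the one-input/one-output resolution, assuming hypothetical
// key names: both the input and the target field must already exist in the
// intermediate keys, and build records their indexes for direct vector access.
#[test]
fn one_input_one_output_sketch() {
    let keys = vec!["reqTimeSec".to_string(), "req_time_ts".to_string()];
    let field =
        OneInputOneOutputField::build("epoch", &keys, "reqTimeSec", "req_time_ts").unwrap();
    assert_eq!(field.input_index(), 0);
    assert_eq!(field.output_index(), 1);
    assert_eq!(field.output_name(), "req_time_ts");
}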
/// Used to represent the input and output fields of a processor or transform.
/// Information about a field that has one input and multiple outputs.
#[derive(Debug, Default, Clone)]
pub struct OneInputMultiOutputField {
input: InputFieldInfo,
    /// Typically, processors that output multiple keys distinguish them by joining a prefix with each generated key.
prefix: Option<String>,
}
impl OneInputMultiOutputField {
/// Create a new field with the given input and prefix.
pub(crate) fn new(input: InputFieldInfo, prefix: Option<String>) -> Self {
OneInputMultiOutputField { input, prefix }
}
/// Get the input field information.
pub(crate) fn input(&self) -> &InputFieldInfo {
&self.input
}
/// Get the index of the input field.
pub(crate) fn input_index(&self) -> usize {
self.input.index
}
/// Get the name of the input field.
pub(crate) fn input_name(&self) -> &str {
&self.input.name
}
/// Get the prefix for the output fields.
pub(crate) fn target_prefix(&self) -> &str {
self.prefix.as_deref().unwrap_or(&self.input.name)
}
}
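// A small sketch of the prefix fallback, with hypothetical names: when no target
// is configured, multi-output processors prefix generated keys with the input
// field name.
#[test]
fn target_prefix_sketch() {
    let plain = OneInputMultiOutputField::new(InputFieldInfo::new("cmcd", 0), None);
    assert_eq!(plain.target_prefix(), "cmcd");
    let renamed =
        OneInputMultiOutputField::new(InputFieldInfo::new("cmcd", 0), Some("prefix".to_string()));
    assert_eq!(renamed.target_prefix(), "prefix");
}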
/// Raw processor-defined inputs and outputs
#[derive(Debug, Default, Clone)]
pub struct Field {
/// The input field name and index.
pub input_field: InputFieldInfo,
/// The output field name and index mapping.
pub output_fields_index_mapping: BTreeMap<String, usize>,
// rename
pub target_field: Option<String>,
// 1-to-many mapping
// processors:
// - csv
pub target_fields: Option<Vec<String>>,
pub(crate) input_field: String,
pub(crate) target_field: Option<String>,
}
impl Field {
pub(crate) fn new(field: impl Into<String>) -> Self {
Field {
input_field: InputFieldInfo::name(field.into()),
output_fields_index_mapping: BTreeMap::new(),
target_field: None,
target_fields: None,
}
}
/// target column_name in processor or transform
/// if target_field is None, return input field name
pub(crate) fn get_target_field(&self) -> &str {
self.target_field
.as_deref()
.unwrap_or(&self.input_field.name)
}
/// input column_name in processor or transform
pub(crate) fn get_field_name(&self) -> &str {
&self.input_field.name
}
/// set input column index in processor or transform
pub(crate) fn set_input_index(&mut self, index: usize) {
self.input_field.index = index;
}
pub(crate) fn set_output_index(&mut self, key: &str, index: usize) {
if let Some(v) = self.output_fields_index_mapping.get_mut(key) {
*v = index;
}
}
pub(crate) fn insert_output_index(&mut self, key: String, index: usize) {
self.output_fields_index_mapping.insert(key, index);
}
}
impl std::str::FromStr for Field {
impl FromStr for Field {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut parts = s.split(',');
let field = parts.next().ok_or("field is missing")?.trim().to_string();
let input_field = parts
.next()
.ok_or("input field is missing")?
.trim()
.to_string();
let target_field = parts.next().map(|x| x.trim().to_string());
if field.is_empty() {
return Err("field is empty".to_string());
if input_field.is_empty() {
return Err("input field is empty".to_string());
}
let renamed_field = match parts.next() {
Some(s) if !s.trim().is_empty() => Some(s.trim().to_string()),
_ => None,
};
// TODO(qtang): ???? what's this?
// weird design? field: <field>,<target_field>,<target_fields>,<target_fields>....
// and only use in csv processor
let fields: Vec<_> = parts
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
.collect();
let target_fields = if fields.is_empty() {
None
} else {
Some(fields)
};
Ok(Field {
input_field: InputFieldInfo::name(field),
output_fields_index_mapping: BTreeMap::new(),
target_field: renamed_field,
target_fields,
input_field,
target_field,
})
}
}
impl std::fmt::Display for Field {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match (&self.target_field, &self.target_fields) {
(Some(target_field), None) => write!(f, "{}, {target_field}", self.input_field.name),
(None, Some(target_fields)) => {
write!(
f,
"{}, {}",
self.input_field.name,
target_fields.iter().join(",")
)
}
_ => write!(f, "{}", self.input_field.name),
impl Field {
/// Create a new field with the given input and target fields.
pub(crate) fn new(input_field: impl Into<String>, target_field: Option<String>) -> Self {
Field {
input_field: input_field.into(),
target_field,
}
}
/// Get the input field.
pub(crate) fn input_field(&self) -> &str {
&self.input_field
}
/// Get the target field.
pub(crate) fn target_field(&self) -> Option<&str> {
self.target_field.as_deref()
}
/// Get the target field or the input field if the target field is not set.
pub(crate) fn target_or_input_field(&self) -> &str {
self.target_field.as_deref().unwrap_or(&self.input_field)
}
}
/// A collection of fields.
#[derive(Debug, Default, Clone)]
pub struct Fields(Vec<Field>);
impl Fields {
pub(crate) fn new(fields: Vec<Field>) -> Self {
Fields(fields)
}
pub(crate) fn one(field: Field) -> Self {
Fields(vec![field])
}
}
impl Deref for Fields {
type Target = Vec<Field>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl IntoIterator for Fields {
type Item = Field;
type IntoIter = std::vec::IntoIter<Field>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
#[cfg(test)]
@@ -227,35 +240,14 @@ mod tests {
let cases = [
// ("field", "field", None, None),
(
"field, target_field",
"field",
Some("target_field".into()),
None,
),
(
"field, target_field1, target_field2, target_field3",
"field",
Some("target_field1".into()),
Some(vec!["target_field2".into(), "target_field3".into()]),
),
(
"field,, target_field1, target_field2, target_field3",
"field",
None,
Some(vec![
"target_field1".into(),
"target_field2".into(),
"target_field3".into(),
]),
),
("field, target_field", "field", Some("target_field")),
("field", "field", None),
];
for (s, field, target_field, target_fields) in cases.into_iter() {
for (s, field, target_field) in cases.into_iter() {
let f: Field = s.parse().unwrap();
assert_eq!(f.get_field_name(), field, "{s}");
assert_eq!(f.target_field, target_field, "{s}");
assert_eq!(f.target_fields, target_fields, "{s}");
assert_eq!(f.input_field(), field, "{s}");
assert_eq!(f.target_field(), target_field, "{s}");
}
}
}


@@ -25,22 +25,22 @@ pub mod timestamp;
pub mod urlencoding;
use ahash::{HashSet, HashSetExt};
use cmcd::CmcdProcessor;
use csv::CsvProcessor;
use date::DateProcessor;
use dissect::DissectProcessor;
use cmcd::{CmcdProcessor, CmcdProcessorBuilder};
use csv::{CsvProcessor, CsvProcessorBuilder};
use date::{DateProcessor, DateProcessorBuilder};
use dissect::{DissectProcessor, DissectProcessorBuilder};
use enum_dispatch::enum_dispatch;
use epoch::EpochProcessor;
use gsub::GsubProcessor;
use epoch::{EpochProcessor, EpochProcessorBuilder};
use gsub::{GsubProcessor, GsubProcessorBuilder};
use itertools::Itertools;
use join::JoinProcessor;
use letter::LetterProcessor;
use regex::RegexProcessor;
use timestamp::TimestampProcessor;
use urlencoding::UrlEncodingProcessor;
use join::{JoinProcessor, JoinProcessorBuilder};
use letter::{LetterProcessor, LetterProcessorBuilder};
use regex::{RegexProcessor, RegexProcessorBuilder};
use timestamp::{TimestampProcessor, TimestampProcessorBuilder};
use urlencoding::{UrlEncodingProcessor, UrlEncodingProcessorBuilder};
use crate::etl::field::{Field, Fields};
use crate::etl::value::{Map, Value};
use super::field::{Field, Fields};
use crate::etl::value::Value;
const FIELD_NAME: &str = "field";
const FIELDS_NAME: &str = "fields";
@@ -49,6 +49,7 @@ const METHOD_NAME: &str = "method";
const PATTERN_NAME: &str = "pattern";
const PATTERNS_NAME: &str = "patterns";
const SEPARATOR_NAME: &str = "separator";
const TARGET_FIELDS_NAME: &str = "target_fields";
// const IF_NAME: &str = "if";
// const IGNORE_FAILURE_NAME: &str = "ignore_failure";
@@ -62,55 +63,14 @@ const SEPARATOR_NAME: &str = "separator";
/// The output of a processor is a map of key-value pairs that will be merged into the document when you use exec_map method.
#[enum_dispatch(ProcessorKind)]
pub trait Processor: std::fmt::Debug + Send + Sync + 'static {
/// Get the processor's fields
/// fields is just the same processor for multiple keys. It is not the case that a processor has multiple inputs
fn fields(&self) -> &Fields;
/// Get the processor's fields mutably
fn fields_mut(&mut self) -> &mut Fields;
/// Get the processor's kind
fn kind(&self) -> &str;
/// Whether to ignore missing
fn ignore_missing(&self) -> bool;
/// processor all output keys
/// if a processor has multiple output keys, it should return all of them
fn output_keys(&self) -> HashSet<String>;
/// Execute the processor on a document
/// and return a map of key-value pairs
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String>;
    /// Execute the processor on a vector which has been preprocessed by the pipeline
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String>;
/// Execute the processor on a map
/// and merge the output into the original map
fn exec_map(&self, map: &mut Map) -> Result<(), String> {
for ff @ Field {
input_field: field_info,
..
} in self.fields().iter()
{
match map.get(&field_info.name) {
Some(v) => {
map.extend(self.exec_field(v, ff)?);
}
None if self.ignore_missing() => {}
None => {
return Err(format!(
"{} processor: field '{}' is required but missing in {map}",
self.kind(),
field_info.name,
))
}
}
}
Ok(())
}
}
#[derive(Debug)]
@@ -129,6 +89,42 @@ pub enum ProcessorKind {
Date(DateProcessor),
}
/// ProcessorBuilder trait defines the interface for all processor builders
/// A processor builder is used to create a processor
#[enum_dispatch(ProcessorBuilders)]
pub trait ProcessorBuilder: std::fmt::Debug + Send + Sync + 'static {
/// Get the processor's output keys
fn output_keys(&self) -> HashSet<&str>;
/// Get the processor's input keys
fn input_keys(&self) -> HashSet<&str>;
/// Build the processor
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String>;
}
#[derive(Debug)]
#[enum_dispatch]
pub enum ProcessorBuilders {
Cmcd(CmcdProcessorBuilder),
Csv(CsvProcessorBuilder),
Dissect(DissectProcessorBuilder),
Gsub(GsubProcessorBuilder),
Join(JoinProcessorBuilder),
Letter(LetterProcessorBuilder),
Regex(RegexProcessorBuilder),
Timestamp(TimestampProcessorBuilder),
UrlEncoding(UrlEncodingProcessorBuilder),
Epoch(EpochProcessorBuilder),
Date(DateProcessorBuilder),
}
#[derive(Debug, Default)]
pub struct ProcessorBuilderList {
pub(crate) processor_builders: Vec<ProcessorBuilders>,
pub(crate) input_keys: Vec<String>,
pub(crate) output_keys: Vec<String>,
pub(crate) original_input_keys: Vec<String>,
}
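// A hedged end-to-end sketch of the two-phase construction; the helper name
// build_processors is hypothetical, and the real pipeline derives its
// intermediate keys from processors and transforms together rather than from
// the builder list alone.
fn build_processors(pipeline_yaml: &str) -> Result<Vec<ProcessorKind>, String> {
    // Phase 1: parse YAML into builders and collect the key sets they declare.
    let docs = yaml_rust::YamlLoader::load_from_str(pipeline_yaml).map_err(|e| e.to_string())?;
    let processor_docs = docs[0]["processors"]
        .as_vec()
        .ok_or("processors must be a list".to_string())?;
    let builder_list = ProcessorBuilderList::try_from(processor_docs)?;

    // Phase 2: freeze the key order and resolve every builder against it.
    let mut intermediate_keys = builder_list.input_keys.clone();
    intermediate_keys.extend(builder_list.output_keys.clone());
    builder_list
        .processor_builders
        .into_iter()
        .map(|builder| builder.build(&intermediate_keys))
        .collect()
}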
#[derive(Debug, Default)]
pub struct Processors {
    /// An ordered list of processors
@@ -174,52 +170,63 @@ impl Processors {
}
}
impl TryFrom<&Vec<yaml_rust::Yaml>> for Processors {
impl TryFrom<&Vec<yaml_rust::Yaml>> for ProcessorBuilderList {
type Error = String;
fn try_from(vec: &Vec<yaml_rust::Yaml>) -> Result<Self, Self::Error> {
let mut processors = vec![];
let mut processors_builders = vec![];
let mut all_output_keys = HashSet::with_capacity(50);
let mut all_required_keys = HashSet::with_capacity(50);
let mut all_required_original_keys = HashSet::with_capacity(50);
for doc in vec {
let processor = parse_processor(doc)?;
// get all required keys
let processor_required_keys: Vec<String> = processor
.fields()
.iter()
.map(|f| f.input_field.name.clone())
.collect();
for key in &processor_required_keys {
if !all_output_keys.contains(key) {
all_required_original_keys.insert(key.clone());
}
}
all_required_keys.extend(processor_required_keys);
let processor_output_keys = processor.output_keys().into_iter();
all_output_keys.extend(processor_output_keys);
processors.push(processor);
processors_builders.push(processor);
}
let all_required_keys = all_required_keys.into_iter().sorted().collect();
let all_output_keys = all_output_keys.into_iter().sorted().collect();
let all_required_original_keys = all_required_original_keys.into_iter().sorted().collect();
for processor in processors_builders.iter() {
{
// get all required keys
let processor_required_keys = processor.input_keys();
Ok(Processors {
processors,
required_keys: all_required_keys,
for key in &processor_required_keys {
if !all_output_keys.contains(key) {
all_required_original_keys.insert(*key);
}
}
all_required_keys.extend(processor_required_keys);
let processor_output_keys = processor.output_keys().into_iter();
all_output_keys.extend(processor_output_keys);
}
}
let all_required_keys = all_required_keys
.into_iter()
.map(|x| x.to_string())
.sorted()
.collect();
let all_output_keys = all_output_keys
.into_iter()
.map(|x| x.to_string())
.sorted()
.collect();
let all_required_original_keys = all_required_original_keys
.into_iter()
.map(|x| x.to_string())
.sorted()
.collect();
Ok(ProcessorBuilderList {
processor_builders: processors_builders,
input_keys: all_required_keys,
output_keys: all_output_keys,
required_original_keys: all_required_original_keys,
original_input_keys: all_required_original_keys,
})
}
}
fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorKind, String> {
fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorBuilders, String> {
let map = doc.as_hash().ok_or("processor must be a map".to_string())?;
let key = map
@@ -238,20 +245,24 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorKind, String> {
.ok_or("processor key must be a string".to_string())?;
let processor = match str_key {
cmcd::PROCESSOR_CMCD => ProcessorKind::Cmcd(CmcdProcessor::try_from(value)?),
csv::PROCESSOR_CSV => ProcessorKind::Csv(CsvProcessor::try_from(value)?),
dissect::PROCESSOR_DISSECT => ProcessorKind::Dissect(DissectProcessor::try_from(value)?),
epoch::PROCESSOR_EPOCH => ProcessorKind::Epoch(EpochProcessor::try_from(value)?),
date::PROCESSOR_DATE => ProcessorKind::Date(DateProcessor::try_from(value)?),
gsub::PROCESSOR_GSUB => ProcessorKind::Gsub(GsubProcessor::try_from(value)?),
join::PROCESSOR_JOIN => ProcessorKind::Join(JoinProcessor::try_from(value)?),
letter::PROCESSOR_LETTER => ProcessorKind::Letter(LetterProcessor::try_from(value)?),
regex::PROCESSOR_REGEX => ProcessorKind::Regex(RegexProcessor::try_from(value)?),
cmcd::PROCESSOR_CMCD => ProcessorBuilders::Cmcd(CmcdProcessorBuilder::try_from(value)?),
csv::PROCESSOR_CSV => ProcessorBuilders::Csv(CsvProcessorBuilder::try_from(value)?),
dissect::PROCESSOR_DISSECT => {
ProcessorBuilders::Dissect(DissectProcessorBuilder::try_from(value)?)
}
epoch::PROCESSOR_EPOCH => ProcessorBuilders::Epoch(EpochProcessorBuilder::try_from(value)?),
date::PROCESSOR_DATE => ProcessorBuilders::Date(DateProcessorBuilder::try_from(value)?),
gsub::PROCESSOR_GSUB => ProcessorBuilders::Gsub(GsubProcessorBuilder::try_from(value)?),
join::PROCESSOR_JOIN => ProcessorBuilders::Join(JoinProcessorBuilder::try_from(value)?),
letter::PROCESSOR_LETTER => {
ProcessorBuilders::Letter(LetterProcessorBuilder::try_from(value)?)
}
regex::PROCESSOR_REGEX => ProcessorBuilders::Regex(RegexProcessorBuilder::try_from(value)?),
timestamp::PROCESSOR_TIMESTAMP => {
ProcessorKind::Timestamp(TimestampProcessor::try_from(value)?)
ProcessorBuilders::Timestamp(TimestampProcessorBuilder::try_from(value)?)
}
urlencoding::PROCESSOR_URL_ENCODING => {
ProcessorKind::UrlEncoding(UrlEncodingProcessor::try_from(value)?)
ProcessorBuilders::UrlEncoding(UrlEncodingProcessorBuilder::try_from(value)?)
}
_ => return Err(format!("unsupported {} processor", str_key)),
};
@@ -301,19 +312,10 @@ where
})
}
pub(crate) fn yaml_fields(v: &yaml_rust::Yaml, field: &str) -> Result<Fields, String> {
let v = yaml_parse_strings(v, field)?;
Fields::new(v)
pub(crate) fn yaml_new_fields(v: &yaml_rust::Yaml, field: &str) -> Result<Fields, String> {
yaml_parse_strings(v, field).map(Fields::new)
}
pub(crate) fn yaml_field(v: &yaml_rust::Yaml, field: &str) -> Result<Field, String> {
pub(crate) fn yaml_new_field(v: &yaml_rust::Yaml, field: &str) -> Result<Field, String> {
yaml_parse_string(v, field)
}
pub(crate) fn update_one_one_output_keys(fields: &mut Fields) {
for field in fields.iter_mut() {
field
.output_fields_index_mapping
.insert(field.get_target_field().to_string(), 0_usize);
}
}
 

@@ -12,14 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use ahash::HashSet;
use urlencoding::decode;
use crate::etl::field::{Field, Fields};
use crate::etl::field::{Field, Fields, InputFieldInfo, OneInputMultiOutputField};
use crate::etl::find_key_index;
use crate::etl::processor::{
yaml_bool, yaml_field, yaml_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, Processor, ProcessorBuilder, ProcessorKind,
FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::{Map, Value};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_CMCD: &str = "cmcd";
@@ -63,6 +67,178 @@ const CMCD_KEYS: [&str; 18] = [
CMCD_KEY_V,
];
/// CmcdProcessorBuilder is a builder for CmcdProcessor,
/// parsed from the raw YAML configuration.
#[derive(Debug, Default)]
pub struct CmcdProcessorBuilder {
fields: Fields,
output_keys: HashSet<String>,
ignore_missing: bool,
}
impl CmcdProcessorBuilder {
    /// build_cmcd_outputs builds the CMCD output info,
    /// generating the index and resolver function for each output key.
pub(super) fn build_cmcd_outputs(
field: &Field,
intermediate_keys: &[String],
) -> Result<(BTreeMap<String, usize>, Vec<CmcdOutputInfo>), String> {
let mut output_index = BTreeMap::new();
let mut cmcd_field_outputs = Vec::with_capacity(CMCD_KEYS.len());
for cmcd in CMCD_KEYS {
let final_key = generate_key(field.target_or_input_field(), cmcd);
let index = find_key_index(intermediate_keys, &final_key, "cmcd")?;
output_index.insert(final_key.clone(), index);
match cmcd {
CMCD_KEY_BS | CMCD_KEY_SU => {
let output_info = CmcdOutputInfo::new(final_key, cmcd, index, bs_su);
cmcd_field_outputs.push(output_info);
}
CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP
| CMCD_KEY_RTP | CMCD_KEY_TB => {
let output_info = CmcdOutputInfo::new(final_key, cmcd, index, br_tb);
cmcd_field_outputs.push(output_info);
}
CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID
| CMCD_KEY_ST | CMCD_KEY_V => {
let output_info = CmcdOutputInfo::new(final_key, cmcd, index, cid_v);
cmcd_field_outputs.push(output_info);
}
CMCD_KEY_NOR => {
let output_info = CmcdOutputInfo::new(final_key, cmcd, index, nor);
cmcd_field_outputs.push(output_info);
}
CMCD_KEY_PR => {
let output_info = CmcdOutputInfo::new(final_key, cmcd, index, pr);
cmcd_field_outputs.push(output_info);
}
_ => {}
}
}
Ok((output_index, cmcd_field_outputs))
}
/// build CmcdProcessor from CmcdProcessorBuilder
pub fn build(self, intermediate_keys: &[String]) -> Result<CmcdProcessor, String> {
let mut real_fields = vec![];
let mut cmcd_outputs = Vec::with_capacity(CMCD_KEYS.len());
for field in self.fields.into_iter() {
let input_index = find_key_index(intermediate_keys, field.input_field(), "cmcd")?;
let input_field_info = InputFieldInfo::new(field.input_field(), input_index);
let (_, cmcd_field_outputs) = Self::build_cmcd_outputs(&field, intermediate_keys)?;
cmcd_outputs.push(cmcd_field_outputs);
let real_field = OneInputMultiOutputField::new(input_field_info, field.target_field);
real_fields.push(real_field);
}
Ok(CmcdProcessor {
fields: real_fields,
cmcd_outputs,
ignore_missing: self.ignore_missing,
})
}
}
impl ProcessorBuilder for CmcdProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.output_keys.iter().map(|s| s.as_str()).collect()
}
fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
self.build(intermediate_keys).map(ProcessorKind::Cmcd)
}
}
fn generate_key(prefix: &str, key: &str) -> String {
format!("{}_{}", prefix, key)
}
/// CmcdOutputInfo is a struct to store output info
#[derive(Debug)]
pub(super) struct CmcdOutputInfo {
/// {input_field}_{cmcd_key}
final_key: String,
/// cmcd key
key: &'static str,
/// index in intermediate_keys
index: usize,
/// function to resolve value
f: fn(&str, &str, Option<&str>) -> Result<Value, String>,
}
impl CmcdOutputInfo {
fn new(
final_key: String,
key: &'static str,
index: usize,
f: fn(&str, &str, Option<&str>) -> Result<Value, String>,
) -> Self {
Self {
final_key,
key,
index,
f,
}
}
}
impl Default for CmcdOutputInfo {
fn default() -> Self {
Self {
final_key: String::default(),
key: "",
index: 0,
f: |_, _, _| Ok(Value::Null),
}
}
}
/// function to resolve CMCD_KEY_BS | CMCD_KEY_SU
fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result<Value, String> {
Ok(Value::Boolean(true))
}
/// function to resolve CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP | CMCD_KEY_RTP | CMCD_KEY_TB
fn br_tb(s: &str, k: &str, v: Option<&str>) -> Result<Value, String> {
let v = v.ok_or(format!("{k} missing value in {s}"))?;
let val: i64 = v
.parse()
.map_err(|_| format!("failed to parse {v} as i64"))?;
Ok(Value::Int64(val))
}
/// function to resolve CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID | CMCD_KEY_V
fn cid_v(s: &str, k: &str, v: Option<&str>) -> Result<Value, String> {
let v = v.ok_or(format!("{k} missing value in {s}"))?;
Ok(Value::String(v.to_string()))
}
/// function to resolve CMCD_KEY_NOR
fn nor(s: &str, k: &str, v: Option<&str>) -> Result<Value, String> {
let v = v.ok_or(format!("{k} missing value in {s}"))?;
let val = match decode(v) {
Ok(val) => val.to_string(),
Err(_) => v.to_string(),
};
Ok(Value::String(val))
}
/// function to resolve CMCD_KEY_PR
fn pr(s: &str, k: &str, v: Option<&str>) -> Result<Value, String> {
let v = v.ok_or(format!("{k} missing value in {s}"))?;
let val: f64 = v
.parse()
.map_err(|_| format!("failed to parse {v} as f64"))?;
Ok(Value::Float64(val))
}
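// A brief sketch of how these resolvers type CMCD values, with hypothetical
// inputs; it assumes Value implements PartialEq, as the tests in this file
// already compare Value instances.
#[test]
fn cmcd_resolver_sketch() {
    assert_eq!(br_tb("br=3200", "br", Some("3200")), Ok(Value::Int64(3200)));
    assert_eq!(pr("pr=1.08", "pr", Some("1.08")), Ok(Value::Float64(1.08)));
    assert_eq!(bs_su("bs", "bs", None), Ok(Value::Boolean(true)));
    // Numeric keys require a value.
    assert!(br_tb("br", "br", None).is_err());
}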
/// Common Media Client Data Specification:
/// https://cdn.cta.tech/cta/media/media/resources/standards/pdfs/cta-5004-final.pdf
///
@@ -100,98 +276,43 @@ const CMCD_KEYS: [&str; 18] = [
/// 12. Transport Layer Security SHOULD be used to protect all transmission of CMCD data.
#[derive(Debug, Default)]
pub struct CmcdProcessor {
fields: Fields,
fields: Vec<OneInputMultiOutputField>,
cmcd_outputs: Vec<Vec<CmcdOutputInfo>>,
ignore_missing: bool,
}
impl CmcdProcessor {
fn with_fields(&mut self, mut fields: Fields) {
Self::update_output_keys(&mut fields);
self.fields = fields;
}
fn with_ignore_missing(&mut self, ignore_missing: bool) {
self.ignore_missing = ignore_missing;
}
fn generate_key(prefix: &str, key: &str) -> String {
format!("{}_{}", prefix, key)
}
fn parse(prefix: &str, s: &str) -> Result<Map, String> {
let mut map = Map::default();
fn parse(&self, field_index: usize, s: &str) -> Result<Vec<(usize, Value)>, String> {
let parts = s.split(',');
let mut result = Vec::new();
for part in parts {
let mut kv = part.split('=');
let k = kv.next().ok_or(format!("{part} missing key in {s}"))?;
let v = kv.next();
let key = Self::generate_key(prefix, k);
match k {
CMCD_KEY_BS | CMCD_KEY_SU => {
map.insert(key, Value::Boolean(true));
for cmcd_key in self.cmcd_outputs[field_index].iter() {
if cmcd_key.key == k {
let val = (cmcd_key.f)(s, k, v)?;
result.push((cmcd_key.index, val));
}
CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP
| CMCD_KEY_RTP | CMCD_KEY_TB => {
let v = v.ok_or(format!("{k} missing value in {s}"))?;
let val: i64 = v
.parse()
.map_err(|_| format!("failed to parse {v} as i64"))?;
map.insert(key, Value::Int64(val));
}
CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID
| CMCD_KEY_ST | CMCD_KEY_V => {
let v = v.ok_or(format!("{k} missing value in {s}"))?;
map.insert(key, Value::String(v.to_string()));
}
CMCD_KEY_NOR => {
let v = v.ok_or(format!("{k} missing value in {s}"))?;
let val = match decode(v) {
Ok(val) => val.to_string(),
Err(_) => v.to_string(),
};
map.insert(key, Value::String(val));
}
CMCD_KEY_PR => {
let v = v.ok_or(format!("{k} missing value in {s}"))?;
let val: f64 = v
.parse()
.map_err(|_| format!("failed to parse {v} as f64"))?;
map.insert(key, Value::Float64(val));
}
_ => match v {
Some(v) => map.insert(key, Value::String(v.to_string())),
None => map.insert(k, Value::Boolean(true)),
},
}
}
Ok(map)
}
fn process_field(&self, val: &str, field: &Field) -> Result<Map, String> {
let prefix = field.get_target_field();
Self::parse(prefix, val)
}
fn update_output_keys(fields: &mut Fields) {
for field in fields.iter_mut() {
for key in CMCD_KEYS.iter() {
field
.output_fields_index_mapping
.insert(Self::generate_key(field.get_target_field(), key), 0);
}
}
Ok(result)
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for CmcdProcessor {
impl TryFrom<&yaml_rust::yaml::Hash> for CmcdProcessorBuilder {
type Error = String;
fn try_from(value: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut processor = CmcdProcessor::default();
let mut fields = Fields::default();
let mut ignore_missing = false;
for (k, v) in value.iter() {
let key = k
@@ -199,25 +320,40 @@ impl TryFrom<&yaml_rust::yaml::Hash> for CmcdProcessor {
.ok_or(format!("key must be a string, but got {k:?}"))?;
match key {
FIELD_NAME => {
processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?));
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
}
FIELDS_NAME => {
processor.with_fields(yaml_fields(v, FIELDS_NAME)?);
fields = yaml_new_fields(v, FIELDS_NAME)?;
}
IGNORE_MISSING_NAME => {
processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?);
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
}
_ => {}
}
}
Ok(processor)
let output_keys = fields
.iter()
.flat_map(|f| {
CMCD_KEYS
.iter()
.map(|cmcd_key| generate_key(f.target_or_input_field(), cmcd_key))
})
.collect();
let builder = CmcdProcessorBuilder {
fields,
output_keys,
ignore_missing,
};
Ok(builder)
}
}
impl crate::etl::processor::Processor for CmcdProcessor {
impl Processor for CmcdProcessor {
fn kind(&self) -> &str {
PROCESSOR_CMCD
}
@@ -226,51 +362,14 @@ impl crate::etl::processor::Processor for CmcdProcessor {
self.ignore_missing
}
fn fields(&self) -> &Fields {
&self.fields
}
fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}
fn output_keys(&self) -> HashSet<String> {
self.fields
.iter()
.map(|field| {
field
.target_field
.clone()
.unwrap_or_else(|| field.get_field_name().to_string())
})
.flat_map(|keys| {
CMCD_KEYS
.iter()
.map(move |key| format!("{}_{}", keys, *key))
})
.collect()
}
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
match val {
Value::String(val) => self.process_field(val, field),
_ => Err(format!(
"{} processor: expect string value, but got {val:?}",
self.kind()
)),
}
}
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String> {
for field in self.fields.iter() {
match val.get(field.input_field.index) {
for (field_index, field) in self.fields.iter().enumerate() {
let field_value_index = field.input_index();
match val.get(field_value_index) {
Some(Value::String(v)) => {
// TODO(qtang): Let this method use the intermediate state collection directly.
let map = self.process_field(v, field)?;
for (k, v) in map.values.into_iter() {
if let Some(index) = field.output_fields_index_mapping.get(&k) {
val[*index] = v;
}
let result_list = self.parse(field_index, v)?;
for (output_index, v) in result_list {
val[output_index] = v;
}
}
Some(Value::Null) | None => {
@@ -278,7 +377,7 @@ impl crate::etl::processor::Processor for CmcdProcessor {
return Err(format!(
"{} processor: missing field: {}",
self.kind(),
field.get_field_name()
field.input_name()
));
}
}
@@ -299,7 +398,8 @@ mod tests {
use ahash::HashMap;
use urlencoding::decode;
use super::CmcdProcessor;
use super::{CmcdProcessorBuilder, CMCD_KEYS};
use crate::etl::field::{Field, Fields};
use crate::etl::value::{Map, Value};
#[test]
@@ -329,6 +429,7 @@ mod tests {
],
),
(
            // we do not resolve the `b` key
"b%2Crtp%3D15000%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22",
vec![
(
@@ -336,7 +437,6 @@ mod tests {
Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()),
),
("prefix_rtp", Value::Int64(15000)),
("b", Value::Boolean(true)),
],
),
(
@@ -347,16 +447,17 @@ mod tests {
],
),
(
            // we do not resolve custom keys
"d%3D4004%2Ccom.example-myNumericKey%3D500%2Ccom.examplemyStringKey%3D%22myStringValue%22",
vec![
(
"prefix_com.example-myNumericKey",
Value::String("500".into()),
),
(
"prefix_com.examplemyStringKey",
Value::String("\"myStringValue\"".into()),
),
// (
// "prefix_com.example-myNumericKey",
// Value::String("500".into()),
// ),
// (
// "prefix_com.examplemyStringKey",
// Value::String("\"myStringValue\"".into()),
// ),
("prefix_d", Value::Int64(4004)),
],
),
@@ -431,6 +532,24 @@ mod tests {
),
];
let field = Field::new("prefix", None);
let output_keys = CMCD_KEYS
.iter()
.map(|k| format!("prefix_{}", k))
.collect::<Vec<String>>();
let mut intermediate_keys = vec!["prefix".to_string()];
intermediate_keys.append(&mut (output_keys.clone()));
let builder = CmcdProcessorBuilder {
fields: Fields::new(vec![field]),
output_keys: output_keys.iter().map(|s| s.to_string()).collect(),
ignore_missing: false,
};
let processor = builder.build(&intermediate_keys).unwrap();
for (s, vec) in ss.into_iter() {
let decoded = decode(s).unwrap().to_string();
@@ -440,7 +559,12 @@ mod tests {
.collect::<HashMap<String, Value>>();
let expected = Map { values };
let actual = CmcdProcessor::parse("prefix", &decoded).unwrap();
let actual = processor.parse(0, &decoded).unwrap();
let actual = actual
.into_iter()
.map(|(index, value)| (intermediate_keys[index].clone(), value))
.collect::<HashMap<String, Value>>();
let actual = Map { values: actual };
assert_eq!(actual, expected);
}
}


@@ -14,17 +14,18 @@
// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/csv-processor.html
use ahash::{HashMap, HashSet};
use ahash::HashSet;
use csv::{ReaderBuilder, Trim};
use itertools::EitherOrBoth::{Both, Left, Right};
use itertools::Itertools;
use crate::etl::field::{Field, Fields};
use crate::etl::field::{Fields, InputFieldInfo, OneInputMultiOutputField};
use crate::etl::find_key_index;
use crate::etl::processor::{
yaml_bool, yaml_field, yaml_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME,
IGNORE_MISSING_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder,
ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::{Map, Value};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_CSV: &str = "csv";
@@ -32,18 +33,78 @@ const SEPARATOR_NAME: &str = "separator";
const QUOTE_NAME: &str = "quote";
const TRIM_NAME: &str = "trim";
const EMPTY_VALUE_NAME: &str = "empty_value";
const TARGET_FIELDS: &str = "target_fields";
#[derive(Debug, Default)]
pub struct CsvProcessorBuilder {
reader: ReaderBuilder,
fields: Fields,
ignore_missing: bool,
// Value used to fill empty fields, empty fields will be skipped if this is not provided.
empty_value: Option<String>,
target_fields: Vec<String>,
// description
// if
// ignore_failure
// on_failure
// tag
}
impl CsvProcessorBuilder {
fn build(self, intermediate_keys: &[String]) -> Result<CsvProcessor, String> {
let mut real_fields = vec![];
for field in self.fields {
let input_index = find_key_index(intermediate_keys, field.input_field(), "csv")?;
let input_field_info = InputFieldInfo::new(field.input_field(), input_index);
let real_field = OneInputMultiOutputField::new(input_field_info, None);
real_fields.push(real_field);
}
let output_index_info = self
.target_fields
.iter()
.map(|f| find_key_index(intermediate_keys, f, "csv"))
.collect::<Result<Vec<_>, String>>()?;
Ok(CsvProcessor {
reader: self.reader,
fields: real_fields,
ignore_missing: self.ignore_missing,
empty_value: self.empty_value,
output_index_info,
})
}
}
impl ProcessorBuilder for CsvProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.target_fields.iter().map(|s| s.as_str()).collect()
}
fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
self.build(intermediate_keys).map(ProcessorKind::Csv)
}
}
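// A hedged sketch of the new configuration shape, mirroring the pipeline tests
// above: target fields now come from a dedicated target_fields key instead of
// being packed into field. The YAML and key names below are illustrative only.
#[test]
fn csv_builder_sketch() {
    let docs = yaml_rust::YamlLoader::load_from_str(
        r#"
csv:
  field: my_field
  target_fields: field1, field2
"#,
    )
    .unwrap();
    let hash = docs[0]["csv"].as_hash().unwrap();
    let builder = CsvProcessorBuilder::try_from(hash).unwrap();
    // Resolving against the intermediate keys yields a ready-to-run CsvProcessor.
    let keys = vec![
        "my_field".to_string(),
        "field1".to_string(),
        "field2".to_string(),
    ];
    assert!(builder.build(&keys).is_ok());
}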
/// only support string value
#[derive(Debug)]
pub struct CsvProcessor {
reader: ReaderBuilder,
fields: Fields,
fields: Vec<OneInputMultiOutputField>,
ignore_missing: bool,
// Value used to fill empty fields, empty fields will be skipped if this is not provided.
empty_value: Option<String>,
output_index_info: Vec<usize>,
// description
// if
// ignore_failure
@@ -52,81 +113,19 @@ pub struct CsvProcessor {
}
impl CsvProcessor {
fn new() -> Self {
let mut reader = ReaderBuilder::new();
reader.has_headers(false);
Self {
reader,
fields: Fields::default(),
ignore_missing: false,
empty_value: None,
}
}
fn with_fields(&mut self, fields: Fields) {
self.fields = fields;
}
fn try_separator(&mut self, separator: String) -> Result<(), String> {
if separator.len() != 1 {
Err(format!(
"'{}' must be a single character, but got '{}'",
SEPARATOR_NAME, separator
))
} else {
self.reader.delimiter(separator.as_bytes()[0]);
Ok(())
}
}
fn try_quote(&mut self, quote: String) -> Result<(), String> {
if quote.len() != 1 {
Err(format!(
"'{}' must be a single character, but got '{}'",
QUOTE_NAME, quote
))
} else {
self.reader.quote(quote.as_bytes()[0]);
Ok(())
}
}
fn with_trim(&mut self, trim: bool) {
if trim {
self.reader.trim(Trim::All);
} else {
self.reader.trim(Trim::None);
}
}
fn with_ignore_missing(&mut self, ignore_missing: bool) {
self.ignore_missing = ignore_missing;
}
fn with_empty_value(&mut self, empty_value: String) {
self.empty_value = Some(empty_value);
}
// process the csv format string to a map with target_fields as keys
fn process_field(&self, val: &str, field: &Field) -> Result<Map, String> {
fn process(&self, val: &str) -> Result<Vec<(usize, Value)>, String> {
let mut reader = self.reader.from_reader(val.as_bytes());
if let Some(result) = reader.records().next() {
let record: csv::StringRecord = result.map_err(|e| e.to_string())?;
let values: HashMap<String, Value> = field
.target_fields
.as_ref()
.ok_or(format!(
"target fields must be set after '{}'",
field.get_field_name()
))?
let values: Vec<(usize, Value)> = self
.output_index_info
.iter()
.map(|f| f.to_string())
.zip_longest(record.iter())
.filter_map(|zipped| match zipped {
Both(target_field, val) => Some((target_field, Value::String(val.into()))),
Both(target_field, val) => Some((*target_field, Value::String(val.into()))),
// if target fields are more than extracted fields, fill the rest with empty value
Left(target_field) => {
let value = self
@@ -134,69 +133,101 @@ impl CsvProcessor {
.as_ref()
.map(|s| Value::String(s.clone()))
.unwrap_or(Value::Null);
Some((target_field, value))
Some((*target_field, value))
}
// if extracted fields are more than target fields, ignore the rest
Right(_) => None,
})
.collect();
Ok(Map { values })
Ok(values)
} else {
Err("expected at least one record from csv format, but got none".into())
}
}
fn update_output_keys(&mut self) {
self.fields.iter_mut().for_each(|f| {
if let Some(tfs) = f.target_fields.as_ref() {
tfs.iter().for_each(|tf| {
if !tf.is_empty() {
f.output_fields_index_mapping.insert(tf.to_string(), 0);
}
});
}
})
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for CsvProcessor {
impl TryFrom<&yaml_rust::yaml::Hash> for CsvProcessorBuilder {
type Error = String;
fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut processor = CsvProcessor::new();
let mut reader = ReaderBuilder::new();
reader.has_headers(false);
let mut fields = Fields::default();
let mut ignore_missing = false;
let mut empty_value = None;
let mut target_fields = vec![];
for (k, v) in hash {
let key = k
.as_str()
.ok_or(format!("key must be a string, but got {k:?}"))?;
match key {
FIELD_NAME => {
processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?));
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
}
FIELDS_NAME => {
processor.with_fields(yaml_fields(v, FIELDS_NAME)?);
fields = yaml_new_fields(v, FIELDS_NAME)?;
}
TARGET_FIELDS => {
target_fields = yaml_string(v, TARGET_FIELDS)?
.split(',')
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.collect();
}
SEPARATOR_NAME => {
processor.try_separator(yaml_string(v, SEPARATOR_NAME)?)?;
let separator = yaml_string(v, SEPARATOR_NAME)?;
if separator.len() != 1 {
return Err(format!(
"'{}' must be a single character, but got '{}'",
SEPARATOR_NAME, separator
));
} else {
reader.delimiter(separator.as_bytes()[0]);
}
}
QUOTE_NAME => {
processor.try_quote(yaml_string(v, QUOTE_NAME)?)?;
let quote = yaml_string(v, QUOTE_NAME)?;
if quote.len() != 1 {
return Err(format!(
"'{}' must be a single character, but got '{}'",
QUOTE_NAME, quote
));
} else {
reader.quote(quote.as_bytes()[0]);
}
}
TRIM_NAME => {
processor.with_trim(yaml_bool(v, TRIM_NAME)?);
let trim = yaml_bool(v, TRIM_NAME)?;
if trim {
reader.trim(Trim::All);
} else {
reader.trim(Trim::None);
}
}
IGNORE_MISSING_NAME => {
processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?);
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
}
EMPTY_VALUE_NAME => {
processor.with_empty_value(yaml_string(v, EMPTY_VALUE_NAME)?);
empty_value = Some(yaml_string(v, EMPTY_VALUE_NAME)?);
}
_ => {}
}
}
processor.update_output_keys();
Ok(processor)
let builder = {
CsvProcessorBuilder {
reader,
fields,
ignore_missing,
empty_value,
target_fields,
}
};
Ok(builder)
}
}
@@ -209,41 +240,14 @@ impl Processor for CsvProcessor {
self.ignore_missing
}
fn fields(&self) -> &Fields {
&self.fields
}
fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}
fn output_keys(&self) -> HashSet<String> {
self.fields
.iter()
.flat_map(|f| f.target_fields.clone().unwrap_or_default())
.collect()
}
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
match val {
Value::String(val) => self.process_field(val, field),
_ => Err(format!(
"{} processor: expect string value, but got {val:?}",
self.kind()
)),
}
}
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String> {
for field in self.fields.iter() {
match val.get(field.input_field.index) {
let index = field.input_index();
match val.get(index) {
Some(Value::String(v)) => {
// TODO(qtang): Let this method use the intermediate state collection directly.
let map = self.process_field(v, field)?;
for (k, v) in map.values.into_iter() {
if let Some(index) = field.output_fields_index_mapping.get(&k) {
val[*index] = v;
}
                    let result_list = self.process(v)?;
                    for (k, v) in result_list {
val[k] = v;
}
}
Some(Value::Null) | None => {
@@ -251,7 +255,7 @@ impl Processor for CsvProcessor {
return Err(format!(
"{} processor: missing field: {}",
self.kind(),
field.get_field_name()
field.input_name()
));
}
}
@@ -267,116 +271,140 @@ impl Processor for CsvProcessor {
}
}
// TODO(yuanbohan): more test cases
#[cfg(test)]
mod tests {
use ahash::HashMap;
use super::{CsvProcessor, Value};
use crate::etl::field::Fields;
use crate::etl::processor::Processor;
use crate::etl::value::Map;
use super::Value;
use crate::etl::processor::csv::CsvProcessorBuilder;
#[test]
fn test_equal_length() {
let mut processor = CsvProcessor::new();
let field = "data,, a, b".parse().unwrap();
processor.with_fields(Fields::one(field));
let mut reader = csv::ReaderBuilder::new();
reader.has_headers(false);
let builder = CsvProcessorBuilder {
reader,
target_fields: vec!["a".into(), "b".into()],
..Default::default()
};
let values: HashMap<String, Value> = [("data".into(), Value::String("1,2".into()))]
let intermediate_keys = vec!["data".into(), "a".into(), "b".into()];
let processor = builder.build(&intermediate_keys).unwrap();
let result = processor
.process("1,2")
.unwrap()
.into_iter()
.collect();
let mut m = Map { values };
processor.exec_map(&mut m).unwrap();
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect::<HashMap<_, _>>();
let values = [
("data".into(), Value::String("1,2".into())),
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
]
.into_iter()
.collect();
let expected = Map { values };
.collect::<HashMap<_, _>>();
assert_eq!(expected, m);
assert_eq!(result, values);
}
// test target_fields length larger than the record length
#[test]
fn test_target_fields_has_more_length() {
let values = [("data".into(), Value::String("1,2".into()))]
.into_iter()
.collect();
let mut input = Map { values };
// with no empty value
{
let mut processor = CsvProcessor::new();
let field = "data,, a,b,c".parse().unwrap();
processor.with_fields(Fields::one(field));
let mut reader = csv::ReaderBuilder::new();
reader.has_headers(false);
let builder = CsvProcessorBuilder {
reader,
target_fields: vec!["a".into(), "b".into(), "c".into()],
..Default::default()
};
processor.exec_map(&mut input).unwrap();
let intermediate_keys = vec!["data".into(), "a".into(), "b".into(), "c".into()];
let processor = builder.build(&intermediate_keys).unwrap();
let result = processor
.process("1,2")
.unwrap()
.into_iter()
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect::<HashMap<_, _>>();
let values = [
("data".into(), Value::String("1,2".into())),
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
("c".into(), Value::Null),
]
.into_iter()
.collect();
let expected = Map { values };
.collect::<HashMap<_, _>>();
assert_eq!(expected, input);
assert_eq!(result, values);
}
// with empty value
{
let mut processor = CsvProcessor::new();
let field = "data,, a,b,c".parse().unwrap();
processor.with_fields(Fields::one(field));
processor.with_empty_value("default".into());
let mut reader = csv::ReaderBuilder::new();
reader.has_headers(false);
let builder = CsvProcessorBuilder {
reader,
target_fields: vec!["a".into(), "b".into(), "c".into()],
empty_value: Some("default".into()),
..Default::default()
};
processor.exec_map(&mut input).unwrap();
let intermediate_keys = vec!["data".into(), "a".into(), "b".into(), "c".into()];
let processor = builder.build(&intermediate_keys).unwrap();
let result = processor
.process("1,2")
.unwrap()
.into_iter()
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect::<HashMap<_, _>>();
let values = [
("data".into(), Value::String("1,2".into())),
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
("c".into(), Value::String("default".into())),
]
.into_iter()
.collect();
let expected = Map { values };
assert_eq!(expected, input);
assert_eq!(result, values);
}
}
// test record has larger length
#[test]
fn test_target_fields_has_less_length() {
let values = [("data".into(), Value::String("1,2,3".into()))]
let mut reader = csv::ReaderBuilder::new();
reader.has_headers(false);
let builder = CsvProcessorBuilder {
reader,
target_fields: vec!["a".into(), "b".into()],
empty_value: Some("default".into()),
..Default::default()
};
let intermediate_keys = vec!["data".into(), "a".into(), "b".into()];
let processor = builder.build(&intermediate_keys).unwrap();
let result = processor
.process("1,2")
.unwrap()
.into_iter()
.collect();
let mut input = Map { values };
let mut processor = CsvProcessor::new();
let field = "data,,a,b".parse().unwrap();
processor.with_fields(Fields::one(field));
processor.exec_map(&mut input).unwrap();
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect::<HashMap<_, _>>();
let values = [
("data".into(), Value::String("1,2,3".into())),
("a".into(), Value::String("1".into())),
("b".into(), Value::String("2".into())),
]
.into_iter()
.collect();
let expected = Map { values };
assert_eq!(expected, input);
assert_eq!(result, values);
}
}


@@ -19,12 +19,12 @@ use chrono::{DateTime, NaiveDateTime};
use chrono_tz::Tz;
use lazy_static::lazy_static;
use crate::etl::field::{Field, Fields};
use crate::etl::field::{Fields, OneInputOneOutputField};
use crate::etl::processor::{
update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, yaml_strings,
Processor, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor,
ProcessorBuilder, ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::{Map, Timestamp, Value};
use crate::etl::value::{Timestamp, Value};
pub(crate) const PROCESSOR_DATE: &str = "date";
@@ -57,9 +57,15 @@ lazy_static! {
.collect();
}
#[derive(Debug, Default)]
#[derive(Debug)]
struct Formats(Vec<Arc<String>>);
impl Default for Formats {
fn default() -> Self {
Formats(DEFAULT_FORMATS.clone())
}
}
impl Formats {
fn new(mut formats: Vec<Arc<String>>) -> Self {
formats.sort();
@@ -76,16 +82,119 @@ impl std::ops::Deref for Formats {
}
}
#[derive(Debug, Default)]
pub struct DateProcessorBuilder {
fields: Fields,
formats: Formats,
timezone: Option<Arc<String>>,
locale: Option<Arc<String>>,
ignore_missing: bool,
}
impl ProcessorBuilder for DateProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.fields
.iter()
.map(|f| f.target_or_input_field())
.collect()
}
fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
self.build(intermediate_keys).map(ProcessorKind::Date)
}
}
impl DateProcessorBuilder {
pub fn build(self, intermediate_keys: &[String]) -> Result<DateProcessor, String> {
let mut real_fields = vec![];
for field in self.fields.into_iter() {
let input = OneInputOneOutputField::build(
"date",
intermediate_keys,
field.input_field(),
field.target_or_input_field(),
)?;
real_fields.push(input);
}
Ok(DateProcessor {
fields: real_fields,
formats: self.formats,
timezone: self.timezone,
locale: self.locale,
ignore_missing: self.ignore_missing,
})
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for DateProcessorBuilder {
type Error = String;
fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut fields = Fields::default();
let mut formats = Formats::default();
let mut timezone = None;
let mut locale = None;
let mut ignore_missing = false;
for (k, v) in hash {
let key = k
.as_str()
.ok_or(format!("key must be a string, but got {k:?}"))?;
match key {
FIELD_NAME => {
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
}
FIELDS_NAME => {
fields = yaml_new_fields(v, FIELDS_NAME)?;
}
FORMATS_NAME => {
let format_strs = yaml_strings(v, FORMATS_NAME)?;
if format_strs.is_empty() {
formats = Formats::new(DEFAULT_FORMATS.clone());
} else {
formats = Formats::new(format_strs.into_iter().map(Arc::new).collect());
}
}
TIMEZONE_NAME => {
timezone = Some(Arc::new(yaml_string(v, TIMEZONE_NAME)?));
}
LOCALE_NAME => {
locale = Some(Arc::new(yaml_string(v, LOCALE_NAME)?));
}
IGNORE_MISSING_NAME => {
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
}
_ => {}
}
}
let builder = DateProcessorBuilder {
fields,
formats,
timezone,
locale,
ignore_missing,
};
Ok(builder)
}
}
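// A small sketch of the resulting build step, using a hypothetical field name:
// once the field appears in the intermediate keys, the builder resolves it into
// a concrete DateProcessor. (Assumes crate::etl::field::Field is imported
// alongside Fields.)
#[test]
fn date_builder_sketch() {
    let builder = DateProcessorBuilder {
        fields: Fields::one(Field::new("access_time", None)),
        ..Default::default()
    };
    let keys = vec!["access_time".to_string()];
    assert!(builder.build(&keys).is_ok());
}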
/// deprecated it should be removed in the future
/// Reserved for compatibility only
#[derive(Debug, Default)]
pub struct DateProcessor {
fields: Fields,
fields: Vec<OneInputOneOutputField>,
formats: Formats,
timezone: Option<Arc<String>>,
locale: Option<Arc<String>>, // to support locale
output_format: Option<Arc<String>>,
ignore_missing: bool,
// description
@@ -96,43 +205,6 @@ pub struct DateProcessor {
}
impl DateProcessor {
fn with_fields(&mut self, mut fields: Fields) {
update_one_one_output_keys(&mut fields);
self.fields = fields
}
fn with_formats(&mut self, v: Option<Vec<Arc<String>>>) {
let v = match v {
Some(v) if !v.is_empty() => v,
_ => DEFAULT_FORMATS.clone(),
};
let formats = Formats::new(v);
self.formats = formats;
}
fn with_timezone(&mut self, timezone: String) {
if !timezone.is_empty() {
self.timezone = Some(Arc::new(timezone));
}
}
fn with_locale(&mut self, locale: String) {
if !locale.is_empty() {
self.locale = Some(Arc::new(locale));
}
}
fn with_output_format(&mut self, output_format: String) {
if !output_format.is_empty() {
self.output_format = Some(Arc::new(output_format));
}
}
fn with_ignore_missing(&mut self, ignore_missing: bool) {
self.ignore_missing = ignore_missing;
}
fn parse(&self, val: &str) -> Result<Timestamp, String> {
let mut tz = Tz::UTC;
if let Some(timezone) = &self.timezone {
@@ -147,61 +219,6 @@ impl DateProcessor {
Err(format!("{} processor: failed to parse {val}", self.kind(),))
}
fn process_field(&self, val: &str, field: &Field) -> Result<Map, String> {
let key = field.get_target_field();
Ok(Map::one(key, Value::Timestamp(self.parse(val)?)))
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for DateProcessor {
type Error = String;
fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut processor = DateProcessor::default();
let mut formats_opt = None;
for (k, v) in hash {
let key = k
.as_str()
.ok_or(format!("key must be a string, but got {k:?}"))?;
match key {
FIELD_NAME => {
processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?));
}
FIELDS_NAME => {
processor.with_fields(yaml_fields(v, FIELDS_NAME)?);
}
FORMATS_NAME => {
let formats = yaml_strings(v, FORMATS_NAME)?;
formats_opt = Some(formats.into_iter().map(Arc::new).collect());
}
TIMEZONE_NAME => {
processor.with_timezone(yaml_string(v, TIMEZONE_NAME)?);
}
LOCALE_NAME => {
processor.with_locale(yaml_string(v, LOCALE_NAME)?);
}
OUTPUT_FORMAT_NAME => {
processor.with_output_format(yaml_string(v, OUTPUT_FORMAT_NAME)?);
}
IGNORE_MISSING_NAME => {
processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?);
}
_ => {}
}
}
processor.with_formats(formats_opt);
Ok(processor)
}
}
impl Processor for DateProcessor {
@@ -213,53 +230,21 @@ impl Processor for DateProcessor {
self.ignore_missing
}
fn fields(&self) -> &Fields {
&self.fields
}
fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}
fn output_keys(&self) -> HashSet<String> {
self.fields
.iter()
.map(|f| f.get_target_field().to_string())
.collect()
}
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
match val {
Value::String(s) => self.process_field(s, field),
_ => Err(format!(
"{} processor: expect string value, but got {val:?}",
self.kind()
)),
}
}
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String> {
for field in self.fields().iter() {
let index = field.input_field.index;
for field in self.fields.iter() {
let index = field.input_index();
match val.get(index) {
Some(Value::String(s)) => {
// TODO(qtang): Let this method use the intermediate state collection directly.
let mut map = self.process_field(s, field)?;
field
.output_fields_index_mapping
.iter()
.for_each(|(k, output_index)| {
if let Some(v) = map.remove(k) {
val[*output_index] = v;
}
});
let timestamp = self.parse(s)?;
let output_index = field.output_index();
val[output_index] = Value::Timestamp(timestamp);
}
Some(Value::Null) | None => {
if !self.ignore_missing {
return Err(format!(
"{} processor: missing field: {}",
self.kind(),
field.get_field_name()
field.input_name()
));
}
}
@@ -318,8 +303,7 @@ mod tests {
#[test]
fn test_parse() {
let mut processor = DateProcessor::default();
processor.with_formats(None);
let processor = DateProcessor::default();
let values: Vec<&str> = vec![
"2014-5-17T12:34:56",
@@ -340,7 +324,6 @@ mod tests {
#[test]
fn test_parse_with_formats() {
let mut processor = DateProcessor::default();
let formats = vec![
"%Y-%m-%dT%H:%M:%S%:z",
"%Y-%m-%dT%H:%M:%S%.3f%:z",
@@ -349,8 +332,11 @@ mod tests {
]
.into_iter()
.map(|s| Arc::new(s.to_string()))
.collect();
processor.with_formats(Some(formats));
.collect::<Vec<_>>();
let processor = DateProcessor {
formats: super::Formats(formats),
..Default::default()
};
let values: Vec<&str> = vec![
"2014-5-17T12:34:56",
@@ -371,9 +357,10 @@ mod tests {
#[test]
fn test_parse_with_timezone() {
let mut processor = DateProcessor::default();
processor.with_formats(None);
processor.with_timezone("Asia/Tokyo".to_string());
let processor = DateProcessor {
timezone: Some(Arc::new("Asia/Tokyo".to_string())),
..Default::default()
};
let values: Vec<&str> = vec![
"2014-5-17T12:34:56",

File diff suppressed because it is too large

View File

@@ -14,17 +14,17 @@
use ahash::HashSet;
use crate::etl::field::{Field, Fields};
use crate::etl::field::{Fields, OneInputOneOutputField};
use crate::etl::processor::{
update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, Processor,
FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder,
ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::time::{
MICROSECOND_RESOLUTION, MICRO_RESOLUTION, MILLISECOND_RESOLUTION, MILLI_RESOLUTION,
MS_RESOLUTION, NANOSECOND_RESOLUTION, NANO_RESOLUTION, NS_RESOLUTION, SECOND_RESOLUTION,
SEC_RESOLUTION, S_RESOLUTION, US_RESOLUTION,
};
use crate::etl::value::{Map, Timestamp, Value};
use crate::etl::value::{Timestamp, Value};
pub(crate) const PROCESSOR_EPOCH: &str = "epoch";
const RESOLUTION_NAME: &str = "resolution";
@@ -52,12 +52,56 @@ impl TryFrom<&str> for Resolution {
}
}
#[derive(Debug, Default)]
pub struct EpochProcessorBuilder {
fields: Fields,
resolution: Resolution,
ignore_missing: bool,
}
impl ProcessorBuilder for EpochProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.fields
.iter()
.map(|f| f.target_or_input_field())
.collect()
}
fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
self.build(intermediate_keys).map(ProcessorKind::Epoch)
}
}
impl EpochProcessorBuilder {
pub fn build(self, intermediate_keys: &[String]) -> Result<EpochProcessor, String> {
let mut real_fields = vec![];
for field in self.fields.into_iter() {
let input = OneInputOneOutputField::build(
"epoch",
intermediate_keys,
field.input_field(),
field.target_or_input_field(),
)?;
real_fields.push(input);
}
Ok(EpochProcessor {
fields: real_fields,
resolution: self.resolution,
ignore_missing: self.ignore_missing,
})
}
}
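The builder only wires fields and options; the conversion itself still lives in the `parse` method kept below, which maps the configured resolution onto the matching `Timestamp` variant. A small illustrative snippet (it would have to sit in this module's tests, since `parse` is private; values are made up):

    let processor = EpochProcessor {
        resolution: Resolution::Nano,
        ..Default::default()
    };
    let ts = processor.parse(&Value::String("1573840000".into())).unwrap();
    // with nanosecond resolution the raw number is wrapped as Timestamp::Nanosecond(1573840000)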
/// Supports string, integer, float, time, and epoch values.
/// Deprecated: reserved for compatibility only and should be removed in the future.
#[derive(Debug, Default)]
pub struct EpochProcessor {
fields: Fields,
fields: Vec<OneInputOneOutputField>,
resolution: Resolution,
ignore_missing: bool,
// description
@@ -68,19 +112,6 @@ pub struct EpochProcessor {
}
impl EpochProcessor {
fn with_fields(&mut self, mut fields: Fields) {
update_one_one_output_keys(&mut fields);
self.fields = fields
}
fn with_resolution(&mut self, resolution: Resolution) {
self.resolution = resolution;
}
fn with_ignore_missing(&mut self, ignore_missing: bool) {
self.ignore_missing = ignore_missing;
}
fn parse(&self, val: &Value) -> Result<Timestamp, String> {
let t: i64 = match val {
Value::String(s) => s
@@ -117,19 +148,15 @@ impl EpochProcessor {
Resolution::Nano => Ok(Timestamp::Nanosecond(t)),
}
}
fn process_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
let key = field.get_target_field();
Ok(Map::one(key, Value::Timestamp(self.parse(val)?)))
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for EpochProcessor {
impl TryFrom<&yaml_rust::yaml::Hash> for EpochProcessorBuilder {
type Error = String;
fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut processor = EpochProcessor::default();
let mut fields = Fields::default();
let mut resolution = Resolution::default();
let mut ignore_missing = false;
for (k, v) in hash {
let key = k
@@ -138,24 +165,29 @@ impl TryFrom<&yaml_rust::yaml::Hash> for EpochProcessor {
match key {
FIELD_NAME => {
processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?));
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
}
FIELDS_NAME => {
processor.with_fields(yaml_fields(v, FIELDS_NAME)?);
fields = yaml_new_fields(v, FIELDS_NAME)?;
}
RESOLUTION_NAME => {
let s = yaml_string(v, RESOLUTION_NAME)?.as_str().try_into()?;
processor.with_resolution(s);
resolution = s;
}
IGNORE_MISSING_NAME => {
processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?);
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
}
_ => {}
}
}
let builder = EpochProcessorBuilder {
fields,
resolution,
ignore_missing,
};
Ok(processor)
Ok(builder)
}
}
@@ -168,49 +200,23 @@ impl Processor for EpochProcessor {
self.ignore_missing
}
fn fields(&self) -> &Fields {
&self.fields
}
fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}
fn output_keys(&self) -> HashSet<String> {
self.fields
.iter()
.map(|f| f.get_target_field().to_string())
.collect()
}
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
self.process_field(val, field)
}
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String> {
for field in self.fields.iter() {
let index = field.input_field.index;
let index = field.input_index();
match val.get(index) {
Some(Value::Null) | None => {
if !self.ignore_missing {
return Err(format!(
"{} processor: missing field: {}",
self.kind(),
field.get_field_name()
field.input_name()
));
}
}
Some(v) => {
// TODO(qtang): Let this method use the intermediate state collection directly.
let mut map = self.process_field(v, field)?;
field
.output_fields_index_mapping
.iter()
.for_each(|(k, output_index)| {
if let Some(v) = map.remove(k) {
val[*output_index] = v;
}
});
let timestamp = self.parse(v)?;
let output_index = field.output_index();
val[output_index] = Value::Timestamp(timestamp);
}
}
}
@@ -225,8 +231,10 @@ mod tests {
#[test]
fn test_parse_epoch() {
let mut processor = EpochProcessor::default();
processor.with_resolution(super::Resolution::Second);
let processor = EpochProcessor {
resolution: super::Resolution::Second,
..Default::default()
};
let values = [
Value::String("1573840000".into()),

View File

@@ -15,45 +15,43 @@
use ahash::HashSet;
use regex::Regex;
use crate::etl::field::{Field, Fields};
use crate::etl::field::{Fields, OneInputOneOutputField};
use crate::etl::processor::{
update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, Processor,
FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder,
ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME,
};
use crate::etl::value::{Array, Map, Value};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_GSUB: &str = "gsub";
const REPLACEMENT_NAME: &str = "replacement";
/// A processor that replaces all matches of a pattern in a string with a replacement; only string values and arrays of string values are supported
#[derive(Debug, Default)]
pub struct GsubProcessor {
pub struct GsubProcessorBuilder {
fields: Fields,
pattern: Option<Regex>,
replacement: Option<String>,
ignore_missing: bool,
}
impl GsubProcessor {
fn with_fields(&mut self, mut fields: Fields) {
update_one_one_output_keys(&mut fields);
self.fields = fields;
impl ProcessorBuilder for GsubProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.fields
.iter()
.map(|f| f.target_or_input_field())
.collect()
}
fn with_ignore_missing(&mut self, ignore_missing: bool) {
self.ignore_missing = ignore_missing;
fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}
fn try_pattern(&mut self, pattern: &str) -> Result<(), String> {
self.pattern = Some(Regex::new(pattern).map_err(|e| e.to_string())?);
Ok(())
}
fn with_replacement(&mut self, replacement: impl Into<String>) {
self.replacement = Some(replacement.into());
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
self.build(intermediate_keys).map(ProcessorKind::Gsub)
}
}
impl GsubProcessorBuilder {
fn check(self) -> Result<Self, String> {
if self.pattern.is_none() {
return Err("pattern is required".to_string());
@@ -66,7 +64,49 @@ impl GsubProcessor {
Ok(self)
}
fn process_string_field(&self, val: &str, field: &Field) -> Result<Map, String> {
fn build(self, intermediate_keys: &[String]) -> Result<GsubProcessor, String> {
let mut real_fields = vec![];
for field in self.fields.into_iter() {
let input = OneInputOneOutputField::build(
"gsub",
intermediate_keys,
field.input_field(),
field.target_or_input_field(),
)?;
real_fields.push(input);
}
Ok(GsubProcessor {
fields: real_fields,
pattern: self.pattern,
replacement: self.replacement,
ignore_missing: self.ignore_missing,
})
}
}
/// A processor that replaces all matches of a pattern in a string with a replacement; only string values and arrays of string values are supported
#[derive(Debug, Default)]
pub struct GsubProcessor {
fields: Vec<OneInputOneOutputField>,
pattern: Option<Regex>,
replacement: Option<String>,
ignore_missing: bool,
}
impl GsubProcessor {
fn check(self) -> Result<Self, String> {
if self.pattern.is_none() {
return Err("pattern is required".to_string());
}
if self.replacement.is_none() {
return Err("replacement is required".to_string());
}
Ok(self)
}
fn process_string(&self, val: &str) -> Result<Value, String> {
let replacement = self.replacement.as_ref().unwrap();
let new_val = self
.pattern
@@ -76,42 +116,28 @@ impl GsubProcessor {
.to_string();
let val = Value::String(new_val);
let key = field.get_target_field();
Ok(Map::one(key, val))
Ok(val)
}
fn process_array_field(&self, arr: &Array, field: &Field) -> Result<Map, String> {
let key = field.get_target_field();
let re = self.pattern.as_ref().unwrap();
let replacement = self.replacement.as_ref().unwrap();
let mut result = Array::default();
for val in arr.iter() {
match val {
Value::String(haystack) => {
let new_val = re.replace_all(haystack, replacement).to_string();
result.push(Value::String(new_val));
}
_ => {
return Err(format!(
"{} processor: expect string or array string, but got {val:?}",
self.kind()
))
}
}
fn process(&self, val: &Value) -> Result<Value, String> {
match val {
Value::String(val) => self.process_string(val),
_ => Err(format!(
"{} processor: expect string or array string, but got {val:?}",
self.kind()
)),
}
Ok(Map::one(key, Value::Array(result)))
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for GsubProcessor {
impl TryFrom<&yaml_rust::yaml::Hash> for GsubProcessorBuilder {
type Error = String;
fn try_from(value: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut processor = GsubProcessor::default();
let mut fields = Fields::default();
let mut ignore_missing = false;
let mut pattern = None;
let mut replacement = None;
for (k, v) in value.iter() {
let key = k
@@ -119,27 +145,36 @@ impl TryFrom<&yaml_rust::yaml::Hash> for GsubProcessor {
.ok_or(format!("key must be a string, but got {k:?}"))?;
match key {
FIELD_NAME => {
processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?));
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
}
FIELDS_NAME => {
processor.with_fields(yaml_fields(v, FIELDS_NAME)?);
fields = yaml_new_fields(v, FIELDS_NAME)?;
}
PATTERN_NAME => {
processor.try_pattern(&yaml_string(v, PATTERN_NAME)?)?;
let pattern_str = yaml_string(v, PATTERN_NAME)?;
pattern = Some(Regex::new(&pattern_str).map_err(|e| e.to_string())?);
}
REPLACEMENT_NAME => {
processor.with_replacement(yaml_string(v, REPLACEMENT_NAME)?);
let replacement_str = yaml_string(v, REPLACEMENT_NAME)?;
replacement = Some(replacement_str);
}
IGNORE_MISSING_NAME => {
processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?);
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
}
_ => {}
}
}
processor.check()
let builder = GsubProcessorBuilder {
fields,
pattern,
replacement,
ignore_missing,
};
builder.check()
}
}
@@ -152,56 +187,23 @@ impl crate::etl::processor::Processor for GsubProcessor {
self.ignore_missing
}
fn fields(&self) -> &Fields {
&self.fields
}
fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}
fn output_keys(&self) -> HashSet<String> {
self.fields
.iter()
.map(|f| f.get_target_field().to_string())
.collect()
}
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
match val {
Value::String(val) => self.process_string_field(val, field),
Value::Array(arr) => self.process_array_field(arr, field),
_ => Err(format!(
"{} processor: expect string or array string, but got {val:?}",
self.kind()
)),
}
}
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String> {
for field in self.fields.iter() {
let index = field.input_field.index;
let index = field.input_index();
match val.get(index) {
Some(Value::Null) | None => {
if !self.ignore_missing {
return Err(format!(
"{} processor: missing field: {}",
self.kind(),
field.get_field_name()
field.input_name()
));
}
}
Some(v) => {
// TODO(qtang): Let this method use the intermediate state collection directly.
let mut map = self.exec_field(v, field)?;
field
.output_fields_index_mapping
.iter()
.for_each(|(k, output_index)| {
if let Some(v) = map.remove(k) {
val[*output_index] = v;
}
});
let result = self.process(v)?;
let output_index = field.output_index();
val[output_index] = result;
}
}
}
@@ -211,55 +213,20 @@ impl crate::etl::processor::Processor for GsubProcessor {
#[cfg(test)]
mod tests {
use crate::etl::field::Field;
use crate::etl::processor::gsub::GsubProcessor;
use crate::etl::processor::Processor;
use crate::etl::value::{Map, Value};
use crate::etl::value::Value;
#[test]
fn test_string_value() {
let mut processor = GsubProcessor::default();
processor.try_pattern(r"\d+").unwrap();
processor.with_replacement("xxx");
let processor = GsubProcessor {
pattern: Some(regex::Regex::new(r"\d+").unwrap()),
replacement: Some("xxx".to_string()),
..Default::default()
};
let field = Field::new("message");
let val = Value::String("123".to_string());
let result = processor.exec_field(&val, &field).unwrap();
let result = processor.process(&val).unwrap();
assert_eq!(
result,
Map::one("message", Value::String("xxx".to_string()))
);
}
#[test]
fn test_array_string_value() {
let mut processor = GsubProcessor::default();
processor.try_pattern(r"\d+").unwrap();
processor.with_replacement("xxx");
let field = Field::new("message");
let val = Value::Array(
vec![
Value::String("123".to_string()),
Value::String("456".to_string()),
]
.into(),
);
let result = processor.exec_field(&val, &field).unwrap();
assert_eq!(
result,
Map::one(
"message",
Value::Array(
vec![
Value::String("xxx".to_string()),
Value::String("xxx".to_string())
]
.into()
)
)
);
assert_eq!(result, Value::String("xxx".to_string()));
}
}

View File

@@ -14,40 +14,78 @@
use ahash::HashSet;
use crate::etl::field::{Field, Fields};
use crate::etl::field::{Fields, OneInputOneOutputField};
use crate::etl::processor::{
update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, Processor,
FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, SEPARATOR_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder,
ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, SEPARATOR_NAME,
};
use crate::etl::value::{Array, Map, Value};
use crate::etl::value::{Array, Value};
pub(crate) const PROCESSOR_JOIN: &str = "join";
/// A processor to join each element of an array into a single string using a separator string between each element
#[derive(Debug, Default)]
pub struct JoinProcessor {
pub struct JoinProcessorBuilder {
fields: Fields,
separator: Option<String>,
ignore_missing: bool,
}
impl ProcessorBuilder for JoinProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.fields
.iter()
.map(|f| f.target_or_input_field())
.collect()
}
fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
self.build(intermediate_keys).map(ProcessorKind::Join)
}
}
impl JoinProcessorBuilder {
fn check(self) -> Result<Self, String> {
if self.separator.is_none() {
return Err("separator is required".to_string());
}
Ok(self)
}
pub fn build(self, intermediate_keys: &[String]) -> Result<JoinProcessor, String> {
let mut real_fields = vec![];
for field in self.fields.into_iter() {
let input = OneInputOneOutputField::build(
"join",
intermediate_keys,
field.input_field(),
field.target_or_input_field(),
)?;
real_fields.push(input);
}
Ok(JoinProcessor {
fields: real_fields,
separator: self.separator,
ignore_missing: self.ignore_missing,
})
}
}
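A hedged end-to-end sketch of how the builder/processor split is meant to be driven (the YAML literal, the key names tags/tags_joined, and the value layout are illustrative, not taken from the PR): the builder is parsed from pipeline YAML, bound to the pipeline-wide intermediate keys, and the resulting processor then mutates the positional value vector in place.

    let pipeline_str = r#"fields:
      - tags, tags_joined
    separator: '-'
    ignore_missing: false"#;
    let yaml = yaml_rust::YamlLoader::load_from_str(pipeline_str).unwrap().pop().unwrap();
    let builder = JoinProcessorBuilder::try_from(yaml.as_hash().unwrap()).unwrap();

    let intermediate_keys = vec!["tags".to_string(), "tags_joined".to_string()];
    let processor = builder.build(&intermediate_keys).unwrap();

    // one slot per intermediate key; exec_mut reads the input index and writes the output index
    // (assumes the Processor trait is in scope for `exec_mut`)
    let mut values = vec![
        Value::Array(vec![Value::String("a".to_string()), Value::String("b".to_string())].into()),
        Value::Null,
    ];
    processor.exec_mut(&mut values).unwrap();
    // values[1] now holds Value::String("a-b")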
/// A processor to join each element of an array into a single string using a separator string between each element
#[derive(Debug, Default)]
pub struct JoinProcessor {
fields: Vec<OneInputOneOutputField>,
separator: Option<String>,
ignore_missing: bool,
}
impl JoinProcessor {
fn with_fields(&mut self, mut fields: Fields) {
update_one_one_output_keys(&mut fields);
self.fields = fields;
}
fn with_separator(&mut self, separator: impl Into<String>) {
self.separator = Some(separator.into());
}
fn with_ignore_missing(&mut self, ignore_missing: bool) {
self.ignore_missing = ignore_missing;
}
fn process_field(&self, arr: &Array, field: &Field) -> Result<Map, String> {
let key = field.get_target_field();
fn process(&self, arr: &Array) -> Result<Value, String> {
let sep = self.separator.as_ref().unwrap();
let val = arr
.iter()
@@ -55,7 +93,7 @@ impl JoinProcessor {
.collect::<Vec<String>>()
.join(sep);
Ok(Map::one(key, Value::String(val)))
Ok(Value::String(val))
}
fn check(self) -> Result<Self, String> {
@@ -67,11 +105,13 @@ impl JoinProcessor {
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for JoinProcessor {
impl TryFrom<&yaml_rust::yaml::Hash> for JoinProcessorBuilder {
type Error = String;
fn try_from(value: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut processor = JoinProcessor::default();
let mut fields = Fields::default();
let mut separator = None;
let mut ignore_missing = false;
for (k, v) in value.iter() {
let key = k
@@ -79,30 +119,31 @@ impl TryFrom<&yaml_rust::yaml::Hash> for JoinProcessor {
.ok_or(format!("key must be a string, but got {k:?}"))?;
match key {
FIELD_NAME => {
processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?));
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
}
FIELDS_NAME => {
processor.with_fields(yaml_fields(v, FIELDS_NAME)?);
fields = yaml_new_fields(v, FIELDS_NAME)?;
}
SEPARATOR_NAME => {
processor.with_separator(yaml_string(v, SEPARATOR_NAME)?);
separator = Some(yaml_string(v, SEPARATOR_NAME)?);
}
IGNORE_MISSING_NAME => {
processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?);
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
}
_ => {}
}
}
processor.check()
let builder = JoinProcessorBuilder {
fields,
separator,
ignore_missing,
};
builder.check()
}
}
impl Processor for JoinProcessor {
fn fields(&self) -> &Fields {
&self.fields
}
fn kind(&self) -> &str {
PROCESSOR_JOIN
}
@@ -111,49 +152,21 @@ impl Processor for JoinProcessor {
self.ignore_missing
}
fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}
fn output_keys(&self) -> HashSet<String> {
self.fields
.iter()
.map(|f| f.get_target_field().to_string())
.collect()
}
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
match val {
Value::Array(arr) => self.process_field(arr, field),
_ => Err(format!(
"{} processor: expect array value, but got {val:?}",
self.kind()
)),
}
}
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String> {
for field in self.fields.iter() {
let index = field.input_field.index;
let index = field.input_index();
match val.get(index) {
Some(Value::Array(arr)) => {
// TODO(qtang): Let this method use the intermediate state collection directly.
let mut map = self.process_field(arr, field)?;
field
.output_fields_index_mapping
.iter()
.for_each(|(k, output_index)| {
if let Some(v) = map.remove(k) {
val[*output_index] = v;
}
});
let result = self.process(arr)?;
let output_index = field.output_index();
val[output_index] = result;
}
Some(Value::Null) | None => {
if !self.ignore_missing {
return Err(format!(
"{} processor: missing field: {}",
self.kind(),
field.get_field_name()
field.input_name()
));
}
}
@@ -173,25 +186,22 @@ impl Processor for JoinProcessor {
#[cfg(test)]
mod tests {
use crate::etl::field::Field;
use crate::etl::processor::join::JoinProcessor;
use crate::etl::processor::Processor;
use crate::etl::value::{Map, Value};
use crate::etl::value::Value;
#[test]
fn test_join_processor() {
let mut processor = JoinProcessor::default();
processor.with_separator("-");
let processor = JoinProcessor {
separator: Some("-".to_string()),
..Default::default()
};
let field = Field::new("test");
let arr = Value::Array(
vec![
Value::String("a".to_string()),
Value::String("b".to_string()),
]
.into(),
);
let result = processor.exec_field(&arr, &field).unwrap();
assert_eq!(result, Map::one("test", Value::String("a-b".to_string())));
let arr = vec![
Value::String("a".to_string()),
Value::String("b".to_string()),
]
.into();
let result = processor.process(&arr).unwrap();
assert_eq!(result, Value::String("a-b".to_string()));
}
}

View File

@@ -14,12 +14,12 @@
use ahash::HashSet;
use crate::etl::field::{Field, Fields};
use crate::etl::field::{Fields, OneInputOneOutputField};
use crate::etl::processor::{
update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, Processor,
FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, METHOD_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder,
ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, METHOD_NAME,
};
use crate::etl::value::{Map, Value};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_LETTER: &str = "letter";
@@ -54,29 +54,61 @@ impl std::str::FromStr for Method {
}
}
/// only supports string values
#[derive(Debug, Default)]
pub struct LetterProcessor {
pub struct LetterProcessorBuilder {
fields: Fields,
method: Method,
ignore_missing: bool,
}
impl ProcessorBuilder for LetterProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.fields
.iter()
.map(|f| f.target_or_input_field())
.collect()
}
fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
self.build(intermediate_keys).map(ProcessorKind::Letter)
}
}
impl LetterProcessorBuilder {
pub fn build(self, intermediate_keys: &[String]) -> Result<LetterProcessor, String> {
let mut real_fields = vec![];
for field in self.fields.into_iter() {
let input = OneInputOneOutputField::build(
"letter",
intermediate_keys,
field.input_field(),
field.target_or_input_field(),
)?;
real_fields.push(input);
}
Ok(LetterProcessor {
fields: real_fields,
method: self.method,
ignore_missing: self.ignore_missing,
})
}
}
/// only supports string values
#[derive(Debug, Default)]
pub struct LetterProcessor {
fields: Vec<OneInputOneOutputField>,
method: Method,
ignore_missing: bool,
}
impl LetterProcessor {
fn with_fields(&mut self, mut fields: Fields) {
update_one_one_output_keys(&mut fields);
self.fields = fields;
}
fn with_method(&mut self, method: Method) {
self.method = method;
}
fn with_ignore_missing(&mut self, ignore_missing: bool) {
self.ignore_missing = ignore_missing;
}
fn process_field(&self, val: &str, field: &Field) -> Result<Map, String> {
fn process_field(&self, val: &str) -> Result<Value, String> {
let processed = match self.method {
Method::Upper => val.to_uppercase(),
Method::Lower => val.to_lowercase(),
@@ -84,17 +116,17 @@ impl LetterProcessor {
};
let val = Value::String(processed);
let key = field.get_target_field();
Ok(Map::one(key, val))
Ok(val)
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for LetterProcessor {
impl TryFrom<&yaml_rust::yaml::Hash> for LetterProcessorBuilder {
type Error = String;
fn try_from(value: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut processor = LetterProcessor::default();
let mut fields = Fields::default();
let mut method = Method::Lower;
let mut ignore_missing = false;
for (k, v) in value.iter() {
let key = k
@@ -102,23 +134,26 @@ impl TryFrom<&yaml_rust::yaml::Hash> for LetterProcessor {
.ok_or(format!("key must be a string, but got {k:?}"))?;
match key {
FIELD_NAME => {
processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?));
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
}
FIELDS_NAME => {
processor.with_fields(yaml_fields(v, FIELDS_NAME)?);
fields = yaml_new_fields(v, FIELDS_NAME)?;
}
METHOD_NAME => {
let method = yaml_string(v, METHOD_NAME)?;
processor.with_method(method.parse()?);
method = yaml_string(v, METHOD_NAME)?.parse()?;
}
IGNORE_MISSING_NAME => {
processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?);
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
}
_ => {}
}
}
Ok(processor)
Ok(LetterProcessorBuilder {
fields,
method,
ignore_missing,
})
}
}
@@ -131,53 +166,21 @@ impl Processor for LetterProcessor {
self.ignore_missing
}
fn fields(&self) -> &Fields {
&self.fields
}
fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}
fn output_keys(&self) -> HashSet<String> {
self.fields
.iter()
.map(|f| f.get_target_field().to_string())
.collect()
}
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
match val {
Value::String(val) => self.process_field(val, field),
_ => Err(format!(
"{} processor: expect string value, but got {val:?}",
self.kind()
)),
}
}
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String> {
for field in self.fields.iter() {
let index = field.input_field.index;
let index = field.input_index();
match val.get(index) {
Some(Value::String(s)) => {
// TODO(qtang): Let this method use the intermediate state collection directly.
let mut processed = self.process_field(s, field)?;
field
.output_fields_index_mapping
.iter()
.for_each(|(k, output_index)| {
if let Some(v) = processed.remove(k) {
val[*output_index] = v;
}
});
let result = self.process_field(s)?;
let (_, output_index) = field.output();
val[*output_index] = result;
}
Some(Value::Null) | None => {
if !self.ignore_missing {
return Err(format!(
"{} processor: missing field: {}",
self.kind(),
field.get_field_name()
&field.input().name
));
}
}
@@ -204,33 +207,36 @@ fn capitalize(s: &str) -> String {
#[cfg(test)]
mod tests {
use crate::etl::field::Fields;
use crate::etl::processor::letter::{LetterProcessor, Method};
use crate::etl::value::{Map, Value};
use crate::etl::value::Value;
#[test]
fn test_process() {
let field = "letter";
let ff: crate::etl::processor::Field = field.parse().unwrap();
let mut processor = LetterProcessor::default();
processor.with_fields(Fields::one(ff.clone()));
{
processor.with_method(Method::Upper);
let processed = processor.process_field("pipeline", &ff).unwrap();
assert_eq!(Map::one(field, Value::String("PIPELINE".into())), processed)
let processor = LetterProcessor {
method: Method::Upper,
..Default::default()
};
let processed = processor.process_field("pipeline").unwrap();
assert_eq!(Value::String("PIPELINE".into()), processed)
}
{
processor.with_method(Method::Lower);
let processed = processor.process_field("Pipeline", &ff).unwrap();
assert_eq!(Map::one(field, Value::String("pipeline".into())), processed)
let processor = LetterProcessor {
method: Method::Lower,
..Default::default()
};
let processed = processor.process_field("Pipeline").unwrap();
assert_eq!(Value::String("pipeline".into()), processed)
}
{
processor.with_method(Method::Capital);
let processed = processor.process_field("pipeline", &ff).unwrap();
assert_eq!(Map::one(field, Value::String("Pipeline".into())), processed)
let processor = LetterProcessor {
method: Method::Capital,
..Default::default()
};
let processed = processor.process_field("pipeline").unwrap();
assert_eq!(Value::String("Pipeline".into()), processed)
}
}
}

View File

@@ -18,16 +18,17 @@ const PATTERNS_NAME: &str = "patterns";
pub(crate) const PROCESSOR_REGEX: &str = "regex";
use ahash::HashSet;
use ahash::{HashSet, HashSetExt};
use lazy_static::lazy_static;
use regex::Regex;
use crate::etl::field::Fields;
use crate::etl::field::{Fields, InputFieldInfo, OneInputMultiOutputField};
use crate::etl::find_key_index;
use crate::etl::processor::{
yaml_bool, yaml_field, yaml_fields, yaml_string, yaml_strings, Field, Processor, FIELDS_NAME,
FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor,
ProcessorBuilder, ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME,
};
use crate::etl::value::{Map, Value};
use crate::etl::value::Value;
lazy_static! {
static ref GROUPS_NAME_REGEX: Regex = Regex::new(r"\(\?P?<([[:word:]]+)>.+?\)").unwrap();
@@ -40,6 +41,10 @@ fn get_regex_group_names(s: &str) -> Vec<String> {
.collect()
}
fn generate_key(prefix: &str, group: &str) -> String {
format!("{prefix}_{group}")
}
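This helper is what ties a field's target prefix to the regex capture-group names; for the `breadcrumbs` field used in the tests below, a pattern group named `parent` therefore lands in the intermediate key `breadcrumbs_parent`. Illustrative check only:

    // generate_key simply joins prefix and group with an underscore
    assert_eq!(generate_key("breadcrumbs", "parent"), "breadcrumbs_parent");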
#[derive(Debug)]
struct GroupRegex {
origin: String,
@@ -72,34 +77,29 @@ impl std::str::FromStr for GroupRegex {
}
}
/// only supports string values
/// if no value is found for a pattern, the target_field is ignored
#[derive(Debug, Default)]
pub struct RegexProcessor {
pub struct RegexProcessorBuilder {
fields: Fields,
patterns: Vec<GroupRegex>,
ignore_missing: bool,
output_keys: HashSet<String>,
}
impl RegexProcessor {
fn with_fields(&mut self, fields: Fields) {
self.fields = fields;
impl ProcessorBuilder for RegexProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.output_keys.iter().map(|k| k.as_str()).collect()
}
fn try_with_patterns(&mut self, patterns: Vec<String>) -> Result<(), String> {
let mut rs = vec![];
for pattern in patterns {
let gr = pattern.parse()?;
rs.push(gr);
}
self.patterns = rs;
Ok(())
fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}
fn with_ignore_missing(&mut self, ignore_missing: bool) {
self.ignore_missing = ignore_missing;
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
self.build(intermediate_keys).map(ProcessorKind::Regex)
}
}
impl RegexProcessorBuilder {
fn check(self) -> Result<Self, String> {
if self.fields.is_empty() {
return Err(format!(
@@ -118,47 +118,78 @@ impl RegexProcessor {
Ok(self)
}
fn generate_key(prefix: &str, group: &str) -> String {
format!("{prefix}_{group}")
fn build_group_output_info(
group_regex: &GroupRegex,
om_field: &OneInputMultiOutputField,
intermediate_keys: &[String],
) -> Result<Vec<OutPutInfo>, String> {
group_regex
.groups
.iter()
.map(|g| {
let key = generate_key(om_field.target_prefix(), g);
let index = find_key_index(intermediate_keys, &key, "regex");
index.map(|index| OutPutInfo {
final_key: key,
group_name: g.to_string(),
index,
})
})
.collect::<Result<Vec<_>, String>>()
}
fn process_field(&self, val: &str, field: &Field, gr: &GroupRegex) -> Result<Map, String> {
let mut map = Map::default();
if let Some(captures) = gr.regex.captures(val) {
for group in &gr.groups {
if let Some(capture) = captures.name(group) {
let value = capture.as_str().to_string();
let prefix = field.get_target_field();
let key = Self::generate_key(prefix, group);
map.insert(key, Value::String(value));
}
}
}
Ok(map)
fn build_group_output_infos(
patterns: &[GroupRegex],
om_field: &OneInputMultiOutputField,
intermediate_keys: &[String],
) -> Result<Vec<Vec<OutPutInfo>>, String> {
patterns
.iter()
.map(|group_regex| {
Self::build_group_output_info(group_regex, om_field, intermediate_keys)
})
.collect::<Result<Vec<_>, String>>()
}
fn update_output_keys(&mut self) {
for field in self.fields.iter_mut() {
for gr in &self.patterns {
for group in &gr.groups {
field
.output_fields_index_mapping
.insert(Self::generate_key(field.get_target_field(), group), 0_usize);
}
}
fn build_output_info(
real_fields: &[OneInputMultiOutputField],
patterns: &[GroupRegex],
intermediate_keys: &[String],
) -> Result<RegexProcessorOutputInfo, String> {
let inner = real_fields
.iter()
.map(|om_field| Self::build_group_output_infos(patterns, om_field, intermediate_keys))
.collect::<Result<Vec<_>, String>>();
inner.map(|inner| RegexProcessorOutputInfo { inner })
}
fn build(self, intermediate_keys: &[String]) -> Result<RegexProcessor, String> {
let mut real_fields = vec![];
for field in self.fields.into_iter() {
let input_index = find_key_index(intermediate_keys, field.input_field(), "regex")?;
let input_field_info = InputFieldInfo::new(field.input_field(), input_index);
let input = OneInputMultiOutputField::new(input_field_info, field.target_field);
real_fields.push(input);
}
let output_info = Self::build_output_info(&real_fields, &self.patterns, intermediate_keys)?;
Ok(RegexProcessor {
// fields: Fields::one(Field::new("test".to_string())),
fields: real_fields,
patterns: self.patterns,
output_info,
ignore_missing: self.ignore_missing,
})
}
}
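`find_key_index` (imported at the top of this file) is what anchors both the builders and the output-info tables to positions in the pipeline's intermediate value vector. Its body is not part of this diff; a minimal sketch of the behaviour the calls above rely on could look like this (an assumption, not the crate's actual code):

    fn find_key_index(intermediate_keys: &[String], key: &str, kind: &str) -> Result<usize, String> {
        intermediate_keys
            .iter()
            .position(|k| k.as_str() == key)
            .ok_or_else(|| format!("{kind} processor: intermediate key '{key}' not found"))
    }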
impl TryFrom<&yaml_rust::yaml::Hash> for RegexProcessor {
impl TryFrom<&yaml_rust::yaml::Hash> for RegexProcessorBuilder {
type Error = String;
fn try_from(value: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut processor = RegexProcessor::default();
let mut fields = Fields::default();
let mut patterns: Vec<GroupRegex> = vec![];
let mut ignore_missing = false;
for (k, v) in value.iter() {
let key = k
@@ -166,28 +197,113 @@ impl TryFrom<&yaml_rust::yaml::Hash> for RegexProcessor {
.ok_or(format!("key must be a string, but got {k:?}"))?;
match key {
FIELD_NAME => {
processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?));
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
}
FIELDS_NAME => {
processor.with_fields(yaml_fields(v, FIELDS_NAME)?);
fields = yaml_new_fields(v, FIELDS_NAME)?;
}
PATTERN_NAME => {
processor.try_with_patterns(vec![yaml_string(v, PATTERN_NAME)?])?;
let pattern = yaml_string(v, PATTERN_NAME)?;
let gr = pattern.parse()?;
patterns.push(gr);
}
PATTERNS_NAME => {
processor.try_with_patterns(yaml_strings(v, PATTERNS_NAME)?)?;
for pattern in yaml_strings(v, PATTERNS_NAME)? {
let gr = pattern.parse()?;
patterns.push(gr);
}
}
IGNORE_MISSING_NAME => {
processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?);
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
}
_ => {}
}
}
processor.check().map(|mut p| {
p.update_output_keys();
p
})
let pattern_output_keys = patterns
.iter()
.flat_map(|pattern| pattern.groups.iter())
.collect::<Vec<_>>();
let mut output_keys = HashSet::new();
for field in fields.iter() {
for x in pattern_output_keys.iter() {
output_keys.insert(generate_key(field.target_or_input_field(), x));
}
}
let processor_builder = RegexProcessorBuilder {
fields,
patterns,
ignore_missing,
output_keys,
};
processor_builder.check()
}
}
#[derive(Debug, Default)]
struct OutPutInfo {
final_key: String,
group_name: String,
index: usize,
}
#[derive(Debug, Default)]
struct RegexProcessorOutputInfo {
pub inner: Vec<Vec<Vec<OutPutInfo>>>,
}
impl RegexProcessorOutputInfo {
fn get_output_index(
&self,
field_index: usize,
pattern_index: usize,
group_index: usize,
) -> usize {
self.inner[field_index][pattern_index][group_index].index
}
}
/// only supports string values
/// if no value is found for a pattern, the target_field is ignored
#[derive(Debug, Default)]
pub struct RegexProcessor {
fields: Vec<OneInputMultiOutputField>,
output_info: RegexProcessorOutputInfo,
patterns: Vec<GroupRegex>,
ignore_missing: bool,
}
impl RegexProcessor {
fn try_with_patterns(&mut self, patterns: Vec<String>) -> Result<(), String> {
let mut rs = vec![];
for pattern in patterns {
let gr = pattern.parse()?;
rs.push(gr);
}
self.patterns = rs;
Ok(())
}
fn process(
&self,
val: &str,
gr: &GroupRegex,
index: (usize, usize),
) -> Result<Vec<(usize, Value)>, String> {
let mut result = Vec::new();
if let Some(captures) = gr.regex.captures(val) {
for (group_index, group) in gr.groups.iter().enumerate() {
if let Some(capture) = captures.name(group) {
let value = capture.as_str().to_string();
let index = self
.output_info
.get_output_index(index.0, index.1, group_index);
result.push((index, Value::String(value)));
}
}
}
Ok(result)
}
}
@@ -200,71 +316,40 @@ impl Processor for RegexProcessor {
self.ignore_missing
}
fn fields(&self) -> &Fields {
&self.fields
}
fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}
fn output_keys(&self) -> HashSet<String> {
self.fields
.iter()
.flat_map(|f| {
self.patterns.iter().flat_map(move |p| {
p.groups
.iter()
.map(move |g| Self::generate_key(&f.input_field.name, g))
})
})
.collect()
}
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
match val {
Value::String(val) => {
let mut map = Map::default();
for gr in &self.patterns {
let m = self.process_field(val, field, gr)?;
map.extend(m);
}
Ok(map)
}
_ => Err(format!(
"{} processor: expect string value, but got {val:?}",
self.kind()
)),
}
}
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String> {
for field in self.fields.iter() {
let index = field.input_field.index;
for (field_index, field) in self.fields.iter().enumerate() {
let index = field.input_index();
let mut result_list = None;
match val.get(index) {
Some(Value::String(s)) => {
let mut map = Map::default();
for gr in &self.patterns {
// TODO(qtang): Let this method use the intermediate state collection directly.
let m = self.process_field(s, field, gr)?;
map.extend(m);
}
field
.output_fields_index_mapping
.iter()
.for_each(|(k, output_index)| {
if let Some(v) = map.remove(k) {
val[*output_index] = v;
// Writing into `val` directly inside this loop hits a borrow checker error:
// for (gr_index, gr) in self.patterns.iter().enumerate() {
//     let result_list = self.process(s.as_str(), gr, (field_index, gr_index))?;
//     for (output_index, result) in result_list {
//         // cannot borrow `*val` as mutable because it is also borrowed as immutable
//         val[output_index] = result;
//     }
// }
for (gr_index, gr) in self.patterns.iter().enumerate() {
let result = self.process(s.as_str(), gr, (field_index, gr_index))?;
if !result.is_empty() {
match result_list.as_mut() {
None => {
result_list = Some(result);
}
Some(result_list) => {
result_list.extend(result);
}
}
});
}
}
}
Some(Value::Null) | None => {
if !self.ignore_missing {
return Err(format!(
"{} processor: missing field: {}",
self.kind(),
field.get_field_name()
field.input_name()
));
}
}
@@ -275,6 +360,15 @@ impl Processor for RegexProcessor {
));
}
}
// apply the buffered writes here, after the immutable borrow of `val` has ended
match result_list {
None => {}
Some(result_list) => {
for (output_index, result) in result_list {
val[output_index] = result;
}
}
}
}
Ok(())
@@ -282,37 +376,42 @@ impl Processor for RegexProcessor {
}
#[cfg(test)]
mod tests {
use ahash::{HashMap, HashMapExt};
use itertools::Itertools;
use super::RegexProcessor;
use crate::etl::field::Fields;
use crate::etl::processor::Processor;
use crate::etl::processor::regex::RegexProcessorBuilder;
use crate::etl::value::{Map, Value};
#[test]
fn test_simple_parse() {
let mut processor = RegexProcessor::default();
let pipeline_str = r#"fields: ["a"]
patterns: ['(?<ar>\d)']
ignore_missing: false"#;
let processor_yaml = yaml_rust::YamlLoader::load_from_str(pipeline_str)
.unwrap()
.pop()
.unwrap();
let processor_yaml_hash = processor_yaml.as_hash().unwrap();
let builder = RegexProcessorBuilder::try_from(processor_yaml_hash).unwrap();
let intermediate_keys = ["a".to_string(), "a_ar".to_string()];
let processor = builder.build(&intermediate_keys).unwrap();
// single field (with prefix), multiple patterns
let f = ["a"].iter().map(|f| f.parse().unwrap()).collect();
processor.with_fields(Fields::new(f).unwrap());
let ar = "(?<ar>\\d)";
let result = processor
.process("123", &processor.patterns[0], (0, 0))
.unwrap()
.into_iter()
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect();
let patterns = [ar].iter().map(|p| p.to_string()).collect();
processor.try_with_patterns(patterns).unwrap();
let mut map = Map::default();
map.insert("a", Value::String("123".to_string()));
processor.exec_map(&mut map).unwrap();
let map = Map { values: result };
let v = Map {
values: vec![
("a_ar".to_string(), Value::String("1".to_string())),
("a".to_string(), Value::String("123".to_string())),
]
.into_iter()
.collect(),
values: vec![("a_ar".to_string(), Value::String("1".to_string()))]
.into_iter()
.collect(),
};
assert_eq!(v, map);
@@ -320,17 +419,14 @@ mod tests {
#[test]
fn test_process() {
let mut processor = RegexProcessor::default();
let cc = "[c=c,n=US_CA_SANJOSE,o=55155]";
let cg = "[a=12.34.567.89,b=12345678,c=g,n=US_CA_SANJOSE,o=20940]";
let co = "[a=987.654.321.09,c=o]";
let cp = "[c=p,n=US_CA_SANJOSE,o=55155]";
let cw = "[c=w,n=US_CA_SANJOSE,o=55155]";
let breadcrumbs = Value::String([cc, cg, co, cp, cw].iter().join(","));
let breadcrumbs_str = [cc, cg, co, cp, cw].iter().join(",");
let values = [
("breadcrumbs", breadcrumbs.clone()),
("breadcrumbs_parent", Value::String(cc.to_string())),
("breadcrumbs_edge", Value::String(cg.to_string())),
("breadcrumbs_origin", Value::String(co.to_string())),
@@ -340,61 +436,141 @@ mod tests {
.into_iter()
.map(|(k, v)| (k.to_string(), v))
.collect();
let mut temporary_map = Map { values };
let temporary_map = Map { values };
{
// single field (with prefix), multiple patterns
let ff = ["breadcrumbs, breadcrumbs"]
.iter()
.map(|f| f.parse().unwrap())
.collect();
processor.with_fields(Fields::new(ff).unwrap());
let ccr = "(?<parent>\\[[^\\[]*c=c[^\\]]*\\])";
let cgr = "(?<edge>\\[[^\\[]*c=g[^\\]]*\\])";
let cor = "(?<origin>\\[[^\\[]*c=o[^\\]]*\\])";
let cpr = "(?<peer>\\[[^\\[]*c=p[^\\]]*\\])";
let cwr = "(?<wrapper>\\[[^\\[]*c=w[^\\]]*\\])";
let patterns = [ccr, cgr, cor, cpr, cwr]
.iter()
.map(|p| p.to_string())
.collect();
processor.try_with_patterns(patterns).unwrap();
let pipeline_str = r#"fields: ["breadcrumbs"]
patterns:
- '(?<parent>\[[^\[]*c=c[^\]]*\])'
- '(?<edge>\[[^\[]*c=g[^\]]*\])'
- '(?<origin>\[[^\[]*c=o[^\]]*\])'
- '(?<peer>\[[^\[]*c=p[^\]]*\])'
- '(?<wrapper>\[[^\[]*c=w[^\]]*\])'
ignore_missing: false"#;
let mut map = Map::default();
map.insert("breadcrumbs", breadcrumbs.clone());
processor.exec_map(&mut map).unwrap();
assert_eq!(map, temporary_map);
let processor_yaml = yaml_rust::YamlLoader::load_from_str(pipeline_str)
.unwrap()
.pop()
.unwrap();
let processor_yaml_hash = processor_yaml.as_hash().unwrap();
let builder = RegexProcessorBuilder::try_from(processor_yaml_hash).unwrap();
let intermediate_keys = [
"breadcrumbs",
"breadcrumbs_parent",
"breadcrumbs_edge",
"breadcrumbs_origin",
"breadcrumbs_peer",
"breadcrumbs_wrapper",
]
.iter()
.map(|k| k.to_string())
.collect_vec();
let processor = builder.build(&intermediate_keys).unwrap();
let mut result = HashMap::new();
for (index, pattern) in processor.patterns.iter().enumerate() {
let r = processor
.process(&breadcrumbs_str, pattern, (0, index))
.unwrap()
.into_iter()
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect::<HashMap<_, _>>();
result.extend(r);
}
let map = Map { values: result };
assert_eq!(temporary_map, map);
}
{
// multiple fields (with prefix), multiple patterns
let ff = [
"breadcrumbs_parent, parent",
"breadcrumbs_edge, edge",
"breadcrumbs_origin, origin",
"breadcrumbs_peer, peer",
"breadcrumbs_wrapper, wrapper",
]
.iter()
.map(|f| f.parse().unwrap())
.collect();
processor.with_fields(Fields::new(ff).unwrap());
let patterns = [
"a=(?<ip>[^,\\]]+)",
"b=(?<request_id>[^,\\]]+)",
"k=(?<request_end_time>[^,\\]]+)",
"l=(?<turn_around_time>[^,\\]]+)",
"m=(?<dns_lookup_time>[^,\\]]+)",
"n=(?<geo>[^,\\]]+)",
"o=(?<asn>[^,\\]]+)",
let pipeline_str = r#"fields:
- breadcrumbs_parent, parent
- breadcrumbs_edge, edge
- breadcrumbs_origin, origin
- breadcrumbs_peer, peer
- breadcrumbs_wrapper, wrapper
patterns:
- 'a=(?<ip>[^,\]]+)'
- 'b=(?<request_id>[^,\]]+)'
- 'k=(?<request_end_time>[^,\]]+)'
- 'l=(?<turn_around_time>[^,\]]+)'
- 'm=(?<dns_lookup_time>[^,\]]+)'
- 'n=(?<geo>[^,\]]+)'
- 'o=(?<asn>[^,\]]+)'
ignore_missing: false"#;
let processor_yaml = yaml_rust::YamlLoader::load_from_str(pipeline_str)
.unwrap()
.pop()
.unwrap();
let processor_yaml_hash = processor_yaml.as_hash().unwrap();
let builder = RegexProcessorBuilder::try_from(processor_yaml_hash).unwrap();
let intermediate_keys = [
"breadcrumbs_parent",
"breadcrumbs_edge",
"breadcrumbs_origin",
"breadcrumbs_peer",
"breadcrumbs_wrapper",
"edge_ip",
"edge_request_id",
"edge_request_end_time",
"edge_turn_around_time",
"edge_dns_lookup_time",
"edge_geo",
"edge_asn",
"origin_ip",
"origin_request_id",
"origin_request_end_time",
"origin_turn_around_time",
"origin_dns_lookup_time",
"origin_geo",
"origin_asn",
"peer_ip",
"peer_request_id",
"peer_request_end_time",
"peer_turn_around_time",
"peer_dns_lookup_time",
"peer_geo",
"peer_asn",
"parent_ip",
"parent_request_id",
"parent_request_end_time",
"parent_turn_around_time",
"parent_dns_lookup_time",
"parent_geo",
"parent_asn",
"wrapper_ip",
"wrapper_request_id",
"wrapper_request_end_time",
"wrapper_turn_around_time",
"wrapper_dns_lookup_time",
"wrapper_geo",
"wrapper_asn",
]
.iter()
.map(|p| p.to_string())
.collect();
processor.try_with_patterns(patterns).unwrap();
.map(|k| k.to_string())
.collect_vec();
let processor = builder.build(&intermediate_keys).unwrap();
let mut result = HashMap::new();
for (field_index, field) in processor.fields.iter().enumerate() {
for (pattern_index, pattern) in processor.patterns.iter().enumerate() {
let s = temporary_map
.get(field.input_name())
.unwrap()
.to_str_value();
let r = processor
.process(&s, pattern, (field_index, pattern_index))
.unwrap()
.into_iter()
.map(|(k, v)| (intermediate_keys[k].clone(), v))
.collect::<HashMap<_, _>>();
result.extend(r);
}
}
let new_values = vec![
("edge_ip", Value::String("12.34.567.89".to_string())),
@@ -413,11 +589,7 @@ mod tests {
.map(|(k, v)| (k.to_string(), v))
.collect();
let mut expected_map = temporary_map.clone();
processor.exec_map(&mut temporary_map).unwrap();
expected_map.extend(Map { values: new_values });
assert_eq!(expected_map, temporary_map);
assert_eq!(result, new_values);
}
}
}

View File

@@ -19,18 +19,17 @@ use chrono::{DateTime, NaiveDateTime};
use chrono_tz::Tz;
use lazy_static::lazy_static;
use super::yaml_strings;
use crate::etl::field::{Field, Fields};
use crate::etl::field::{Fields, OneInputOneOutputField};
use crate::etl::processor::{
update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, Processor,
FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor,
ProcessorBuilder, ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
};
use crate::etl::value::time::{
MICROSECOND_RESOLUTION, MICRO_RESOLUTION, MILLISECOND_RESOLUTION, MILLI_RESOLUTION,
MS_RESOLUTION, NANOSECOND_RESOLUTION, NANO_RESOLUTION, NS_RESOLUTION, SECOND_RESOLUTION,
SEC_RESOLUTION, S_RESOLUTION, US_RESOLUTION,
};
use crate::etl::value::{Map, Timestamp, Value};
use crate::etl::value::{Timestamp, Value};
pub(crate) const PROCESSOR_TIMESTAMP: &str = "timestamp";
const RESOLUTION_NAME: &str = "resolution";
@@ -108,10 +107,56 @@ impl std::ops::Deref for Formats {
}
}
#[derive(Debug)]
pub struct TimestampProcessorBuilder {
fields: Fields,
formats: Formats,
resolution: Resolution,
ignore_missing: bool,
}
impl ProcessorBuilder for TimestampProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.fields
.iter()
.map(|f| f.target_or_input_field())
.collect()
}
fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
self.build(intermediate_keys).map(ProcessorKind::Timestamp)
}
}
impl TimestampProcessorBuilder {
pub fn build(self, intermediate_keys: &[String]) -> Result<TimestampProcessor, String> {
let mut real_fields = vec![];
for field in self.fields.into_iter() {
let input = OneInputOneOutputField::build(
"timestamp",
intermediate_keys,
field.input_field(),
field.target_or_input_field(),
)?;
real_fields.push(input);
}
Ok(TimestampProcessor {
fields: real_fields,
formats: self.formats,
resolution: self.resolution,
ignore_missing: self.ignore_missing,
})
}
}
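The doc comment on `try_parse` below describes a two-stage strategy: formats that carry an offset parse directly, otherwise the naive datetime is interpreted in the configured timezone. A minimal sketch of that idea, using the chrono APIs already imported by this file (illustrative, not the crate's exact implementation; the helper name is made up):

    use chrono::{DateTime, NaiveDateTime, TimeZone};
    use chrono_tz::Tz;

    fn parse_seconds(val: &str, fmt: &str, tz: Tz) -> Option<i64> {
        // a format with an explicit offset parses on its own
        if let Ok(dt) = DateTime::parse_from_str(val, fmt) {
            return Some(dt.timestamp());
        }
        // otherwise interpret the naive datetime in the configured timezone
        let naive = NaiveDateTime::parse_from_str(val, fmt).ok()?;
        tz.from_local_datetime(&naive).single().map(|dt| dt.timestamp())
    }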
/// supports string, integer, float, time, and epoch values
#[derive(Debug, Default)]
pub struct TimestampProcessor {
fields: Fields,
fields: Vec<OneInputOneOutputField>,
formats: Formats,
resolution: Resolution,
ignore_missing: bool,
@@ -123,29 +168,6 @@ pub struct TimestampProcessor {
}
impl TimestampProcessor {
fn with_fields(&mut self, mut fields: Fields) {
update_one_one_output_keys(&mut fields);
self.fields = fields
}
fn with_resolution(&mut self, resolution: Resolution) {
self.resolution = resolution;
}
fn with_formats(&mut self, v: Option<Vec<(Arc<String>, Tz)>>) {
let v = match v {
Some(v) if !v.is_empty() => v,
_ => DEFAULT_FORMATS.clone(),
};
let formats = Formats::new(v);
self.formats = formats;
}
fn with_ignore_missing(&mut self, ignore_missing: bool) {
self.ignore_missing = ignore_missing;
}
/// try to parse val with timezone first, if failed, parse without timezone
fn try_parse(val: &str, fmt: &str, tz: Tz) -> Result<i64, String> {
if let Ok(dt) = DateTime::parse_from_str(val, fmt) {
@@ -212,12 +234,6 @@ impl TimestampProcessor {
Resolution::Nano => Ok(Timestamp::Nanosecond(t)),
}
}
fn process_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
let key = field.get_target_field();
Ok(Map::one(key, Value::Timestamp(self.parse(val)?)))
}
}
fn parse_formats(yaml: &yaml_rust::yaml::Yaml) -> Result<Vec<(Arc<String>, Tz)>, String> {
@@ -250,11 +266,14 @@ fn parse_formats(yaml: &yaml_rust::yaml::Yaml) -> Result<Vec<(Arc<String>, Tz)>,
};
}
impl TryFrom<&yaml_rust::yaml::Hash> for TimestampProcessor {
impl TryFrom<&yaml_rust::yaml::Hash> for TimestampProcessorBuilder {
type Error = String;
fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut processor = TimestampProcessor::default();
let mut fields = Fields::default();
let mut formats = Formats::default();
let mut resolution = Resolution::default();
let mut ignore_missing = false;
for (k, v) in hash {
let key = k
@@ -263,28 +282,33 @@ impl TryFrom<&yaml_rust::yaml::Hash> for TimestampProcessor {
match key {
FIELD_NAME => {
processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?));
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
}
FIELDS_NAME => {
processor.with_fields(yaml_fields(v, FIELDS_NAME)?);
fields = yaml_new_fields(v, FIELDS_NAME)?;
}
FORMATS_NAME => {
let formats = parse_formats(v)?;
processor.with_formats(Some(formats));
let formats_vec = parse_formats(v)?;
formats = Formats::new(formats_vec);
}
RESOLUTION_NAME => {
let s = yaml_string(v, RESOLUTION_NAME)?.as_str().try_into()?;
processor.with_resolution(s);
resolution = yaml_string(v, RESOLUTION_NAME)?.as_str().try_into()?;
}
IGNORE_MISSING_NAME => {
processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?);
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
}
_ => {}
}
}
Ok(processor)
let processor_builder = TimestampProcessorBuilder {
fields,
formats,
resolution,
ignore_missing,
};
Ok(processor_builder)
}
}
@@ -297,49 +321,23 @@ impl Processor for TimestampProcessor {
self.ignore_missing
}
fn fields(&self) -> &Fields {
&self.fields
}
fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}
fn output_keys(&self) -> HashSet<String> {
self.fields
.iter()
.map(|f| f.get_target_field().to_string())
.collect()
}
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
self.process_field(val, field)
}
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String> {
for field in self.fields.iter() {
let index = field.input_field.index;
let index = field.input().index;
match val.get(index) {
Some(Value::Null) | None => {
if !self.ignore_missing {
return Err(format!(
"{} processor: missing field: {}",
self.kind(),
field.get_field_name()
&field.input().name
));
}
}
Some(v) => {
// TODO(qtang): Let this method use the intermediate state collection directly.
let mut map = self.process_field(v, field)?;
field
.output_fields_index_mapping
.iter()
.for_each(|(k, output_index)| {
if let Some(v) = map.remove(k) {
val[*output_index] = v;
}
});
let result = self.parse(v)?;
let (_, index) = field.output();
val[*index] = Value::Timestamp(result);
}
}
}
@@ -351,9 +349,18 @@ impl Processor for TimestampProcessor {
mod tests {
use yaml_rust::YamlLoader;
use super::TimestampProcessor;
use super::{TimestampProcessor, TimestampProcessorBuilder};
use crate::etl::value::{Timestamp, Value};
fn builder_to_native_processor(builder: TimestampProcessorBuilder) -> TimestampProcessor {
TimestampProcessor {
fields: vec![],
formats: builder.formats,
resolution: builder.resolution,
ignore_missing: builder.ignore_missing,
}
}
#[test]
fn test_parse_epoch() {
let processor_yaml_str = r#"fields:
@@ -367,7 +374,9 @@ formats:
"#;
let yaml = &YamlLoader::load_from_str(processor_yaml_str).unwrap()[0];
let timestamp_yaml = yaml.as_hash().unwrap();
let processor = TimestampProcessor::try_from(timestamp_yaml).unwrap();
let processor = builder_to_native_processor(
TimestampProcessorBuilder::try_from(timestamp_yaml).unwrap(),
);
let values = [
(
@@ -419,7 +428,9 @@ formats:
"#;
let yaml = &YamlLoader::load_from_str(processor_yaml_str).unwrap()[0];
let timestamp_yaml = yaml.as_hash().unwrap();
let processor = TimestampProcessor::try_from(timestamp_yaml).unwrap();
let processor = builder_to_native_processor(
TimestampProcessorBuilder::try_from(timestamp_yaml).unwrap(),
);
let values: Vec<&str> = vec![
"2014-5-17T12:34:56",

View File

@@ -15,12 +15,12 @@
use ahash::HashSet;
use urlencoding::{decode, encode};
use crate::etl::field::{Field, Fields};
use crate::etl::field::{Fields, OneInputOneOutputField};
use crate::etl::processor::{
yaml_bool, yaml_field, yaml_fields, yaml_string, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME,
METHOD_NAME,
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, ProcessorBuilder, ProcessorKind,
FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, METHOD_NAME,
};
use crate::etl::value::{Map, Value};
use crate::etl::value::Value;
pub(crate) const PROCESSOR_URL_ENCODING: &str = "urlencoding";
@@ -52,54 +52,76 @@ impl std::str::FromStr for Method {
}
}
/// only supports string values
#[derive(Debug, Default)]
pub struct UrlEncodingProcessor {
pub struct UrlEncodingProcessorBuilder {
fields: Fields,
method: Method,
ignore_missing: bool,
}
impl ProcessorBuilder for UrlEncodingProcessorBuilder {
fn output_keys(&self) -> HashSet<&str> {
self.fields
.iter()
.map(|f| f.target_or_input_field())
.collect()
}
fn input_keys(&self) -> HashSet<&str> {
self.fields.iter().map(|f| f.input_field()).collect()
}
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
self.build(intermediate_keys)
.map(ProcessorKind::UrlEncoding)
}
}
impl UrlEncodingProcessorBuilder {
fn build(self, intermediate_keys: &[String]) -> Result<UrlEncodingProcessor, String> {
let mut real_fields = vec![];
for field in self.fields.into_iter() {
let input = OneInputOneOutputField::build(
"urlencoding",
intermediate_keys,
field.input_field(),
field.target_or_input_field(),
)?;
real_fields.push(input);
}
Ok(UrlEncodingProcessor {
fields: real_fields,
method: self.method,
ignore_missing: self.ignore_missing,
})
}
}
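`process_field` below delegates directly to the `urlencoding` crate imported at the top of the file; a quick round-trip using the same strings as the test at the bottom of this file (illustrative only):

    let decoded = "//BC/[a=6.7.8.9,c=g,k=0,l=1]";
    let encoded = urlencoding::encode(decoded).to_string();
    assert_eq!(encoded, "%2F%2FBC%2F%5Ba%3D6.7.8.9%2Cc%3Dg%2Ck%3D0%2Cl%3D1%5D");
    assert_eq!(urlencoding::decode(&encoded).unwrap().into_owned(), decoded);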
/// only supports string values
#[derive(Debug, Default)]
pub struct UrlEncodingProcessor {
fields: Vec<OneInputOneOutputField>,
method: Method,
ignore_missing: bool,
}
impl UrlEncodingProcessor {
fn with_fields(&mut self, mut fields: Fields) {
Self::update_output_keys(&mut fields);
self.fields = fields;
}
fn with_ignore_missing(&mut self, ignore_missing: bool) {
self.ignore_missing = ignore_missing;
}
fn with_method(&mut self, method: Method) {
self.method = method;
}
fn process_field(&self, val: &str, field: &Field) -> Result<Map, String> {
fn process_field(&self, val: &str) -> Result<Value, String> {
let processed = match self.method {
Method::Encode => encode(val).to_string(),
Method::Decode => decode(val).map_err(|e| e.to_string())?.into_owned(),
};
let val = Value::String(processed);
let key = field.get_target_field();
Ok(Map::one(key, val))
}
fn update_output_keys(fields: &mut Fields) {
for field in fields.iter_mut() {
field
.output_fields_index_mapping
.insert(field.get_target_field().to_string(), 0_usize);
}
Ok(Value::String(processed))
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for UrlEncodingProcessor {
impl TryFrom<&yaml_rust::yaml::Hash> for UrlEncodingProcessorBuilder {
type Error = String;
fn try_from(value: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut processor = UrlEncodingProcessor::default();
let mut fields = Fields::default();
let mut method = Method::Decode;
let mut ignore_missing = false;
for (k, v) in value.iter() {
let key = k
@@ -107,24 +129,29 @@ impl TryFrom<&yaml_rust::yaml::Hash> for UrlEncodingProcessor {
.ok_or(format!("key must be a string, but got {k:?}"))?;
match key {
FIELD_NAME => {
processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?));
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
}
FIELDS_NAME => {
processor.with_fields(yaml_fields(v, FIELDS_NAME)?);
fields = yaml_new_fields(v, FIELDS_NAME)?;
}
IGNORE_MISSING_NAME => {
processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?);
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
}
METHOD_NAME => {
let method = yaml_string(v, METHOD_NAME)?;
processor.with_method(method.parse()?);
let method_str = yaml_string(v, METHOD_NAME)?;
method = method_str.parse()?;
}
_ => {}
}
}
let processor = UrlEncodingProcessorBuilder {
fields,
method,
ignore_missing,
};
Ok(processor)
}
@@ -139,52 +166,21 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
self.ignore_missing
}
fn fields(&self) -> &Fields {
&self.fields
}
fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}
fn output_keys(&self) -> HashSet<String> {
self.fields
.iter()
.map(|f| f.get_target_field().to_string())
.collect()
}
fn exec_field(&self, val: &Value, field: &Field) -> Result<Map, String> {
match val {
Value::String(val) => self.process_field(val, field),
_ => Err(format!(
"{} processor: expect string value, but got {val:?}",
self.kind()
)),
}
}
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<(), String> {
for field in self.fields.iter() {
let index = field.input_field.index;
let index = field.input_index();
match val.get(index) {
Some(Value::String(s)) => {
let mut map = self.process_field(s, field)?;
field
.output_fields_index_mapping
.iter()
.for_each(|(k, output_index)| {
if let Some(v) = map.remove(k) {
val[*output_index] = v;
}
});
let result = self.process_field(s)?;
let output_index = field.output_index();
val[output_index] = result;
}
Some(Value::Null) | None => {
if !self.ignore_missing {
return Err(format!(
"{} processor: missing field: {}",
self.kind(),
field.get_field_name()
field.output_name()
));
}
}
@@ -202,29 +198,28 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor {
#[cfg(test)]
mod tests {
use crate::etl::field::{Field, Fields};
use crate::etl::processor::urlencoding::UrlEncodingProcessor;
use crate::etl::value::{Map, Value};
use crate::etl::value::Value;
#[test]
fn test_decode_url() {
let field = "url";
let ff: Field = field.parse().unwrap();
let decoded = "//BC/[a=6.7.8.9,c=g,k=0,l=1]";
let encoded = "%2F%2FBC%2F%5Ba%3D6.7.8.9%2Cc%3Dg%2Ck%3D0%2Cl%3D1%5D";
let mut processor = UrlEncodingProcessor::default();
processor.with_fields(Fields::one(ff.clone()));
{
let result = processor.process_field(encoded, &ff).unwrap();
assert_eq!(Map::one(field, Value::String(decoded.into())), result)
let processor = UrlEncodingProcessor::default();
let result = processor.process_field(encoded).unwrap();
assert_eq!(Value::String(decoded.into()), result)
}
{
processor.with_method(super::Method::Encode);
let result = processor.process_field(decoded, &ff).unwrap();
assert_eq!(Map::one(field, Value::String(encoded.into())), result)
let processor = UrlEncodingProcessor {
fields: vec![],
method: super::Method::Encode,
ignore_missing: false,
};
let result = processor.process_field(decoded).unwrap();
assert_eq!(Value::String(encoded.into()), result)
}
}
}
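
Net effect of the builder split, end to end: field names are resolved to vector positions when the pipeline is built, and exec_mut then reads and writes a flat Vec of values by index instead of going through a per-row Map. A minimal sketch of that shape, with plain std types standing in for the crate's Value and OneInputOneOutputField, and using the same urlencoding crate calls the processor relies on (names here are illustrative):

fn urldecode_at(
    values: &mut [Option<String>],
    input_index: usize,
    output_index: usize,
    ignore_missing: bool,
) -> Result<(), String> {
    match values.get(input_index).cloned().flatten() {
        Some(s) => {
            // Same decode call as the processor, but addressed by index instead of key.
            let decoded = urlencoding::decode(&s).map_err(|e| e.to_string())?.into_owned();
            values[output_index] = Some(decoded);
            Ok(())
        }
        None if ignore_missing => Ok(()),
        None => Err("urlencoding processor: missing field".to_string()),
    }
}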


@@ -17,8 +17,8 @@ pub mod transformer;
use itertools::Itertools;
use crate::etl::field::Fields;
use crate::etl::processor::{update_one_one_output_keys, yaml_field, yaml_fields, yaml_string};
use crate::etl::find_key_index;
use crate::etl::processor::yaml_string;
use crate::etl::transform::index::Index;
use crate::etl::value::Value;
@@ -31,6 +31,9 @@ const TRANSFORM_ON_FAILURE: &str = "on_failure";
pub use transformer::greptime::GreptimeTransformer;
use super::field::{Fields, InputFieldInfo, OneInputOneOutputField};
use super::processor::{yaml_new_field, yaml_new_fields};
pub trait Transformer: std::fmt::Display + Sized + Send + Sync + 'static {
type Output;
type VecOutput;
@@ -39,12 +42,11 @@ pub trait Transformer: std::fmt::Display + Sized + Send + Sync + 'static {
fn schemas(&self) -> &Vec<greptime_proto::v1::ColumnSchema>;
fn transforms(&self) -> &Transforms;
fn transforms_mut(&mut self) -> &mut Transforms;
fn transform(&self, val: Value) -> Result<Self::Output, String>;
fn transform_mut(&self, val: &mut Vec<Value>) -> Result<Self::VecOutput, String>;
}
/// On Failure behavior when transform fails
#[derive(Debug, Clone, Default)]
#[derive(Debug, Clone, Default, Copy)]
pub enum OnFailure {
// Return None if transform fails
#[default]
@@ -74,12 +76,18 @@ impl std::fmt::Display for OnFailure {
}
}
}
#[derive(Debug, Default, Clone)]
pub struct TransformBuilders {
pub(crate) builders: Vec<TransformBuilder>,
pub(crate) output_keys: Vec<String>,
pub(crate) required_keys: Vec<String>,
}
#[derive(Debug, Default, Clone)]
pub struct Transforms {
transforms: Vec<Transform>,
output_keys: Vec<String>,
required_keys: Vec<String>,
pub(crate) transforms: Vec<Transform>,
pub(crate) output_keys: Vec<String>,
pub(crate) required_keys: Vec<String>,
}
impl Transforms {
@@ -130,7 +138,7 @@ impl std::ops::DerefMut for Transforms {
}
}
impl TryFrom<&Vec<yaml_rust::Yaml>> for Transforms {
impl TryFrom<&Vec<yaml_rust::Yaml>> for TransformBuilders {
type Error = String;
fn try_from(docs: &Vec<yaml_rust::Yaml>) -> Result<Self, Self::Error> {
@@ -138,41 +146,78 @@ impl TryFrom<&Vec<yaml_rust::Yaml>> for Transforms {
let mut all_output_keys: Vec<String> = Vec::with_capacity(100);
let mut all_required_keys = Vec::with_capacity(100);
for doc in docs {
let transform: Transform = doc
let transform_builder: TransformBuilder = doc
.as_hash()
.ok_or("transform element must be a map".to_string())?
.try_into()?;
let mut transform_output_keys = transform
let mut transform_output_keys = transform_builder
.fields
.iter()
.map(|f| f.get_target_field().to_string())
.map(|f| f.target_or_input_field().to_string())
.collect();
all_output_keys.append(&mut transform_output_keys);
let mut transform_required_keys = transform
let mut transform_required_keys = transform_builder
.fields
.iter()
.map(|f| f.input_field.name.clone())
.map(|f| f.input_field().to_string())
.collect();
all_required_keys.append(&mut transform_required_keys);
transforms.push(transform);
transforms.push(transform_builder);
}
all_required_keys.sort();
Ok(Transforms {
transforms,
Ok(TransformBuilders {
builders: transforms,
output_keys: all_output_keys,
required_keys: all_required_keys,
})
}
}
#[derive(Debug, Clone)]
pub struct TransformBuilder {
fields: Fields,
type_: Value,
default: Option<Value>,
index: Option<Index>,
on_failure: Option<OnFailure>,
}
impl TransformBuilder {
pub fn build(
self,
intermediate_keys: &[String],
output_keys: &[String],
) -> Result<Transform, String> {
let mut real_fields = vec![];
for field in self.fields {
let input_index = find_key_index(intermediate_keys, field.input_field(), "transform")?;
let input_field_info = InputFieldInfo::new(field.input_field(), input_index);
let output_index =
find_key_index(output_keys, field.target_or_input_field(), "transform")?;
let input = OneInputOneOutputField::new(
input_field_info,
(field.target_or_input_field().to_string(), output_index),
);
real_fields.push(input);
}
Ok(Transform {
real_fields,
type_: self.type_,
default: self.default,
index: self.index,
on_failure: self.on_failure,
})
}
}
/// only field is required
#[derive(Debug, Clone)]
pub struct Transform {
pub fields: Fields,
pub real_fields: Vec<OneInputOneOutputField>,
pub type_: Value,
@@ -192,7 +237,7 @@ impl std::fmt::Display for Transform {
};
let type_ = format!("type: {}", self.type_);
let fields = format!("field(s): {}", self.fields);
let fields = format!("field(s): {:?}", self.real_fields);
let default = if let Some(default) = &self.default {
format!(", default: {}", default)
} else {
@@ -212,7 +257,7 @@ impl std::fmt::Display for Transform {
impl Default for Transform {
fn default() -> Self {
Transform {
fields: Fields::default(),
real_fields: Vec::new(),
type_: Value::Null,
default: None,
index: None,
@@ -222,40 +267,6 @@ impl Default for Transform {
}
impl Transform {
fn with_fields(&mut self, mut fields: Fields) {
update_one_one_output_keys(&mut fields);
self.fields = fields;
}
fn with_type(&mut self, type_: Value) {
self.type_ = type_;
}
fn try_default(&mut self, default: Value) -> Result<(), String> {
match (&self.type_, &default) {
(Value::Null, _) => Err(format!(
"transform {} type MUST BE set before default {}",
self.fields, &default,
)),
(_, Value::Null) => Ok(()), // if default is not set, then it will be regarded as default null
(_, _) => {
let target = self
.type_
.parse_str_value(default.to_str_value().as_str())?;
self.default = Some(target);
Ok(())
}
}
}
fn with_index(&mut self, index: Index) {
self.index = Some(index);
}
fn with_on_failure(&mut self, on_failure: OnFailure) {
self.on_failure = Some(on_failure);
}
pub(crate) fn get_default(&self) -> Option<&Value> {
self.default.as_ref()
}
@@ -265,52 +276,74 @@ impl Transform {
}
}
impl TryFrom<&yaml_rust::yaml::Hash> for Transform {
impl TryFrom<&yaml_rust::yaml::Hash> for TransformBuilder {
type Error = String;
fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
let mut transform = Transform::default();
let mut default_opt = None;
let mut fields = Fields::default();
let mut type_ = Value::Null;
let mut default = None;
let mut index = None;
let mut on_failure = None;
for (k, v) in hash {
let key = k.as_str().ok_or("key must be a string")?;
match key {
TRANSFORM_FIELD => {
transform.with_fields(Fields::one(yaml_field(v, TRANSFORM_FIELD)?));
fields = Fields::one(yaml_new_field(v, TRANSFORM_FIELD)?);
}
TRANSFORM_FIELDS => {
transform.with_fields(yaml_fields(v, TRANSFORM_FIELDS)?);
fields = yaml_new_fields(v, TRANSFORM_FIELDS)?;
}
TRANSFORM_TYPE => {
let t = yaml_string(v, TRANSFORM_TYPE)?;
transform.with_type(Value::parse_str_type(&t)?);
type_ = Value::parse_str_type(&t)?;
}
TRANSFORM_INDEX => {
let index = yaml_string(v, TRANSFORM_INDEX)?;
transform.with_index(index.try_into()?);
let index_str = yaml_string(v, TRANSFORM_INDEX)?;
index = Some(index_str.try_into()?);
}
TRANSFORM_DEFAULT => {
default_opt = Some(Value::try_from(v)?);
default = Some(Value::try_from(v)?);
}
TRANSFORM_ON_FAILURE => {
let on_failure = yaml_string(v, TRANSFORM_ON_FAILURE)?;
transform.with_on_failure(on_failure.parse()?);
let on_failure_str = yaml_string(v, TRANSFORM_ON_FAILURE)?;
on_failure = Some(on_failure_str.parse()?);
}
_ => {}
}
}
let mut final_default = None;
if let Some(default) = default_opt {
transform.try_default(default)?;
if let Some(default_value) = default {
match (&type_, &default_value) {
(Value::Null, _) => {
return Err(format!(
"transform {:?} type MUST BE set before default {}",
fields, &default_value,
));
}
(_, Value::Null) => {} // if default is not set, then it will be regarded as default null
(_, _) => {
let target = type_.parse_str_value(default_value.to_str_value().as_str())?;
final_default = Some(target);
}
}
}
let builder = TransformBuilder {
fields,
type_,
default: final_default,
index,
on_failure,
};
Ok(transform)
Ok(builder)
}
}
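
Both the processor and transform builders lean on the same name-to-index resolution over the pipeline's intermediate keys. A rough stand-in for that lookup (the real find_key_index lives in crate::etl and may differ in details):

fn find_key_index(intermediate_keys: &[String], key: &str, kind: &str) -> Result<usize, String> {
    intermediate_keys
        .iter()
        .position(|k| k.as_str() == key)
        .ok_or_else(|| format!("{kind}: key '{key}' not found in intermediate keys"))
}

// With intermediate keys ["id1", "id2", "time"], a transform on "time" resolves to index 2,
// so exec_mut can read val[2] directly.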


@@ -20,10 +20,10 @@ use coerce::{coerce_columns, coerce_value};
use greptime_proto::v1::{ColumnSchema, Row, Rows, Value as GreptimeValue};
use itertools::Itertools;
use crate::etl::field::{Field, Fields};
use crate::etl::field::{InputFieldInfo, OneInputOneOutputField};
use crate::etl::transform::index::Index;
use crate::etl::transform::{Transform, Transformer, Transforms};
use crate::etl::value::{Array, Map, Timestamp, Value};
use crate::etl::value::{Timestamp, Value};
const DEFAULT_GREPTIME_TIMESTAMP_COLUMN: &str = "greptime_timestamp";
@@ -36,23 +36,41 @@ pub struct GreptimeTransformer {
}
impl GreptimeTransformer {
fn default_greptime_timestamp_column() -> Transform {
/// Add a default timestamp column to the transforms
fn add_greptime_timestamp_column(transforms: &mut Transforms) {
let ns = chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0);
let type_ = Value::Timestamp(Timestamp::Nanosecond(ns));
let default = Some(type_.clone());
let mut field = Field::new(DEFAULT_GREPTIME_TIMESTAMP_COLUMN);
field.insert_output_index(DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string(), 0);
let fields = Fields::new(vec![field]).unwrap();
Transform {
fields,
let transform = Transform {
real_fields: vec![OneInputOneOutputField::new(
InputFieldInfo {
name: DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string(),
index: usize::MAX,
},
(
DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string(),
transforms
.transforms
.iter()
.map(|x| x.real_fields.len())
.sum(),
),
)],
type_,
default,
index: Some(Index::Time),
on_failure: Some(crate::etl::transform::OnFailure::Default),
}
};
let required_keys = transforms.required_keys_mut();
required_keys.push(DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string());
let output_keys = transforms.output_keys_mut();
output_keys.push(DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string());
transforms.push(transform);
}
/// Generate the schema for the GreptimeTransformer
fn schemas(transforms: &Transforms) -> Result<Vec<ColumnSchema>, String> {
let mut schema = vec![];
for transform in transforms.iter() {
@@ -60,53 +78,6 @@ impl GreptimeTransformer {
}
Ok(schema)
}
fn transform_map(&self, map: &Map) -> Result<Row, String> {
let mut values = vec![GreptimeValue { value_data: None }; self.schema.len()];
for transform in self.transforms.iter() {
for field in transform.fields.iter() {
let value_data = match map.get(field.get_field_name()) {
Some(val) => coerce_value(val, transform)?,
None => {
let default = transform.get_default();
match default {
Some(default) => coerce_value(default, transform)?,
None => None,
}
}
};
if let Some(i) = field
.output_fields_index_mapping
.iter()
.next()
.map(|kv| kv.1)
{
values[*i] = GreptimeValue { value_data }
} else {
return Err(format!(
"field: {} output_fields is empty.",
field.get_field_name()
));
}
}
}
Ok(Row { values })
}
fn transform_array(&self, arr: &Array) -> Result<Vec<Row>, String> {
let mut rows = Vec::with_capacity(arr.len());
for v in arr.iter() {
match v {
Value::Map(map) => {
let row = self.transform_map(map)?;
rows.push(row);
}
_ => return Err(format!("Expected map, found: {v:?}")),
}
}
Ok(rows)
}
}
impl std::fmt::Display for GreptimeTransformer {
@@ -129,9 +100,9 @@ impl Transformer for GreptimeTransformer {
for transform in transforms.iter() {
let target_fields_set = transform
.fields
.real_fields
.iter()
.map(|f| f.get_target_field())
.map(|f| f.output_name())
.collect::<HashSet<_>>();
let intersections: Vec<_> = column_names_set.intersection(&target_fields_set).collect();
@@ -146,12 +117,15 @@ impl Transformer for GreptimeTransformer {
if let Some(idx) = transform.index {
if idx == Index::Time {
match transform.fields.len() {
1 => timestamp_columns.push(transform.fields.first().unwrap().get_field_name()),
_ => return Err(format!(
"Illegal to set multiple timestamp Index columns, please set only one: {}",
transform.fields.get_target_fields().join(", ")
)),
match transform.real_fields.len() {
1 => timestamp_columns
.push(transform.real_fields.first().unwrap().input_name()),
_ => {
return Err(format!(
"Illegal to set multiple timestamp Index columns, please set only one: {}",
transform.real_fields.iter().map(|x|x.input_name()).join(", ")
))
}
}
}
}
@@ -159,13 +133,7 @@ impl Transformer for GreptimeTransformer {
match timestamp_columns.len() {
0 => {
transforms.push(GreptimeTransformer::default_greptime_timestamp_column());
let required_keys = transforms.required_keys_mut();
required_keys.push(DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string());
let output_keys = transforms.output_keys_mut();
output_keys.push(DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string());
GreptimeTransformer::add_greptime_timestamp_column(&mut transforms);
let schema = GreptimeTransformer::schemas(&transforms)?;
Ok(GreptimeTransformer { transforms, schema })
@@ -184,54 +152,26 @@ impl Transformer for GreptimeTransformer {
}
}
fn transform(&self, value: Value) -> Result<Self::Output, String> {
match value {
Value::Map(map) => {
let rows = vec![self.transform_map(&map)?];
Ok(Rows {
schema: self.schema.clone(),
rows,
})
}
Value::Array(arr) => {
let rows = self.transform_array(&arr)?;
Ok(Rows {
schema: self.schema.clone(),
rows,
})
}
_ => Err(format!("Expected map or array, found: {}", value)),
}
}
fn transform_mut(&self, val: &mut Vec<Value>) -> Result<Self::VecOutput, String> {
let mut values = vec![GreptimeValue { value_data: None }; self.schema.len()];
for transform in self.transforms.iter() {
for field in transform.fields.iter() {
let index = field.input_field.index;
for field in transform.real_fields.iter() {
let index = field.input_index();
let output_index = field.output_index();
match val.get(index) {
Some(v) => {
let value_data = coerce_value(v, transform)
.map_err(|e| format!("{} processor: {}", field.get_field_name(), e))?;
.map_err(|e| format!("{} processor: {}", field.input_name(), e))?;
// every transform field has only one output field
if let Some(i) = field
.output_fields_index_mapping
.iter()
.next()
.map(|kv| kv.1)
{
values[*i] = GreptimeValue { value_data }
} else {
return Err(format!(
"field: {} output_fields is empty.",
field.get_field_name()
));
}
values[output_index] = GreptimeValue { value_data };
}
_ => {
return Err(format!(
"Get field not in the array field: {field:?}, {val:?}"
))
None => {
let default = transform.get_default();
let value_data = match default {
Some(default) => coerce_value(default, transform)?,
None => None,
};
values[output_index] = GreptimeValue { value_data };
}
}
}
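
transform_mut thus reduces to: for each transform field, read the intermediate slot at input_index, coerce it (falling back to the transform's default when the slot is empty), and write the result at output_index in a row as wide as the schema. A condensed sketch with toy types, i64 standing in for the coerced GreptimeValue:

fn build_row(
    state: &[Option<i64>],                  // intermediate values, indexed by key position
    fields: &[(usize, usize, Option<i64>)], // (input_index, output_index, default) per field
    schema_len: usize,
) -> Vec<Option<i64>> {
    let mut row = vec![None; schema_len];
    for &(input_index, output_index, default) in fields {
        row[output_index] = match state.get(input_index).copied().flatten() {
            Some(v) => Some(v), // coerce_value(v, transform) in the real code
            None => default,    // transform default, else None
        };
    }
    row
}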


@@ -66,8 +66,8 @@ impl TryFrom<Value> for ValueData {
pub(crate) fn coerce_columns(transform: &Transform) -> Result<Vec<ColumnSchema>, String> {
let mut columns = Vec::new();
for field in transform.fields.iter() {
let column_name = field.get_target_field().to_string();
for field in transform.real_fields.iter() {
let column_name = field.output_name().to_string();
let datatype = coerce_type(transform)? as i32;
@@ -134,7 +134,7 @@ fn coerce_type(transform: &Transform) -> Result<ColumnDataType, String> {
Value::Null => Err(format!(
"Null type not supported when to coerce '{}' type",
transform.fields
transform.type_.to_str_type()
)),
}
}
@@ -144,15 +144,18 @@ pub(crate) fn coerce_value(
transform: &Transform,
) -> Result<Option<ValueData>, String> {
match val {
Value::Null => match transform.on_failure {
Some(OnFailure::Ignore) => Ok(None),
Some(OnFailure::Default) => transform
.get_default()
.map(|default| coerce_value(default, transform))
.unwrap_or_else(|| {
coerce_value(transform.get_type_matched_default_val(), transform)
}),
None => Ok(None),
Value::Null => match &transform.default {
Some(default) => coerce_value(default, transform),
None => match transform.on_failure {
Some(OnFailure::Ignore) => Ok(None),
Some(OnFailure::Default) => transform
.get_default()
.map(|default| coerce_value(default, transform))
.unwrap_or_else(|| {
coerce_value(transform.get_type_matched_default_val(), transform)
}),
None => Ok(None),
},
},
Value::Int8(n) => coerce_i64_value(*n as i64, transform),
@@ -404,12 +407,11 @@ fn coerce_string_value(s: &String, transform: &Transform) -> Result<Option<Value
#[cfg(test)]
mod tests {
use super::*;
use crate::etl::field::Fields;
#[test]
fn test_coerce_string_without_on_failure() {
let transform = Transform {
fields: Fields::default(),
real_fields: vec![],
type_: Value::Int32(0),
default: None,
index: None,
@@ -434,7 +436,7 @@ mod tests {
#[test]
fn test_coerce_string_with_on_failure_ignore() {
let transform = Transform {
fields: Fields::default(),
real_fields: vec![],
type_: Value::Int32(0),
default: None,
index: None,
@@ -449,7 +451,7 @@ mod tests {
#[test]
fn test_coerce_string_with_on_failure_default() {
let mut transform = Transform {
fields: Fields::default(),
real_fields: vec![],
type_: Value::Int32(0),
default: None,
index: None,

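The reworked Value::Null arm in coerce_value above gives the transform's explicit default first claim; the on_failure policy only applies when no default was configured. Reduced to the decision itself, with Option<i64> standing in for the coerced ValueData (types here are illustrative):

enum OnFailure {
    Ignore,
    Default,
}

fn coerce_null(
    default: Option<i64>,          // the transform's configured `default:` value
    on_failure: Option<OnFailure>,
    type_matched_default: i64,     // e.g. 0 for an int32 column
) -> Option<i64> {
    match default {
        Some(d) => Some(d),
        None => match on_failure {
            Some(OnFailure::Default) => Some(type_matched_default),
            Some(OnFailure::Ignore) | None => None,
        },
    }
}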

@@ -110,7 +110,12 @@ impl PipelineOperator {
// exist in catalog, just open
if let Some(table) = self
.catalog_manager
.table(&expr.catalog_name, &expr.schema_name, &expr.table_name)
.table(
&expr.catalog_name,
&expr.schema_name,
&expr.table_name,
Some(&ctx),
)
.await
.context(CatalogSnafu)?
{
@@ -130,7 +135,7 @@ impl PipelineOperator {
// get from catalog
let table = self
.catalog_manager
.table(catalog, schema, table_name)
.table(catalog, schema, table_name, Some(&ctx))
.await
.context(CatalogSnafu)?
.context(PipelineTableNotFoundSnafu)?;


@@ -13,20 +13,45 @@
// limitations under the License.
use greptime_proto::v1::{ColumnDataType, ColumnSchema, Rows, SemanticType};
use pipeline::{parse, Content, GreptimeTransformer, Pipeline, Value};
use pipeline::{parse, Content, GreptimeTransformer, Pipeline};
/// test util function to parse and execute pipeline
pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows {
let input_value: Value = serde_json::from_str::<serde_json::Value>(input_str)
.expect("failed to parse into json")
.try_into()
.expect("failed to convert into value");
let input_value = serde_json::from_str::<serde_json::Value>(input_str).unwrap();
let yaml_content = Content::Yaml(pipeline_yaml.into());
let pipeline: Pipeline<GreptimeTransformer> =
parse(&yaml_content).expect("failed to parse pipeline");
let mut result = pipeline.init_intermediate_state();
pipeline.exec(input_value).expect("failed to exec pipeline")
let schema = pipeline.schemas().clone();
let mut rows = Vec::new();
match input_value {
serde_json::Value::Array(array) => {
for value in array {
pipeline.prepare(value, &mut result).unwrap();
let row = pipeline
.exec_mut(&mut result)
.expect("failed to exec pipeline");
rows.push(row);
pipeline.reset_intermediate_state(&mut result);
}
}
serde_json::Value::Object(_) => {
pipeline.prepare(input_value, &mut result).unwrap();
let row = pipeline
.exec_mut(&mut result)
.expect("failed to exec pipeline");
rows.push(row);
}
_ => {
panic!("invalid input value");
}
}
Rows { schema, rows }
}
/// test util function to create column schema


@@ -157,7 +157,7 @@ transform:
fn test_modifier() {
let empty_str = r#"
{
"str": "key1 key2 key3 key4 key5 key6 key7 key8"
"str": "key1 key2 key3 key4 key5 key6"
}"#;
let pipeline_yaml = r#"
@@ -165,7 +165,7 @@ processors:
- dissect:
field: str
patterns:
- "%{key1} %{key2} %{+key3} %{+key3/2} %{key5->} %{?key6} %{*key_7} %{&key_7}"
- "%{key1} %{key2} %{+key3} %{+key3/2} %{key5->} %{?key6}"
transform:
- fields:
@@ -173,7 +173,6 @@ transform:
- key2
- key3
- key5
- key7
type: string
"#;
@@ -184,7 +183,6 @@ transform:
make_string_column_schema("key2".to_string()),
make_string_column_schema("key3".to_string()),
make_string_column_schema("key5".to_string()),
make_string_column_schema("key7".to_string()),
common::make_column_schema(
"greptime_timestamp".to_string(),
ColumnDataType::TimestampNanosecond,
@@ -209,10 +207,6 @@ transform:
output.rows[0].values[3].value_data,
Some(StringValue("key5".to_string()))
);
assert_eq!(
output.rows[0].values[4].value_data,
Some(StringValue("key8".to_string()))
);
}
#[test]


@@ -12,18 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::Rows;
use common_telemetry::tracing::info;
use greptime_proto::v1::value::ValueData::{
BoolValue, F64Value, StringValue, TimestampNanosecondValue, TimestampSecondValue, U32Value,
U64Value, U8Value,
};
use greptime_proto::v1::Value as GreptimeValue;
use pipeline::{parse, Content, GreptimeTransformer, Pipeline, Value};
use pipeline::{parse, Content, GreptimeTransformer, Pipeline};
#[test]
fn test_complex_data() {
let input_value_str = r#"
[
{
"version": 1,
"streamId": "12345",
@@ -73,12 +73,9 @@ fn test_complex_data() {
"ewExecutionInfo": "c:4380:7:161:162:161:n:::12473:200|C:4380:3:0:4:0:n:::6967:200|R:4380:20:99:99:1:n:::35982:200",
"customField": "any-custom-value"
}
]
"#;
let input_value: Value = serde_json::from_str::<serde_json::Value>(input_value_str)
.expect("failed to parse input value")
.try_into()
.expect("failed to convert input value");
let input_value = serde_json::from_str::<serde_json::Value>(input_value_str)
.expect("failed to parse input value");
let pipeline_yaml = r#"
---
@@ -422,7 +419,19 @@ transform:
let yaml_content = Content::Yaml(pipeline_yaml.into());
let pipeline: Pipeline<GreptimeTransformer> =
parse(&yaml_content).expect("failed to parse pipeline");
let output = pipeline.exec(input_value).expect("failed to exec pipeline");
let mut stats = pipeline.init_intermediate_state();
pipeline
.prepare(input_value, &mut stats)
.expect("failed to prepare pipeline");
let row = pipeline
.exec_mut(&mut stats)
.expect("failed to exec pipeline");
let output = Rows {
schema: pipeline.schemas().clone(),
rows: vec![row],
};
assert_eq!(output.rows.len(), 1);
let values = output.rows.first().unwrap().values.clone();
@@ -464,10 +473,7 @@ fn test_simple_data() {
"line": "2024-05-25 20:16:37.217 hello world"
}
"#;
let input_value: Value = serde_json::from_str::<serde_json::Value>(input_value_str)
.unwrap()
.try_into()
.unwrap();
let input_value = serde_json::from_str::<serde_json::Value>(input_value_str).unwrap();
let pipeline_yaml = r#"
processors:
@@ -493,11 +499,13 @@ transform:
let yaml_content = Content::Yaml(pipeline_yaml.into());
let pipeline: Pipeline<GreptimeTransformer> = parse(&yaml_content).unwrap();
let output = pipeline.exec(input_value).unwrap();
let r = output
.rows
let mut status = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut status).unwrap();
let row = pipeline.exec_mut(&mut status).unwrap();
let r = row
.values
.into_iter()
.flat_map(|v| v.values)
.map(|v| v.value_data.unwrap())
.collect::<Vec<_>>();


@@ -116,7 +116,7 @@ impl DatafusionQueryEngine {
let default_catalog = &query_ctx.current_catalog().to_owned();
let default_schema = &query_ctx.current_schema();
let table_name = dml.table_name.resolve(default_catalog, default_schema);
let table = self.find_table(&table_name).await?;
let table = self.find_table(&table_name, &query_ctx).await?;
let output = self
.exec_query_plan(LogicalPlan::DfPlan((*dml.input).clone()), query_ctx.clone())
@@ -241,14 +241,18 @@ impl DatafusionQueryEngine {
.context(TableMutationSnafu)
}
async fn find_table(&self, table_name: &ResolvedTableReference) -> Result<TableRef> {
async fn find_table(
&self,
table_name: &ResolvedTableReference,
query_context: &QueryContextRef,
) -> Result<TableRef> {
let catalog_name = table_name.catalog.as_ref();
let schema_name = table_name.schema.as_ref();
let table_name = table_name.table.as_ref();
self.state
.catalog_manager()
.table(catalog_name, schema_name, table_name)
.table(catalog_name, schema_name, table_name, Some(query_context))
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu { table: table_name })
@@ -529,7 +533,7 @@ mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use datatypes::vectors::{Helper, UInt32Vector, UInt64Vector, VectorRef};
use session::context::QueryContext;
use session::context::{QueryContext, QueryContextBuilder};
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use super::*;
@@ -618,12 +622,16 @@ mod tests {
.as_any()
.downcast_ref::<DatafusionQueryEngine>()
.unwrap();
let query_ctx = Arc::new(QueryContextBuilder::default().build());
let table = engine
.find_table(&ResolvedTableReference {
catalog: "greptime".into(),
schema: "public".into(),
table: "numbers".into(),
})
.find_table(
&ResolvedTableReference {
catalog: "greptime".into(),
schema: "public".into(),
table: "numbers".into(),
},
&query_ctx,
)
.await
.unwrap();


@@ -61,7 +61,7 @@ impl DfContextProviderAdapter {
let mut table_provider = DfTableSourceProvider::new(
engine_state.catalog_manager().clone(),
engine_state.disallow_cross_catalog_query(),
query_ctx.as_ref(),
query_ctx.clone(),
Arc::new(DefaultPlanDecoder::new(session_state.clone(), &query_ctx)?),
session_state
.config_options()


@@ -128,6 +128,7 @@ impl DistExtensionPlanner {
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
None,
)
.await
.context(CatalogSnafu)?


@@ -68,7 +68,7 @@ impl DfLogicalPlanner {
let table_provider = DfTableSourceProvider::new(
self.engine_state.catalog_manager().clone(),
self.engine_state.disallow_cross_catalog_query(),
query_ctx.as_ref(),
query_ctx.clone(),
Arc::new(DefaultPlanDecoder::new(
self.session_state.clone(),
&query_ctx,
@@ -144,14 +144,15 @@ impl DfLogicalPlanner {
#[tracing::instrument(skip_all)]
async fn plan_pql(&self, stmt: EvalStmt, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
let plan_decoder = Arc::new(DefaultPlanDecoder::new(
self.session_state.clone(),
&query_ctx,
)?);
let table_provider = DfTableSourceProvider::new(
self.engine_state.catalog_manager().clone(),
self.engine_state.disallow_cross_catalog_query(),
query_ctx.as_ref(),
Arc::new(DefaultPlanDecoder::new(
self.session_state.clone(),
&query_ctx,
)?),
query_ctx,
plan_decoder,
self.session_state
.config_options()
.sql_parser


@@ -2379,7 +2379,7 @@ mod test {
DfTableSourceProvider::new(
catalog_list,
false,
QueryContext::arc().as_ref(),
QueryContext::arc(),
DummyDecoder::arc(),
false,
)
@@ -3219,7 +3219,7 @@ mod test {
DfTableSourceProvider::new(
catalog_list.clone(),
false,
QueryContext::arc().as_ref(),
QueryContext::arc(),
DummyDecoder::arc(),
true,
),
@@ -3249,7 +3249,7 @@ mod test {
DfTableSourceProvider::new(
catalog_list.clone(),
false,
QueryContext::arc().as_ref(),
QueryContext::arc(),
DummyDecoder::arc(),
true,
),


@@ -232,6 +232,7 @@ async fn query_from_information_schema_table(
query_ctx.current_catalog(),
INFORMATION_SCHEMA_NAME,
table_name,
Some(&query_ctx),
)
.await
.context(error::CatalogSnafu)?


@@ -753,6 +753,7 @@ impl HttpServer {
"/pipelines/:pipeline_name",
routing::delete(event::delete_pipeline),
)
.route("/pipelines/dryrun", routing::post(event::pipeline_dryrun))
.layer(
ServiceBuilder::new()
.layer(HandleErrorLayer::new(handle_error))


@@ -23,15 +23,16 @@ use axum::headers::ContentType;
use axum::http::header::CONTENT_TYPE;
use axum::http::{Request, StatusCode};
use axum::response::{IntoResponse, Response};
use axum::{async_trait, BoxError, Extension, TypedHeader};
use axum::{async_trait, BoxError, Extension, Json, TypedHeader};
use common_query::{Output, OutputData};
use common_telemetry::{error, warn};
use datatypes::value::column_data_to_json;
use pipeline::error::PipelineTransformSnafu;
use pipeline::util::to_pipeline_version;
use pipeline::PipelineVersion;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use serde_json::{Deserializer, Value};
use serde_json::{Deserializer, Map, Value};
use session::context::{Channel, QueryContext, QueryContextRef};
use snafu::{ensure, OptionExt, ResultExt};
@@ -230,6 +231,117 @@ fn transform_ndjson_array_factory(
})
}
#[axum_macros::debug_handler]
pub async fn pipeline_dryrun(
State(log_state): State<LogState>,
Query(query_params): Query<LogIngesterQueryParams>,
Extension(mut query_ctx): Extension<QueryContext>,
TypedHeader(content_type): TypedHeader<ContentType>,
payload: String,
) -> Result<Response> {
let handler = log_state.log_handler;
let pipeline_name = query_params.pipeline_name.context(InvalidParameterSnafu {
reason: "pipeline_name is required",
})?;
let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?;
let ignore_errors = query_params.ignore_errors.unwrap_or(false);
let value = extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?;
if value.len() > 10 {
return Err(InvalidParameterSnafu {
reason: "too many rows for dryrun",
}
.build());
}
query_ctx.set_channel(Channel::Http);
let query_ctx = Arc::new(query_ctx);
let pipeline = handler
.get_pipeline(&pipeline_name, version, query_ctx.clone())
.await?;
let mut intermediate_state = pipeline.init_intermediate_state();
let mut results = Vec::with_capacity(value.len());
for v in value {
pipeline
.prepare(v, &mut intermediate_state)
.map_err(|reason| PipelineTransformSnafu { reason }.build())
.context(PipelineSnafu)?;
let r = pipeline
.exec_mut(&mut intermediate_state)
.map_err(|reason| PipelineTransformSnafu { reason }.build())
.context(PipelineSnafu)?;
results.push(r);
pipeline.reset_intermediate_state(&mut intermediate_state);
}
let colume_type_key = "colume_type";
let data_type_key = "data_type";
let name_key = "name";
let schema = pipeline
.schemas()
.iter()
.map(|cs| {
let mut map = Map::new();
map.insert(name_key.to_string(), Value::String(cs.column_name.clone()));
map.insert(
data_type_key.to_string(),
Value::String(cs.datatype().as_str_name().to_string()),
);
map.insert(
colume_type_key.to_string(),
Value::String(cs.semantic_type().as_str_name().to_string()),
);
map.insert(
"fulltext".to_string(),
Value::Bool(
cs.options
.clone()
.is_some_and(|x| x.options.contains_key("fulltext")),
),
);
Value::Object(map)
})
.collect::<Vec<_>>();
let rows = results
.into_iter()
.map(|row| {
let row = row
.values
.into_iter()
.enumerate()
.map(|(idx, v)| {
v.value_data
.map(|d| {
let mut map = Map::new();
map.insert("value".to_string(), column_data_to_json(d));
map.insert("key".to_string(), schema[idx][name_key].clone());
map.insert(
"semantic_type".to_string(),
schema[idx][colume_type_key].clone(),
);
map.insert("data_type".to_string(), schema[idx][data_type_key].clone());
Value::Object(map)
})
.unwrap_or(Value::Null)
})
.collect();
Value::Array(row)
})
.collect::<Vec<_>>();
let mut result = Map::new();
result.insert("schema".to_string(), Value::Array(schema));
result.insert("rows".to_string(), Value::Array(rows));
let result = Value::Object(result);
Ok(Json(result).into_response())
}
#[axum_macros::debug_handler]
pub async fn log_ingester(
State(log_state): State<LogState>,

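For reference, the JSON the pipeline_dryrun handler above assembles has this shape; the keys (including the colume_type spelling) mirror the handler's code, and the sample values are borrowed from the integration test further down:

use serde_json::{json, Value};

fn example_dryrun_response() -> Value {
    json!({
        "schema": [
            { "name": "id1", "data_type": "INT32", "colume_type": "FIELD", "fulltext": false }
        ],
        "rows": [[
            { "key": "id1", "value": 2436, "data_type": "INT32", "semantic_type": "FIELD" }
        ]]
    })
}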

@@ -405,11 +405,11 @@ async fn get_all_column_names(
schema: &str,
manager: &CatalogManagerRef,
) -> std::result::Result<HashSet<String>, catalog::error::Error> {
let table_names = manager.table_names(catalog, schema).await?;
let table_names = manager.table_names(catalog, schema, None).await?;
let mut labels = HashSet::new();
for table_name in table_names {
let Some(table) = manager.table(catalog, schema, &table_name).await? else {
let Some(table) = manager.table(catalog, schema, &table_name, None).await? else {
continue;
};
for column in table.primary_key_columns() {
@@ -436,6 +436,7 @@ async fn retrieve_series_from_query_result(
query_ctx.current_catalog(),
&query_ctx.current_schema(),
table_name,
Some(query_ctx),
)
.await
.context(CatalogSnafu)?
@@ -691,7 +692,7 @@ pub async fn label_values_query(
if label_name == METRIC_NAME_LABEL {
let mut table_names = match handler
.catalog_manager()
.table_names(&catalog, &schema)
.table_names(&catalog, &schema, Some(&query_ctx))
.await
{
Ok(table_names) => table_names,
@@ -777,7 +778,11 @@ async fn retrieve_field_names(
if matches.is_empty() {
// query all tables if no matcher is provided
while let Some(table) = manager.tables(catalog, &schema).next().await {
while let Some(table) = manager
.tables(catalog, &schema, Some(query_ctx))
.next()
.await
{
let table = table.context(CatalogSnafu)?;
for column in table.field_columns() {
field_columns.insert(column.name);
@@ -788,7 +793,7 @@ async fn retrieve_field_names(
for table_name in matches {
let table = manager
.table(catalog, &schema, &table_name)
.table(catalog, &schema, &table_name, Some(query_ctx))
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {


@@ -261,6 +261,7 @@ impl QueryContext {
impl QueryContextBuilder {
pub fn build(self) -> QueryContext {
let channel = self.channel.unwrap_or_default();
QueryContext {
current_catalog: self
.current_catalog
@@ -270,8 +271,10 @@ impl QueryContextBuilder {
.sql_dialect
.unwrap_or_else(|| Arc::new(GreptimeDbDialect {})),
extensions: self.extensions.unwrap_or_default(),
configuration_parameter: self.configuration_parameter.unwrap_or_default(),
channel: self.channel.unwrap_or_default(),
configuration_parameter: self
.configuration_parameter
.unwrap_or_else(|| Arc::new(ConfigurationVariables::default())),
channel,
}
}


@@ -233,6 +233,9 @@ pub trait RegionScanner: Debug + DisplayAs + Send {
/// # Panics
/// Panics if the `partition` is out of bound.
fn scan_partition(&self, partition: usize) -> Result<SendableRecordBatchStream, BoxedError>;
/// Check if there is any predicate that may be executed in this scanner.
fn has_predicate(&self) -> bool;
}
pub type RegionScannerRef = Box<dyn RegionScanner>;
@@ -367,6 +370,10 @@ impl RegionScanner for SinglePartitionScanner {
))
})
}
fn has_predicate(&self) -> bool {
false
}
}
impl DisplayAs for SinglePartitionScanner {


@@ -180,7 +180,7 @@ impl ExecutionPlan for RegionScanExec {
}
fn statistics(&self) -> DfResult<Statistics> {
let statistics = if self.append_mode {
let statistics = if self.append_mode && !self.scanner.lock().unwrap().has_predicate() {
let column_statistics = self
.arrow_schema
.fields

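The guard added to RegionScanExec::statistics above means exact row-count statistics are only advertised when the scan is append-only and nothing can filter rows away; with a pushed-down predicate the precomputed total could overcount. The condition boils down to (an illustrative helper, not the executor's API):

fn can_report_exact_row_count(append_mode: bool, has_predicate: bool) -> bool {
    // A predicate may drop rows at scan time, so the region's total row count is no longer exact.
    append_mode && !has_predicate
}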

@@ -181,7 +181,8 @@ mod test {
.table(
"greptime",
"database_created_through_grpc",
"table_created_through_grpc"
"table_created_through_grpc",
None,
)
.await
.unwrap()
@@ -510,7 +511,7 @@ CREATE TABLE {table_name} (
let table = instance
.frontend()
.catalog_manager()
.table("greptime", "public", table_name)
.table("greptime", "public", table_name, None)
.await
.unwrap()
.unwrap();


@@ -278,7 +278,7 @@ mod tests {
assert!(instance
.frontend()
.catalog_manager()
.table("greptime", "public", "demo")
.table("greptime", "public", "demo", None)
.await
.unwrap()
.is_none())


@@ -462,7 +462,6 @@ async fn test_execute_show_databases_tables(instance: Arc<dyn MockInstance>) {
+--------------------+
| greptime_private |
| information_schema |
| pg_catalog |
| public |
+--------------------+\
";
@@ -1900,7 +1899,6 @@ async fn test_show_databases(instance: Arc<dyn MockInstance>) {
+--------------------+
| greptime_private |
| information_schema |
| pg_catalog |
| public |
+--------------------+";
check_output_stream(output, expected).await;
@@ -1914,7 +1912,6 @@ async fn test_show_databases(instance: Arc<dyn MockInstance>) {
| Database |
+--------------------+
| information_schema |
| pg_catalog |
+--------------------+";
check_output_stream(output, expected).await;
}


@@ -78,6 +78,7 @@ macro_rules! http_tests {
test_vm_proto_remote_write,
test_pipeline_api,
test_test_pipeline_api,
test_plain_text_ingestion,
);
)*
@@ -1146,6 +1147,171 @@ transform:
guard.remove_all().await;
}
pub async fn test_test_pipeline_api(store_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "test_pipeline_api").await;
// handshake
let client = TestClient::new(app);
let body = r#"
processors:
- date:
field: time
formats:
- "%Y-%m-%d %H:%M:%S%.3f"
ignore_missing: true
transform:
- fields:
- id1
- id2
type: int32
- fields:
- type
- log
- logger
type: string
- field: time
type: time
index: timestamp
"#;
// 1. create pipeline
let res = client
.post("/v1/events/pipelines/test")
.header("Content-Type", "application/x-yaml")
.body(body)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
let content = res.text().await;
let content = serde_json::from_str(&content);
assert!(content.is_ok());
// {"execution_time_ms":13,"pipelines":[{"name":"test","version":"2024-07-04 08:31:00.987136"}]}
let content: Value = content.unwrap();
let execution_time = content.get("execution_time_ms");
assert!(execution_time.unwrap().is_number());
let pipelines = content.get("pipelines");
let pipelines = pipelines.unwrap().as_array().unwrap();
assert_eq!(pipelines.len(), 1);
let pipeline = pipelines.first().unwrap();
assert_eq!(pipeline.get("name").unwrap(), "test");
// 2. dryrun the data against the pipeline
let data_body = r#"
[
{
"id1": "2436",
"id2": "2528",
"logger": "INTERACT.MANAGER",
"type": "I",
"time": "2024-05-25 20:16:37.217",
"log": "ClusterAdapter:enter sendTextDataToCluster\\n"
}
]
"#;
let res = client
.post("/v1/events/pipelines/dryrun?pipeline_name=test")
.header("Content-Type", "application/json")
.body(data_body)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
let body: serde_json::Value = res.json().await;
let schema = &body["schema"];
let rows = &body["rows"];
assert_eq!(
schema,
&json!([
{
"colume_type": "FIELD",
"data_type": "INT32",
"fulltext": false,
"name": "id1"
},
{
"colume_type": "FIELD",
"data_type": "INT32",
"fulltext": false,
"name": "id2"
},
{
"colume_type": "FIELD",
"data_type": "STRING",
"fulltext": false,
"name": "type"
},
{
"colume_type": "FIELD",
"data_type": "STRING",
"fulltext": false,
"name": "log"
},
{
"colume_type": "FIELD",
"data_type": "STRING",
"fulltext": false,
"name": "logger"
},
{
"colume_type": "TIMESTAMP",
"data_type": "TIMESTAMP_NANOSECOND",
"fulltext": false,
"name": "time"
}
])
);
assert_eq!(
rows,
&json!([
[
{
"data_type": "INT32",
"key": "id1",
"semantic_type": "FIELD",
"value": 2436
},
{
"data_type": "INT32",
"key": "id2",
"semantic_type": "FIELD",
"value": 2528
},
{
"data_type": "STRING",
"key": "type",
"semantic_type": "FIELD",
"value": "I"
},
{
"data_type": "STRING",
"key": "log",
"semantic_type": "FIELD",
"value": "ClusterAdapter:enter sendTextDataToCluster\\n"
},
{
"data_type": "STRING",
"key": "logger",
"semantic_type": "FIELD",
"value": "INTERACT.MANAGER"
},
{
"data_type": "TIMESTAMP_NANOSECOND",
"key": "time",
"semantic_type": "TIMESTAMP",
"value": "2024-05-25 20:16:37.217+0000"
}
]
])
);
guard.remove_all().await;
}
pub async fn test_plain_text_ingestion(store_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "test_pipeline_api").await;


@@ -1013,7 +1013,7 @@ async fn prepare_testing_metric_table(cluster: &GreptimeDbCluster) -> TableId {
let table = cluster
.frontend
.catalog_manager()
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy")
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "phy", None)
.await
.unwrap()
.unwrap();
@@ -1039,7 +1039,12 @@ async fn prepare_testing_table(cluster: &GreptimeDbCluster) -> TableId {
let table = cluster
.frontend
.catalog_manager()
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, TEST_TABLE_NAME)
.table(
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
TEST_TABLE_NAME,
None,
)
.await
.unwrap()
.unwrap();


@@ -54,3 +54,50 @@ drop table test;
Affected Rows: 0
-- Append table
create table count_where_bug (
tag String,
ts TimestampMillisecond time index,
num Int64,
primary key (tag),
) engine=mito with('append_mode'='true');
Affected Rows: 0
insert into count_where_bug (tag, ts, num)
values ('a', '2024-09-06T06:00:01Z', 1),
('a', '2024-09-06T06:00:02Z', 2),
('a', '2024-09-06T06:00:03Z', 3),
('b', '2024-09-06T06:00:04Z', 4),
('b', '2024-09-06T06:00:05Z', 5);
Affected Rows: 5
select count(1) from count_where_bug where tag = 'b';
+-----------------+
| COUNT(Int64(1)) |
+-----------------+
| 2 |
+-----------------+
select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z';
+-----------------+
| COUNT(Int64(1)) |
+-----------------+
| 1 |
+-----------------+
select count(1) from count_where_bug where num != 3;
+-----------------+
| COUNT(Int64(1)) |
+-----------------+
| 4 |
+-----------------+
drop table count_where_bug;
Affected Rows: 0


@@ -17,3 +17,27 @@ select count(*) from (select * from test cross join "HelloWorld");
drop table "HelloWorld";
drop table test;
-- Append table
create table count_where_bug (
tag String,
ts TimestampMillisecond time index,
num Int64,
primary key (tag),
) engine=mito with('append_mode'='true');
insert into count_where_bug (tag, ts, num)
values ('a', '2024-09-06T06:00:01Z', 1),
('a', '2024-09-06T06:00:02Z', 2),
('a', '2024-09-06T06:00:03Z', 3),
('b', '2024-09-06T06:00:04Z', 4),
('b', '2024-09-06T06:00:05Z', 5);
select count(1) from count_where_bug where tag = 'b';
select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z';
select count(1) from count_where_bug where num != 3;
drop table count_where_bug;


@@ -18,7 +18,6 @@ show databases;
| greptime_private |
| illegal-database |
| information_schema |
| pg_catalog |
| public |
+--------------------+


@@ -10,7 +10,6 @@ SHOW DATABASES;
| greptime_private |
| information_schema |
| mydb |
| pg_catalog |
| public |
+--------------------+
@@ -22,7 +21,6 @@ SHOW FULL DATABASES;
| greptime_private | |
| information_schema | |
| mydb | ttl='1h' |
| pg_catalog | |
| public | |
+--------------------+----------+
@@ -78,7 +76,6 @@ SHOW DATABASES;
+--------------------+
| greptime_private |
| information_schema |
| pg_catalog |
| public |
+--------------------+


@@ -24,16 +24,13 @@ Affected Rows: 0
select table_catalog, table_schema, table_name from information_schema.tables where table_schema != 'information_schema';
+---------------+--------------+--------------+
| table_catalog | table_schema | table_name |
+---------------+--------------+--------------+
| greptime | abc | t |
| greptime | abcde | t |
| greptime | pg_catalog | pg_class |
| greptime | pg_catalog | pg_type |
| greptime | pg_catalog | pg_namespace |
| greptime | public | numbers |
+---------------+--------------+--------------+
+---------------+--------------+------------+
| table_catalog | table_schema | table_name |
+---------------+--------------+------------+
| greptime | abc | t |
| greptime | abcde | t |
| greptime | public | numbers |
+---------------+--------------+------------+
use public;


@@ -5,7 +5,6 @@ SHOW DATABASES;
+--------------------+
| greptime_private |
| information_schema |
| pg_catalog |
| public |
+--------------------+
@@ -16,7 +15,6 @@ SHOW FULL DATABASES;
+--------------------+---------+
| greptime_private | |
| information_schema | |
| pg_catalog | |
| public | |
+--------------------+---------+


@@ -45,9 +45,6 @@ order by table_schema, table_name;
|greptime|information_schema|tables|LOCALTEMPORARY|3|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y|
|greptime|information_schema|triggers|LOCALTEMPORARY|24|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y|
|greptime|information_schema|views|LOCALTEMPORARY|32|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y|
|greptime|pg_catalog|pg_class|LOCALTEMPORARY|256|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y|
|greptime|pg_catalog|pg_namespace|LOCALTEMPORARY|258|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y|
|greptime|pg_catalog|pg_type|LOCALTEMPORARY|257|0|0|0|0|0||11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y|
|greptime|public|numbers|LOCALTEMPORARY|2|0|0|0|0|0|test_engine|11|Fixed|0|0|0|DATETIME|||utf8_bin|0|||Y|
+++++++++++++++++++++++++
@@ -413,16 +410,6 @@ select * from information_schema.columns order by table_schema, table_name, colu
| greptime | information_schema | views | table_name | 3 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | |
| greptime | information_schema | views | table_schema | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | |
| greptime | information_schema | views | view_definition | 4 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | |
| greptime | pg_catalog | pg_class | oid | 1 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | |
| greptime | pg_catalog | pg_class | relkind | 4 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | |
| greptime | pg_catalog | pg_class | relname | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | |
| greptime | pg_catalog | pg_class | relnamespace | 3 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | |
| greptime | pg_catalog | pg_class | relowner | 5 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | |
| greptime | pg_catalog | pg_namespace | nspname | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | |
| greptime | pg_catalog | pg_namespace | oid | 1 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | |
| greptime | pg_catalog | pg_type | oid | 1 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | |
| greptime | pg_catalog | pg_type | typlen | 3 | | | 5 | 0 | | | | | | select,insert | | Int16 | smallint | FIELD | | No | smallint | | |
| greptime | pg_catalog | pg_type | typname | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | |
| greptime | public | numbers | number | 1 | | | 10 | 0 | | | | PRI | | select,insert | | UInt32 | int unsigned | TAG | | No | int unsigned | | |
+---------------+--------------------+---------------------------------------+-----------------------------------+------------------+--------------------------+------------------------+-------------------+---------------+--------------------+--------------------+----------------+------------+-------+---------------+-----------------------+----------------------+-----------------+---------------+----------------+-------------+-----------------+----------------+--------+
@@ -596,7 +583,6 @@ select * from schemata where catalog_name = 'greptime' and schema_name != 'publi
+--------------+--------------------+----------------------------+------------------------+----------+---------+
| greptime | greptime_private | utf8 | utf8_bin | | |
| greptime | information_schema | utf8 | utf8_bin | | |
| greptime | pg_catalog | utf8 | utf8_bin | | |
+--------------+--------------------+----------------------------+------------------------+----------+---------+
-- test engines


@@ -5,30 +5,7 @@ Error: 1004(InvalidArguments), Schema pg_catalog already exists
select * from pg_catalog.pg_type order by oid;
+-----+-----------+--------+
| oid | typname | typlen |
+-----+-----------+--------+
| 1 | String | -1 |
| 2 | Binary | -1 |
| 3 | Int8 | 1 |
| 4 | Int16 | 2 |
| 5 | Int32 | 4 |
| 6 | Int64 | 8 |
| 7 | UInt8 | 1 |
| 8 | UInt16 | 2 |
| 9 | UInt32 | 4 |
| 10 | UInt64 | 8 |
| 11 | Float32 | 4 |
| 12 | Float64 | 8 |
| 13 | Decimal | 16 |
| 14 | Date | 4 |
| 15 | DateTime | 8 |
| 16 | Timestamp | 8 |
| 17 | Time | 8 |
| 18 | Duration | 8 |
| 19 | Interval | 16 |
| 20 | List | -1 |
+-----+-----------+--------+
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_type
-- \d
SELECT n.nspname as "Schema",
@@ -44,11 +21,7 @@ WHERE c.relkind IN ('r','p','v','m','S','f','')
AND pg_catalog.pg_table_is_visible(c.oid)
ORDER BY 1,2;
+--------+---------+-------+-------+
| Schema | Name | Type | Owner |
+--------+---------+-------+-------+
| public | numbers | table | |
+--------+---------+-------+-------+
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class
-- \dt
SELECT n.nspname as "Schema",
@@ -64,11 +37,7 @@ WHERE c.relkind IN ('r','p','')
AND pg_catalog.pg_table_is_visible(c.oid)
ORDER BY 1,2;
+--------+---------+-------+-------+
| Schema | Name | Type | Owner |
+--------+---------+-------+-------+
| public | numbers | table | |
+--------+---------+-------+-------+
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class
-- make sure oid of namespace keep stable
SELECT * FROM pg_namespace ORDER BY oid;
@@ -100,11 +69,7 @@ where relnamespace = (
where nspname = 'my_db'
);
+---------+
| relname |
+---------+
| foo |
+---------+
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class
-- \dt
SELECT n.nspname as "Schema",
@@ -120,12 +85,7 @@ WHERE c.relkind IN ('r','p','')
AND pg_catalog.pg_table_is_visible(c.oid)
ORDER BY 1,2;
+--------+---------+-------+-------+
| Schema | Name | Type | Owner |
+--------+---------+-------+-------+
| my_db | foo | table | |
| public | numbers | table | |
+--------+---------+-------+-------+
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class
-- show tables in `my_db`, `public`
select relname
@@ -137,12 +97,7 @@ where relnamespace in (
)
order by relname;
+---------+
| relname |
+---------+
| foo |
| numbers |
+---------+
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class
select relname
from pg_catalog.pg_class
@@ -152,11 +107,7 @@ where relnamespace in (
where nspname like 'my%'
);
+---------+
| relname |
+---------+
| foo |
+---------+
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class
select relnamespace, relname, relkind
from pg_catalog.pg_class
@@ -169,11 +120,7 @@ where relnamespace in (
)
order by relnamespace, relname;
+--------------+---------+---------+
| relnamespace | relname | relkind |
+--------------+---------+---------+
| 434869349 | foo | r |
+--------------+---------+---------+
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class
use public;
@@ -190,24 +137,11 @@ Affected Rows: 0
-- pg_class
desc table pg_class;
+--------------+--------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+--------------+--------+-----+------+---------+---------------+
| oid | UInt32 | | NO | | FIELD |
| relname | String | | NO | | FIELD |
| relnamespace | UInt32 | | NO | | FIELD |
| relkind | String | | NO | | FIELD |
| relowner | UInt32 | | NO | | FIELD |
+--------------+--------+-----+------+---------+---------------+
Error: 4001(TableNotFound), Table not found: pg_class
desc table pg_namespace;
+---------+--------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+---------+--------+-----+------+---------+---------------+
| oid | UInt32 | | NO | | FIELD |
| nspname | String | | NO | | FIELD |
+---------+--------+-----+------+---------+---------------+
Error: 4001(TableNotFound), Table not found: pg_namespace
drop table my_db.foo;


@@ -77,11 +77,7 @@ WHERE c.relkind IN ('v','')
AND pg_catalog.pg_table_is_visible(c.oid)
ORDER BY 1,2;
+--------+-----------+------+-------+
| Schema | Name | Type | Owner |
+--------+-----------+------+-------+
| public | test_view | view | |
+--------+-----------+------+-------+
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class
-- SQLNESS REPLACE (\s\d+\s) ID
-- SQLNESS REPLACE (\s[\-0-9T:\.]{15,}) DATETIME
@@ -110,9 +106,6 @@ SELECT * FROM INFORMATION_SCHEMA.TABLES ORDER BY TABLE_NAME, TABLE_TYPE;
|greptime|information_schema|optimizer_trace|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
|greptime|information_schema|parameters|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
|greptime|information_schema|partitions|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
|greptime|pg_catalog|pg_class|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
|greptime|pg_catalog|pg_namespace|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
|greptime|pg_catalog|pg_type|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
|greptime|information_schema|profiling|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
|greptime|information_schema|referential_constraints|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
|greptime|information_schema|region_peers|LOCALTEMPORARY|ID|ID|ID|ID|ID|ID||ID|Fixed|ID|ID|ID|DATETIME|||utf8_bin|ID|||Y|
@@ -205,6 +198,5 @@ WHERE c.relkind IN ('v','')
AND pg_catalog.pg_table_is_visible(c.oid)
ORDER BY 1,2;
++
++
Error: 4001(TableNotFound), Failed to plan SQL: Table not found: greptime.pg_catalog.pg_class