From 89a3da8a3a1e639dc9534fb5ad5e9622103d9199 Mon Sep 17 00:00:00 2001 From: liyang Date: Fri, 6 Sep 2024 00:00:53 +0800 Subject: [PATCH 1/8] chore(dockerfile): remove mysql and postgresql clients in greptimedb image (#4685) --- docker/ci/ubuntu/Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docker/ci/ubuntu/Dockerfile b/docker/ci/ubuntu/Dockerfile index cc3bed6f25..580b73e56f 100644 --- a/docker/ci/ubuntu/Dockerfile +++ b/docker/ci/ubuntu/Dockerfile @@ -11,9 +11,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ python3.10 \ python3.10-dev \ python3-pip \ - curl \ - mysql-client \ - postgresql-client + curl COPY $DOCKER_BUILD_ROOT/docker/python/requirements.txt /etc/greptime/requirements.txt From 114772ba8756f44cd9980a984dbe77285f57f30c Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Fri, 6 Sep 2024 10:31:41 +0800 Subject: [PATCH 2/8] chore: bump version v0.9.3 (#4684) --- Cargo.lock | 136 ++++++++++++++++++++++++++--------------------------- Cargo.toml | 2 +- 2 files changed, 69 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4d1aff23c6..a433b41841 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -214,7 +214,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c" [[package]] name = "api" -version = "0.9.2" +version = "0.9.3" dependencies = [ "common-base", "common-decimal", @@ -762,7 +762,7 @@ dependencies = [ [[package]] name = "auth" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "async-trait", @@ -1286,7 +1286,7 @@ dependencies = [ [[package]] name = "cache" -version = "0.9.2" +version = "0.9.3" dependencies = [ "catalog", "common-error", @@ -1294,7 +1294,7 @@ dependencies = [ "common-meta", "moka", "snafu 0.8.4", - "substrait 0.9.2", + "substrait 0.9.3", ] [[package]] @@ -1321,7 +1321,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "catalog" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "arrow", @@ -1647,7 +1647,7 @@ checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" [[package]] name = "client" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "arc-swap", @@ -1677,7 +1677,7 @@ dependencies = [ "serde_json", "snafu 0.8.4", "substrait 0.37.3", - "substrait 0.9.2", + "substrait 0.9.3", "tokio", "tokio-stream", "tonic 0.11.0", @@ -1707,7 +1707,7 @@ dependencies = [ [[package]] name = "cmd" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-trait", "auth", @@ -1763,7 +1763,7 @@ dependencies = [ "session", "snafu 0.8.4", "store-api", - "substrait 0.9.2", + "substrait 0.9.3", "table", "temp-env", "tempfile", @@ -1809,7 +1809,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335" [[package]] name = "common-base" -version = "0.9.2" +version = "0.9.3" dependencies = [ "anymap", "bitvec", @@ -1825,7 +1825,7 @@ dependencies = [ [[package]] name = "common-catalog" -version = "0.9.2" +version = "0.9.3" dependencies = [ "chrono", "common-error", @@ -1836,7 +1836,7 @@ dependencies = [ [[package]] name = "common-config" -version = "0.9.2" +version = "0.9.3" dependencies = [ "common-base", "common-error", @@ -1859,7 +1859,7 @@ dependencies = [ [[package]] name = "common-datasource" -version = "0.9.2" +version = "0.9.3" dependencies = [ "arrow", "arrow-schema", @@ -1896,7 +1896,7 @@ dependencies = [ [[package]] name = "common-decimal" -version = "0.9.2" +version = "0.9.3" dependencies = [ 
"bigdecimal", "common-error", @@ -1909,7 +1909,7 @@ dependencies = [ [[package]] name = "common-error" -version = "0.9.2" +version = "0.9.3" dependencies = [ "snafu 0.8.4", "strum 0.25.0", @@ -1918,7 +1918,7 @@ dependencies = [ [[package]] name = "common-frontend" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "async-trait", @@ -1933,7 +1933,7 @@ dependencies = [ [[package]] name = "common-function" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "arc-swap", @@ -1970,7 +1970,7 @@ dependencies = [ [[package]] name = "common-greptimedb-telemetry" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-trait", "common-runtime", @@ -1987,7 +1987,7 @@ dependencies = [ [[package]] name = "common-grpc" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "arrow-flight", @@ -2013,7 +2013,7 @@ dependencies = [ [[package]] name = "common-grpc-expr" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "common-base", @@ -2031,7 +2031,7 @@ dependencies = [ [[package]] name = "common-macro" -version = "0.9.2" +version = "0.9.3" dependencies = [ "arc-swap", "common-query", @@ -2045,7 +2045,7 @@ dependencies = [ [[package]] name = "common-mem-prof" -version = "0.9.2" +version = "0.9.3" dependencies = [ "common-error", "common-macro", @@ -2058,7 +2058,7 @@ dependencies = [ [[package]] name = "common-meta" -version = "0.9.2" +version = "0.9.3" dependencies = [ "anymap2", "api", @@ -2114,11 +2114,11 @@ dependencies = [ [[package]] name = "common-plugins" -version = "0.9.2" +version = "0.9.3" [[package]] name = "common-procedure" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-stream", "async-trait", @@ -2144,7 +2144,7 @@ dependencies = [ [[package]] name = "common-procedure-test" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-trait", "common-procedure", @@ -2152,7 +2152,7 @@ dependencies = [ [[package]] name = "common-query" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "async-trait", @@ -2178,7 +2178,7 @@ dependencies = [ [[package]] name = "common-recordbatch" -version = "0.9.2" +version = "0.9.3" dependencies = [ "arc-swap", "common-error", @@ -2197,7 +2197,7 @@ dependencies = [ [[package]] name = "common-runtime" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-trait", "common-error", @@ -2219,7 +2219,7 @@ dependencies = [ [[package]] name = "common-telemetry" -version = "0.9.2" +version = "0.9.3" dependencies = [ "atty", "backtrace", @@ -2246,7 +2246,7 @@ dependencies = [ [[package]] name = "common-test-util" -version = "0.9.2" +version = "0.9.3" dependencies = [ "client", "common-query", @@ -2258,7 +2258,7 @@ dependencies = [ [[package]] name = "common-time" -version = "0.9.2" +version = "0.9.3" dependencies = [ "arrow", "chrono", @@ -2274,7 +2274,7 @@ dependencies = [ [[package]] name = "common-version" -version = "0.9.2" +version = "0.9.3" dependencies = [ "build-data", "const_format", @@ -2285,7 +2285,7 @@ dependencies = [ [[package]] name = "common-wal" -version = "0.9.2" +version = "0.9.3" dependencies = [ "common-base", "common-error", @@ -3093,7 +3093,7 @@ dependencies = [ [[package]] name = "datanode" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "arrow-flight", @@ -3142,7 +3142,7 @@ dependencies = [ "session", "snafu 0.8.4", "store-api", - "substrait 0.9.2", + "substrait 0.9.3", "table", "tokio", "toml 0.8.14", @@ -3151,7 +3151,7 @@ dependencies = [ [[package]] name = "datatypes" -version = "0.9.2" +version = "0.9.3" dependencies = [ "arrow", "arrow-array", @@ -3721,7 
+3721,7 @@ dependencies = [ [[package]] name = "file-engine" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "async-trait", @@ -3823,7 +3823,7 @@ checksum = "28a80e3145d8ad11ba0995949bbcf48b9df2be62772b3d351ef017dff6ecb853" [[package]] name = "flow" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "arrow", @@ -3880,7 +3880,7 @@ dependencies = [ "snafu 0.8.4", "store-api", "strum 0.25.0", - "substrait 0.9.2", + "substrait 0.9.3", "table", "tokio", "tonic 0.11.0", @@ -3927,7 +3927,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" [[package]] name = "frontend" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "arc-swap", @@ -5078,7 +5078,7 @@ dependencies = [ [[package]] name = "index" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-trait", "asynchronous-codec", @@ -5858,7 +5858,7 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "log-store" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-stream", "async-trait", @@ -6170,7 +6170,7 @@ dependencies = [ [[package]] name = "meta-client" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "async-trait", @@ -6196,7 +6196,7 @@ dependencies = [ [[package]] name = "meta-srv" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "async-trait", @@ -6274,7 +6274,7 @@ dependencies = [ [[package]] name = "metric-engine" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "aquamarine", @@ -6365,7 +6365,7 @@ dependencies = [ [[package]] name = "mito2" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "aquamarine", @@ -7012,7 +7012,7 @@ dependencies = [ [[package]] name = "object-store" -version = "0.9.2" +version = "0.9.3" dependencies = [ "anyhow", "bytes", @@ -7259,7 +7259,7 @@ dependencies = [ [[package]] name = "operator" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "async-trait", @@ -7304,7 +7304,7 @@ dependencies = [ "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "store-api", - "substrait 0.9.2", + "substrait 0.9.3", "table", "tokio", "tokio-util", @@ -7554,7 +7554,7 @@ dependencies = [ [[package]] name = "partition" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "async-trait", @@ -7843,7 +7843,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pipeline" -version = "0.9.2" +version = "0.9.3" dependencies = [ "ahash 0.8.11", "api", @@ -8004,7 +8004,7 @@ dependencies = [ [[package]] name = "plugins" -version = "0.9.2" +version = "0.9.3" dependencies = [ "auth", "common-base", @@ -8273,7 +8273,7 @@ dependencies = [ [[package]] name = "promql" -version = "0.9.2" +version = "0.9.3" dependencies = [ "ahash 0.8.11", "async-trait", @@ -8508,7 +8508,7 @@ dependencies = [ [[package]] name = "puffin" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-compression 0.4.11", "async-trait", @@ -8630,7 +8630,7 @@ dependencies = [ [[package]] name = "query" -version = "0.9.2" +version = "0.9.3" dependencies = [ "ahash 0.8.11", "api", @@ -8693,7 +8693,7 @@ dependencies = [ "stats-cli", "store-api", "streaming-stats", - "substrait 0.9.2", + "substrait 0.9.3", "table", "tokio", "tokio-stream", @@ -10055,7 +10055,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "script" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "arc-swap", @@ -10349,7 +10349,7 
@@ dependencies = [ [[package]] name = "servers" -version = "0.9.2" +version = "0.9.3" dependencies = [ "aide", "api", @@ -10455,7 +10455,7 @@ dependencies = [ [[package]] name = "session" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "arc-swap", @@ -10756,7 +10756,7 @@ dependencies = [ [[package]] name = "sql" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "chrono", @@ -10816,7 +10816,7 @@ dependencies = [ [[package]] name = "sqlness-runner" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-trait", "clap 4.5.7", @@ -11033,7 +11033,7 @@ dependencies = [ [[package]] name = "store-api" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "aquamarine", @@ -11202,7 +11202,7 @@ dependencies = [ [[package]] name = "substrait" -version = "0.9.2" +version = "0.9.3" dependencies = [ "async-trait", "bytes", @@ -11403,7 +11403,7 @@ dependencies = [ [[package]] name = "table" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "async-trait", @@ -11668,7 +11668,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "tests-fuzz" -version = "0.9.2" +version = "0.9.3" dependencies = [ "arbitrary", "async-trait", @@ -11710,7 +11710,7 @@ dependencies = [ [[package]] name = "tests-integration" -version = "0.9.2" +version = "0.9.3" dependencies = [ "api", "arrow-flight", @@ -11770,7 +11770,7 @@ dependencies = [ "sql", "sqlx", "store-api", - "substrait 0.9.2", + "substrait 0.9.3", "table", "tempfile", "time", diff --git a/Cargo.toml b/Cargo.toml index b596558713..e4a04c1f47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,7 +64,7 @@ members = [ resolver = "2" [workspace.package] -version = "0.9.2" +version = "0.9.3" edition = "2021" license = "Apache-2.0" From 506dc20765f892b3d7ad77af841f6bbf7c1a3892 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Fri, 6 Sep 2024 12:13:23 +0800 Subject: [PATCH 3/8] fix: last non null iter not init (#4687) --- src/mito2/src/read/dedup.rs | 61 ++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/src/mito2/src/read/dedup.rs b/src/mito2/src/read/dedup.rs index 52ff05fd12..ddc96049e7 100644 --- a/src/mito2/src/read/dedup.rs +++ b/src/mito2/src/read/dedup.rs @@ -258,13 +258,18 @@ impl LastFieldsBuilder { fn maybe_init(&mut self, batch: &Batch) { debug_assert!(!batch.is_empty()); - if self.initialized || batch.fields().is_empty() { + if self.initialized { // Already initialized or no fields to merge. return; } self.initialized = true; + if batch.fields().is_empty() { + // No fields to merge. + return; + } + let last_idx = batch.num_rows() - 1; let fields = batch.fields(); // Safety: The last_idx is valid. @@ -1165,4 +1170,58 @@ mod tests { ]; assert_eq!(&expect, &actual[..]); } + + /// Returns a new [Batch] without fields. 
+ fn new_batch_no_fields( + primary_key: &[u8], + timestamps: &[i64], + sequences: &[u64], + op_types: &[OpType], + ) -> Batch { + let mut builder = BatchBuilder::new(primary_key.to_vec()); + builder + .timestamps_array(Arc::new(TimestampMillisecondArray::from_iter_values( + timestamps.iter().copied(), + ))) + .unwrap() + .sequences_array(Arc::new(UInt64Array::from_iter_values( + sequences.iter().copied(), + ))) + .unwrap() + .op_types_array(Arc::new(UInt8Array::from_iter_values( + op_types.iter().map(|v| *v as u8), + ))) + .unwrap(); + builder.build().unwrap() + } + + #[test] + fn test_last_non_null_iter_no_batch() { + let input = [ + new_batch_no_fields( + b"k1", + &[1, 1, 2], + &[13, 12, 13], + &[OpType::Put, OpType::Put, OpType::Put], + ), + new_batch_no_fields(b"k1", &[2, 3], &[12, 13], &[OpType::Put, OpType::Delete]), + new_batch_no_fields( + b"k2", + &[1, 1, 2], + &[13, 12, 13], + &[OpType::Put, OpType::Put, OpType::Put], + ), + ]; + let iter = input.into_iter().map(Ok); + let iter = LastNonNullIter::new(iter); + let actual: Vec<_> = iter.map(|batch| batch.unwrap()).collect(); + let expect = [ + new_batch_no_fields(b"k1", &[1], &[13], &[OpType::Put]), + new_batch_no_fields(b"k1", &[2], &[13], &[OpType::Put]), + new_batch_no_fields(b"k1", &[3], &[13], &[OpType::Delete]), + new_batch_no_fields(b"k2", &[1], &[13], &[OpType::Put]), + new_batch_no_fields(b"k2", &[2], &[13], &[OpType::Put]), + ]; + assert_eq!(&expect, &actual[..]); + } } From 67d95d2088e76077e27dc5f522f8dd12fa0bc8e0 Mon Sep 17 00:00:00 2001 From: localhost Date: Fri, 6 Sep 2024 15:51:08 +0800 Subject: [PATCH 4/8] refactor!: add processor builder and transform buidler (#4571) * chore: add processor builder and transform buidler * chore: in process * chore: intermediate state from hashmap to vector in pipeline * chore: remove useless code and rename some struct * chore: fix typos * chore: format code * chore: add error handling and optimize code readability * chore: fix typos * chore: remove useless code * chore: add some doc * chore: fix by pr commit * chore: remove useless code and change struct name * chore: modify the location of the find_key_index function. --- src/pipeline/benches/processor.rs | 25 +- src/pipeline/src/etl.rs | 331 +++----- src/pipeline/src/etl/field.rs | 360 +++++---- src/pipeline/src/etl/processor.rs | 216 +++--- src/pipeline/src/etl/processor/cmcd.rs | 384 ++++++---- src/pipeline/src/etl/processor/csv.rs | 388 +++++----- src/pipeline/src/etl/processor/date.rs | 279 ++++--- src/pipeline/src/etl/processor/dissect.rs | 704 ++++++++++-------- src/pipeline/src/etl/processor/epoch.rs | 136 ++-- src/pipeline/src/etl/processor/gsub.rs | 235 +++--- src/pipeline/src/etl/processor/join.rs | 174 +++-- src/pipeline/src/etl/processor/letter.rs | 174 ++--- src/pipeline/src/etl/processor/regex.rs | 544 +++++++++----- src/pipeline/src/etl/processor/timestamp.rs | 171 +++-- src/pipeline/src/etl/processor/urlencoding.rs | 173 +++-- src/pipeline/src/etl/transform.rs | 169 +++-- .../src/etl/transform/transformer/greptime.rs | 162 ++-- .../transform/transformer/greptime/coerce.rs | 34 +- src/pipeline/tests/common.rs | 37 +- src/pipeline/tests/dissect.rs | 10 +- src/pipeline/tests/pipeline.rs | 40 +- 21 files changed, 2530 insertions(+), 2216 deletions(-) diff --git a/src/pipeline/benches/processor.rs b/src/pipeline/benches/processor.rs index 8ed0021a6e..281d8ce0ef 100644 --- a/src/pipeline/benches/processor.rs +++ b/src/pipeline/benches/processor.rs @@ -13,27 +13,13 @@ // limitations under the License. 
use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use pipeline::{parse, Array, Content, GreptimeTransformer, Pipeline, Value as PipelineValue}; +use pipeline::{parse, Content, GreptimeTransformer, Pipeline}; use serde_json::{Deserializer, Value}; -fn processor_map( - pipeline: &Pipeline, - input_values: Vec, -) -> impl IntoIterator { - let pipeline_data = input_values - .into_iter() - .map(|v| PipelineValue::try_from(v).unwrap()) - .collect::>(); - - pipeline.exec(PipelineValue::Array(Array { - values: pipeline_data, - })) -} - fn processor_mut( pipeline: &Pipeline, input_values: Vec, -) -> impl IntoIterator> { +) -> Result, String> { let mut payload = pipeline.init_intermediate_state(); let mut result = Vec::with_capacity(input_values.len()); @@ -249,11 +235,10 @@ fn criterion_benchmark(c: &mut Criterion) { let pipeline = prepare_pipeline(); let mut group = c.benchmark_group("pipeline"); group.sample_size(50); - group.bench_function("processor map", |b| { - b.iter(|| processor_map(black_box(&pipeline), black_box(input_value.clone()))) - }); group.bench_function("processor mut", |b| { - b.iter(|| processor_mut(black_box(&pipeline), black_box(input_value.clone()))) + b.iter(|| { + processor_mut(black_box(&pipeline), black_box(input_value.clone())).unwrap(); + }) }); group.finish(); } diff --git a/src/pipeline/src/etl.rs b/src/pipeline/src/etl.rs index b2c8802dd5..de4c544a01 100644 --- a/src/pipeline/src/etl.rs +++ b/src/pipeline/src/etl.rs @@ -19,92 +19,24 @@ pub mod processor; pub mod transform; pub mod value; -use ahash::{HashMap, HashSet}; -use common_telemetry::{debug, warn}; +use ahash::HashSet; +use common_telemetry::debug; use itertools::{merge, Itertools}; -use processor::Processor; -use transform::{Transformer, Transforms}; -use value::{Map, Value}; +use processor::{Processor, ProcessorBuilder, Processors}; +use transform::{TransformBuilders, Transformer, Transforms}; +use value::Value; use yaml_rust::YamlLoader; const DESCRIPTION: &str = "description"; const PROCESSORS: &str = "processors"; const TRANSFORM: &str = "transform"; +const TRANSFORMS: &str = "transforms"; pub enum Content { Json(String), Yaml(String), } -/// set the index for the processor keys -/// the index is the position of the key in the final intermediate keys -fn set_processor_keys_index( - processors: &mut processor::Processors, - final_intermediate_keys: &Vec, -) -> Result<(), String> { - let final_intermediate_key_index = final_intermediate_keys - .iter() - .enumerate() - .map(|(i, k)| (k.as_str(), i)) - .collect::>(); - for processor in processors.iter_mut() { - for field in processor.fields_mut().iter_mut() { - let index = final_intermediate_key_index.get(field.input_field.name.as_str()).ok_or(format!( - "input field {} is not found in intermediate keys: {final_intermediate_keys:?} when set processor keys index", - field.input_field.name - ))?; - field.set_input_index(*index); - for (k, v) in field.output_fields_index_mapping.iter_mut() { - let index = final_intermediate_key_index.get(k.as_str()); - match index { - Some(index) => { - *v = *index; - } - None => { - warn!( - "output field {k} is not found in intermediate keys: {final_intermediate_keys:?} when set processor keys index" - ); - } - } - } - } - } - Ok(()) -} - -fn set_transform_keys_index( - transforms: &mut Transforms, - final_intermediate_keys: &[String], - output_keys: &[String], -) -> Result<(), String> { - let final_intermediate_key_index = final_intermediate_keys - .iter() - .enumerate() - .map(|(i, k)| (k.as_str(), 
i)) - .collect::>(); - let output_key_index = output_keys - .iter() - .enumerate() - .map(|(i, k)| (k.as_str(), i)) - .collect::>(); - for transform in transforms.iter_mut() { - for field in transform.fields.iter_mut() { - let index = final_intermediate_key_index.get(field.input_field.name.as_str()).ok_or(format!( - "input field {} is not found in intermediate keys: {final_intermediate_keys:?} when set transform keys index", - field.input_field.name - ))?; - field.set_input_index(*index); - for (k, v) in field.output_fields_index_mapping.iter_mut() { - let index = output_key_index.get(k.as_str()).ok_or(format!( - "output field {k} is not found in output keys: {final_intermediate_keys:?} when set transform keys index" - ))?; - *v = *index; - } - } - } - Ok(()) -} - pub fn parse(input: &Content) -> Result, String> where T: Transformer, @@ -117,24 +49,22 @@ where let description = doc[DESCRIPTION].as_str().map(|s| s.to_string()); - let mut processors = if let Some(v) = doc[PROCESSORS].as_vec() { + let processor_builder_list = if let Some(v) = doc[PROCESSORS].as_vec() { v.try_into()? } else { - processor::Processors::default() + processor::ProcessorBuilderList::default() }; - let transforms = if let Some(v) = doc[TRANSFORM].as_vec() { - v.try_into()? - } else { - Transforms::default() - }; + let transform_builders = + if let Some(v) = doc[TRANSFORMS].as_vec().or(doc[TRANSFORM].as_vec()) { + v.try_into()? + } else { + TransformBuilders::default() + }; - let mut transformer = T::new(transforms)?; - let transforms = transformer.transforms_mut(); - - let processors_output_keys = processors.output_keys(); - let processors_required_keys = processors.required_keys(); - let processors_required_original_keys = processors.required_original_keys(); + let processors_required_keys = &processor_builder_list.input_keys; + let processors_output_keys = &processor_builder_list.output_keys; + let processors_required_original_keys = &processor_builder_list.original_input_keys; debug!( "processors_required_original_keys: {:?}", @@ -143,7 +73,7 @@ where debug!("processors_required_keys: {:?}", processors_required_keys); debug!("processors_output_keys: {:?}", processors_output_keys); - let transforms_required_keys = transforms.required_keys(); + let transforms_required_keys = &transform_builders.required_keys; let mut tr_keys = Vec::with_capacity(50); for key in transforms_required_keys.iter() { if !processors_output_keys.contains(key) @@ -183,9 +113,33 @@ where final_intermediate_keys.extend(intermediate_keys_exclude_original); - let output_keys = transforms.output_keys().clone(); - set_processor_keys_index(&mut processors, &final_intermediate_keys)?; - set_transform_keys_index(transforms, &final_intermediate_keys, &output_keys)?; + let output_keys = transform_builders.output_keys.clone(); + + let processors_kind_list = processor_builder_list + .processor_builders + .into_iter() + .map(|builder| builder.build(&final_intermediate_keys)) + .collect::, _>>()?; + let processors = Processors { + processors: processors_kind_list, + required_keys: processors_required_keys.clone(), + output_keys: processors_output_keys.clone(), + required_original_keys: processors_required_original_keys.clone(), + }; + + let transfor_list = transform_builders + .builders + .into_iter() + .map(|builder| builder.build(&final_intermediate_keys, &output_keys)) + .collect::, String>>()?; + + let transformers = Transforms { + transforms: transfor_list, + required_keys: transforms_required_keys.clone(), + output_keys: output_keys.clone(), + }; + 
+ let transformer = T::new(transformers)?; Ok(Pipeline { description, @@ -238,38 +192,6 @@ impl Pipeline where T: Transformer, { - fn exec_map(&self, map: &mut Map) -> Result<(), String> { - let v = map; - for processor in self.processors.iter() { - processor.exec_map(v)?; - } - Ok(()) - } - - pub fn exec(&self, mut val: Value) -> Result { - let result = match val { - Value::Map(ref mut map) => { - self.exec_map(map)?; - val - } - Value::Array(arr) => arr - .values - .into_iter() - .map(|mut v| match v { - Value::Map(ref mut map) => { - self.exec_map(map)?; - Ok(v) - } - _ => Err(format!("expected a map, but got {}", v)), - }) - .collect::, String>>() - .map(|values| Value::Array(value::Array { values }))?, - _ => return Err(format!("expected a map or array, but got {}", val)), - }; - - self.transformer.transform(result) - } - pub fn exec_mut(&self, val: &mut Vec) -> Result { for processor in self.processors.iter() { processor.exec_mut(val)?; @@ -347,9 +269,24 @@ where } } +pub(crate) fn find_key_index( + intermediate_keys: &[String], + key: &str, + kind: &str, +) -> Result { + intermediate_keys + .iter() + .position(|k| k == key) + .ok_or(format!( + "{} processor.{} not found in intermediate keys", + kind, key + )) +} + #[cfg(test)] mod tests { + use api::v1::Rows; use greptime_proto::v1::value::ValueData; use greptime_proto::v1::{self, ColumnDataType, SemanticType}; @@ -359,96 +296,43 @@ mod tests { #[test] fn test_pipeline_prepare() { - { - let input_value_str = r#" - { - "my_field": "1,2", - "foo": "bar" - } - "#; - let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap(); - - let pipeline_yaml = r#" ---- -description: Pipeline for Apache Tomcat + let input_value_str = r#" + { + "my_field": "1,2", + "foo": "bar" + } + "#; + let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap(); + let pipeline_yaml = r#"description: 'Pipeline for Apache Tomcat' processors: - csv: - field: my_field, my_field,field1, field2 - + field: my_field + target_fields: field1, field2 transform: - field: field1 type: uint32 - field: field2 type: uint32 "#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); - let mut payload = pipeline.init_intermediate_state(); - pipeline.prepare(input_value, &mut payload).unwrap(); - assert_eq!( - &["greptime_timestamp", "my_field"].to_vec(), - pipeline.required_keys() - ); - assert_eq!( - payload, - vec![ - Value::Null, - Value::String("1,2".to_string()), - Value::Null, - Value::Null - ] - ); - let result = pipeline.exec_mut(&mut payload).unwrap(); + let pipeline: Pipeline = + parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); + let mut payload = pipeline.init_intermediate_state(); + pipeline.prepare(input_value, &mut payload).unwrap(); + assert_eq!(&["my_field"].to_vec(), pipeline.required_keys()); + assert_eq!( + payload, + vec![Value::String("1,2".to_string()), Value::Null, Value::Null] + ); + let result = pipeline.exec_mut(&mut payload).unwrap(); - assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1))); - assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2))); - match &result.values[2].value_data { - Some(ValueData::TimestampNanosecondValue(v)) => { - assert_ne!(*v, 0); - } - _ => panic!("expect null value"), + assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1))); + assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2))); + match &result.values[2].value_data { + Some(ValueData::TimestampNanosecondValue(v)) => { + 
assert_ne!(*v, 0); } - } - { - let input_value_str = r#" - { - "reqTimeSec": "1573840000.000" - } - "#; - - let pipeline_yaml = r#" ---- -description: Pipeline for Demo Log - -processors: - - gsub: - field: reqTimeSec - pattern: "\\." - replacement: "" - - epoch: - field: reqTimeSec - resolution: millisecond - ignore_missing: true - -transform: - - field: reqTimeSec - type: epoch, millisecond - index: timestamp -"#; - let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap(); - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); - let mut payload = pipeline.init_intermediate_state(); - pipeline.prepare(input_value, &mut payload).unwrap(); - assert_eq!(&["reqTimeSec"].to_vec(), pipeline.required_keys()); - assert_eq!(payload, vec![Value::String("1573840000.000".to_string())]); - let result = pipeline.exec_mut(&mut payload).unwrap(); - - assert_eq!( - result.values[0].value_data, - Some(ValueData::TimestampMillisecondValue(1573840000000)) - ); + _ => panic!("expect null value"), } } @@ -541,21 +425,19 @@ transform: #[test] fn test_csv_pipeline() { let input_value_str = r#" - { - "my_field": "1,2", - "foo": "bar" - } - "#; + { + "my_field": "1,2", + "foo": "bar" + } + "#; let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap(); let pipeline_yaml = r#" ---- description: Pipeline for Apache Tomcat - processors: - csv: - field: my_field,my_field, field1, field2 - + field: my_field + target_fields: field1, field2 transform: - field: field1 type: uint32 @@ -565,8 +447,22 @@ transform: let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); - let output = pipeline.exec(input_value.try_into().unwrap()); - assert!(output.is_ok()); + let mut payload = pipeline.init_intermediate_state(); + pipeline.prepare(input_value, &mut payload).unwrap(); + assert_eq!(&["my_field"].to_vec(), pipeline.required_keys()); + assert_eq!( + payload, + vec![Value::String("1,2".to_string()), Value::Null, Value::Null] + ); + let result = pipeline.exec_mut(&mut payload).unwrap(); + assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1))); + assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2))); + match &result.values[2].value_data { + Some(ValueData::TimestampNanosecondValue(v)) => { + assert_ne!(*v, 0); + } + _ => panic!("expect null value"), + } } #[test] @@ -596,7 +492,14 @@ transform: let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); - let output = pipeline.exec(input_value.try_into().unwrap()).unwrap(); + let schema = pipeline.schemas().clone(); + let mut result = pipeline.init_intermediate_state(); + pipeline.prepare(input_value, &mut result).unwrap(); + let row = pipeline.exec_mut(&mut result).unwrap(); + let output = Rows { + schema, + rows: vec![row], + }; let schemas = output.schema; assert_eq!(schemas.len(), 1); diff --git a/src/pipeline/src/etl/field.rs b/src/pipeline/src/etl/field.rs index 80d19c0056..ff2f1ee7b5 100644 --- a/src/pipeline/src/etl/field.rs +++ b/src/pipeline/src/etl/field.rs @@ -12,69 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::BTreeMap; +use std::ops::Deref; +use std::str::FromStr; -use ahash::{HashSet, HashSetExt}; -use itertools::Itertools; - -#[derive(Debug, Default, Clone)] -pub struct Fields(Vec); - -impl Fields { - pub(crate) fn new(fields: Vec) -> Result { - let ff = Fields(fields); - ff.check() - } - - pub(crate) fn one(field: Field) -> Self { - Fields(vec![field]) - } - - pub(crate) fn get_target_fields(&self) -> Vec<&str> { - self.0.iter().map(|f| f.get_target_field()).collect() - } - - fn check(self) -> Result { - if self.0.is_empty() { - return Err("fields must not be empty".to_string()); - } - - let mut set = HashSet::new(); - for f in self.0.iter() { - if set.contains(&f.input_field.name) { - return Err(format!( - "field name must be unique, but got duplicated: {}", - f.input_field.name - )); - } - set.insert(&f.input_field.name); - } - - Ok(self) - } -} - -impl std::fmt::Display for Fields { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - let s = self.0.iter().map(|f| f.to_string()).join(";"); - write!(f, "{s}") - } -} - -impl std::ops::Deref for Fields { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl std::ops::DerefMut for Fields { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} +use crate::etl::find_key_index; +/// Information about the input field including the name and index in intermediate keys. #[derive(Debug, Default, Clone)] pub struct InputFieldInfo { pub(crate) name: String, @@ -82,132 +25,202 @@ pub struct InputFieldInfo { } impl InputFieldInfo { + /// Create a new input field info with the given field name and index. pub(crate) fn new(field: impl Into, index: usize) -> Self { InputFieldInfo { name: field.into(), index, } } +} - pub(crate) fn name(field: impl Into) -> Self { - InputFieldInfo { - name: field.into(), - index: 0, +/// Information about a field that has one input and one output. +#[derive(Debug, Default, Clone)] +pub struct OneInputOneOutputField { + input: InputFieldInfo, + output: Option<(String, usize)>, +} + +impl OneInputOneOutputField { + /// Create a new field with the given input and output. + pub(crate) fn new(input: InputFieldInfo, output: (String, usize)) -> Self { + OneInputOneOutputField { + input, + output: Some(output), + } + } + + /// Build a new field with the given processor kind, intermediate keys, input field, and target field. + pub(crate) fn build( + processor_kind: &str, + intermediate_keys: &[String], + input_field: &str, + target_field: &str, + ) -> Result { + let input_index = find_key_index(intermediate_keys, input_field, processor_kind)?; + + let input_field_info = InputFieldInfo::new(input_field, input_index); + let output_index = find_key_index(intermediate_keys, target_field, processor_kind)?; + Ok(OneInputOneOutputField::new( + input_field_info, + (target_field.to_string(), output_index), + )) + } + + /// Get the input field information. + pub(crate) fn input(&self) -> &InputFieldInfo { + &self.input + } + + /// Get the index of the input field. + pub(crate) fn input_index(&self) -> usize { + self.input.index + } + + /// Get the name of the input field. + pub(crate) fn input_name(&self) -> &str { + &self.input.name + } + + /// Get the index of the output field. + pub(crate) fn output_index(&self) -> usize { + *self.output().1 + } + + /// Get the name of the output field. + pub(crate) fn output_name(&self) -> &str { + self.output().0 + } + + /// Get the output field information. 
+ pub(crate) fn output(&self) -> (&String, &usize) { + if let Some((name, index)) = &self.output { + (name, index) + } else { + (&self.input.name, &self.input.index) } } } -/// Used to represent the input and output fields of a processor or transform. +/// Information about a field that has one input and multiple outputs. +#[derive(Debug, Default, Clone)] +pub struct OneInputMultiOutputField { + input: InputFieldInfo, + /// Typically, processors that output multiple keys need to be distinguished by splicing the keys together. + prefix: Option, +} + +impl OneInputMultiOutputField { + /// Create a new field with the given input and prefix. + pub(crate) fn new(input: InputFieldInfo, prefix: Option) -> Self { + OneInputMultiOutputField { input, prefix } + } + + /// Get the input field information. + pub(crate) fn input(&self) -> &InputFieldInfo { + &self.input + } + + /// Get the index of the input field. + pub(crate) fn input_index(&self) -> usize { + self.input.index + } + + /// Get the name of the input field. + pub(crate) fn input_name(&self) -> &str { + &self.input.name + } + + /// Get the prefix for the output fields. + pub(crate) fn target_prefix(&self) -> &str { + self.prefix.as_deref().unwrap_or(&self.input.name) + } +} + +/// Raw processor-defined inputs and outputs #[derive(Debug, Default, Clone)] pub struct Field { - /// The input field name and index. - pub input_field: InputFieldInfo, - - /// The output field name and index mapping. - pub output_fields_index_mapping: BTreeMap, - - // rename - pub target_field: Option, - - // 1-to-many mapping - // processors: - // - csv - pub target_fields: Option>, + pub(crate) input_field: String, + pub(crate) target_field: Option, } -impl Field { - pub(crate) fn new(field: impl Into) -> Self { - Field { - input_field: InputFieldInfo::name(field.into()), - output_fields_index_mapping: BTreeMap::new(), - target_field: None, - target_fields: None, - } - } - - /// target column_name in processor or transform - /// if target_field is None, return input field name - pub(crate) fn get_target_field(&self) -> &str { - self.target_field - .as_deref() - .unwrap_or(&self.input_field.name) - } - - /// input column_name in processor or transform - pub(crate) fn get_field_name(&self) -> &str { - &self.input_field.name - } - - /// set input column index in processor or transform - pub(crate) fn set_input_index(&mut self, index: usize) { - self.input_field.index = index; - } - - pub(crate) fn set_output_index(&mut self, key: &str, index: usize) { - if let Some(v) = self.output_fields_index_mapping.get_mut(key) { - *v = index; - } - } - - pub(crate) fn insert_output_index(&mut self, key: String, index: usize) { - self.output_fields_index_mapping.insert(key, index); - } -} - -impl std::str::FromStr for Field { +impl FromStr for Field { type Err = String; fn from_str(s: &str) -> Result { let mut parts = s.split(','); - let field = parts.next().ok_or("field is missing")?.trim().to_string(); + let input_field = parts + .next() + .ok_or("input field is missing")? + .trim() + .to_string(); + let target_field = parts.next().map(|x| x.trim().to_string()); - if field.is_empty() { - return Err("field is empty".to_string()); + if input_field.is_empty() { + return Err("input field is empty".to_string()); } - let renamed_field = match parts.next() { - Some(s) if !s.trim().is_empty() => Some(s.trim().to_string()), - _ => None, - }; - - // TODO(qtang): ???? what's this? - // weird design? field: ,,,.... 
- // and only use in csv processor - let fields: Vec<_> = parts - .map(|s| s.trim()) - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()) - .collect(); - let target_fields = if fields.is_empty() { - None - } else { - Some(fields) - }; - Ok(Field { - input_field: InputFieldInfo::name(field), - output_fields_index_mapping: BTreeMap::new(), - target_field: renamed_field, - target_fields, + input_field, + target_field, }) } } -impl std::fmt::Display for Field { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match (&self.target_field, &self.target_fields) { - (Some(target_field), None) => write!(f, "{}, {target_field}", self.input_field.name), - (None, Some(target_fields)) => { - write!( - f, - "{}, {}", - self.input_field.name, - target_fields.iter().join(",") - ) - } - _ => write!(f, "{}", self.input_field.name), +impl Field { + /// Create a new field with the given input and target fields. + pub(crate) fn new(input_field: impl Into, target_field: Option) -> Self { + Field { + input_field: input_field.into(), + target_field, } } + + /// Get the input field. + pub(crate) fn input_field(&self) -> &str { + &self.input_field + } + + /// Get the target field. + pub(crate) fn target_field(&self) -> Option<&str> { + self.target_field.as_deref() + } + + /// Get the target field or the input field if the target field is not set. + pub(crate) fn target_or_input_field(&self) -> &str { + self.target_field.as_deref().unwrap_or(&self.input_field) + } +} + +/// A collection of fields. +#[derive(Debug, Default, Clone)] +pub struct Fields(Vec); + +impl Fields { + pub(crate) fn new(fields: Vec) -> Self { + Fields(fields) + } + + pub(crate) fn one(field: Field) -> Self { + Fields(vec![field]) + } +} + +impl Deref for Fields { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl IntoIterator for Fields { + type Item = Field; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } } #[cfg(test)] @@ -227,35 +240,14 @@ mod tests { let cases = [ // ("field", "field", None, None), - ( - "field, target_field", - "field", - Some("target_field".into()), - None, - ), - ( - "field, target_field1, target_field2, target_field3", - "field", - Some("target_field1".into()), - Some(vec!["target_field2".into(), "target_field3".into()]), - ), - ( - "field,, target_field1, target_field2, target_field3", - "field", - None, - Some(vec![ - "target_field1".into(), - "target_field2".into(), - "target_field3".into(), - ]), - ), + ("field, target_field", "field", Some("target_field")), + ("field", "field", None), ]; - for (s, field, target_field, target_fields) in cases.into_iter() { + for (s, field, target_field) in cases.into_iter() { let f: Field = s.parse().unwrap(); - assert_eq!(f.get_field_name(), field, "{s}"); - assert_eq!(f.target_field, target_field, "{s}"); - assert_eq!(f.target_fields, target_fields, "{s}"); + assert_eq!(f.input_field(), field, "{s}"); + assert_eq!(f.target_field(), target_field, "{s}"); } } } diff --git a/src/pipeline/src/etl/processor.rs b/src/pipeline/src/etl/processor.rs index 185b155c32..257cce4dfc 100644 --- a/src/pipeline/src/etl/processor.rs +++ b/src/pipeline/src/etl/processor.rs @@ -25,22 +25,22 @@ pub mod timestamp; pub mod urlencoding; use ahash::{HashSet, HashSetExt}; -use cmcd::CmcdProcessor; -use csv::CsvProcessor; -use date::DateProcessor; -use dissect::DissectProcessor; +use cmcd::{CmcdProcessor, CmcdProcessorBuilder}; +use csv::{CsvProcessor, CsvProcessorBuilder}; +use 
date::{DateProcessor, DateProcessorBuilder}; +use dissect::{DissectProcessor, DissectProcessorBuilder}; use enum_dispatch::enum_dispatch; -use epoch::EpochProcessor; -use gsub::GsubProcessor; +use epoch::{EpochProcessor, EpochProcessorBuilder}; +use gsub::{GsubProcessor, GsubProcessorBuilder}; use itertools::Itertools; -use join::JoinProcessor; -use letter::LetterProcessor; -use regex::RegexProcessor; -use timestamp::TimestampProcessor; -use urlencoding::UrlEncodingProcessor; +use join::{JoinProcessor, JoinProcessorBuilder}; +use letter::{LetterProcessor, LetterProcessorBuilder}; +use regex::{RegexProcessor, RegexProcessorBuilder}; +use timestamp::{TimestampProcessor, TimestampProcessorBuilder}; +use urlencoding::{UrlEncodingProcessor, UrlEncodingProcessorBuilder}; -use crate::etl::field::{Field, Fields}; -use crate::etl::value::{Map, Value}; +use super::field::{Field, Fields}; +use crate::etl::value::Value; const FIELD_NAME: &str = "field"; const FIELDS_NAME: &str = "fields"; @@ -49,6 +49,7 @@ const METHOD_NAME: &str = "method"; const PATTERN_NAME: &str = "pattern"; const PATTERNS_NAME: &str = "patterns"; const SEPARATOR_NAME: &str = "separator"; +const TARGET_FIELDS_NAME: &str = "target_fields"; // const IF_NAME: &str = "if"; // const IGNORE_FAILURE_NAME: &str = "ignore_failure"; @@ -62,55 +63,14 @@ const SEPARATOR_NAME: &str = "separator"; /// The output of a processor is a map of key-value pairs that will be merged into the document when you use exec_map method. #[enum_dispatch(ProcessorKind)] pub trait Processor: std::fmt::Debug + Send + Sync + 'static { - /// Get the processor's fields - /// fields is just the same processor for multiple keys. It is not the case that a processor has multiple inputs - fn fields(&self) -> &Fields; - - /// Get the processor's fields mutably - fn fields_mut(&mut self) -> &mut Fields; - /// Get the processor's kind fn kind(&self) -> &str; /// Whether to ignore missing fn ignore_missing(&self) -> bool; - /// processor all output keys - /// if a processor has multiple output keys, it should return all of them - fn output_keys(&self) -> HashSet; - - /// Execute the processor on a document - /// and return a map of key-value pairs - fn exec_field(&self, val: &Value, field: &Field) -> Result; - /// Execute the processor on a vector which be preprocessed by the pipeline fn exec_mut(&self, val: &mut Vec) -> Result<(), String>; - - /// Execute the processor on a map - /// and merge the output into the original map - fn exec_map(&self, map: &mut Map) -> Result<(), String> { - for ff @ Field { - input_field: field_info, - .. 
- } in self.fields().iter() - { - match map.get(&field_info.name) { - Some(v) => { - map.extend(self.exec_field(v, ff)?); - } - None if self.ignore_missing() => {} - None => { - return Err(format!( - "{} processor: field '{}' is required but missing in {map}", - self.kind(), - field_info.name, - )) - } - } - } - - Ok(()) - } } #[derive(Debug)] @@ -129,6 +89,42 @@ pub enum ProcessorKind { Date(DateProcessor), } +/// ProcessorBuilder trait defines the interface for all processor builders +/// A processor builder is used to create a processor +#[enum_dispatch(ProcessorBuilders)] +pub trait ProcessorBuilder: std::fmt::Debug + Send + Sync + 'static { + /// Get the processor's output keys + fn output_keys(&self) -> HashSet<&str>; + /// Get the processor's input keys + fn input_keys(&self) -> HashSet<&str>; + /// Build the processor + fn build(self, intermediate_keys: &[String]) -> Result; +} + +#[derive(Debug)] +#[enum_dispatch] +pub enum ProcessorBuilders { + Cmcd(CmcdProcessorBuilder), + Csv(CsvProcessorBuilder), + Dissect(DissectProcessorBuilder), + Gsub(GsubProcessorBuilder), + Join(JoinProcessorBuilder), + Letter(LetterProcessorBuilder), + Regex(RegexProcessorBuilder), + Timestamp(TimestampProcessorBuilder), + UrlEncoding(UrlEncodingProcessorBuilder), + Epoch(EpochProcessorBuilder), + Date(DateProcessorBuilder), +} + +#[derive(Debug, Default)] +pub struct ProcessorBuilderList { + pub(crate) processor_builders: Vec, + pub(crate) input_keys: Vec, + pub(crate) output_keys: Vec, + pub(crate) original_input_keys: Vec, +} + #[derive(Debug, Default)] pub struct Processors { /// A ordered list of processors @@ -174,52 +170,63 @@ impl Processors { } } -impl TryFrom<&Vec> for Processors { +impl TryFrom<&Vec> for ProcessorBuilderList { type Error = String; fn try_from(vec: &Vec) -> Result { - let mut processors = vec![]; + let mut processors_builders = vec![]; let mut all_output_keys = HashSet::with_capacity(50); let mut all_required_keys = HashSet::with_capacity(50); let mut all_required_original_keys = HashSet::with_capacity(50); for doc in vec { let processor = parse_processor(doc)?; - - // get all required keys - let processor_required_keys: Vec = processor - .fields() - .iter() - .map(|f| f.input_field.name.clone()) - .collect(); - - for key in &processor_required_keys { - if !all_output_keys.contains(key) { - all_required_original_keys.insert(key.clone()); - } - } - - all_required_keys.extend(processor_required_keys); - - let processor_output_keys = processor.output_keys().into_iter(); - all_output_keys.extend(processor_output_keys); - - processors.push(processor); + processors_builders.push(processor); } - let all_required_keys = all_required_keys.into_iter().sorted().collect(); - let all_output_keys = all_output_keys.into_iter().sorted().collect(); - let all_required_original_keys = all_required_original_keys.into_iter().sorted().collect(); + for processor in processors_builders.iter() { + { + // get all required keys + let processor_required_keys = processor.input_keys(); - Ok(Processors { - processors, - required_keys: all_required_keys, + for key in &processor_required_keys { + if !all_output_keys.contains(key) { + all_required_original_keys.insert(*key); + } + } + + all_required_keys.extend(processor_required_keys); + + let processor_output_keys = processor.output_keys().into_iter(); + all_output_keys.extend(processor_output_keys); + } + } + + let all_required_keys = all_required_keys + .into_iter() + .map(|x| x.to_string()) + .sorted() + .collect(); + let all_output_keys = all_output_keys 
+ .into_iter() + .map(|x| x.to_string()) + .sorted() + .collect(); + let all_required_original_keys = all_required_original_keys + .into_iter() + .map(|x| x.to_string()) + .sorted() + .collect(); + + Ok(ProcessorBuilderList { + processor_builders: processors_builders, + input_keys: all_required_keys, output_keys: all_output_keys, - required_original_keys: all_required_original_keys, + original_input_keys: all_required_original_keys, }) } } -fn parse_processor(doc: &yaml_rust::Yaml) -> Result { +fn parse_processor(doc: &yaml_rust::Yaml) -> Result { let map = doc.as_hash().ok_or("processor must be a map".to_string())?; let key = map @@ -238,20 +245,24 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result { .ok_or("processor key must be a string".to_string())?; let processor = match str_key { - cmcd::PROCESSOR_CMCD => ProcessorKind::Cmcd(CmcdProcessor::try_from(value)?), - csv::PROCESSOR_CSV => ProcessorKind::Csv(CsvProcessor::try_from(value)?), - dissect::PROCESSOR_DISSECT => ProcessorKind::Dissect(DissectProcessor::try_from(value)?), - epoch::PROCESSOR_EPOCH => ProcessorKind::Epoch(EpochProcessor::try_from(value)?), - date::PROCESSOR_DATE => ProcessorKind::Date(DateProcessor::try_from(value)?), - gsub::PROCESSOR_GSUB => ProcessorKind::Gsub(GsubProcessor::try_from(value)?), - join::PROCESSOR_JOIN => ProcessorKind::Join(JoinProcessor::try_from(value)?), - letter::PROCESSOR_LETTER => ProcessorKind::Letter(LetterProcessor::try_from(value)?), - regex::PROCESSOR_REGEX => ProcessorKind::Regex(RegexProcessor::try_from(value)?), + cmcd::PROCESSOR_CMCD => ProcessorBuilders::Cmcd(CmcdProcessorBuilder::try_from(value)?), + csv::PROCESSOR_CSV => ProcessorBuilders::Csv(CsvProcessorBuilder::try_from(value)?), + dissect::PROCESSOR_DISSECT => { + ProcessorBuilders::Dissect(DissectProcessorBuilder::try_from(value)?) + } + epoch::PROCESSOR_EPOCH => ProcessorBuilders::Epoch(EpochProcessorBuilder::try_from(value)?), + date::PROCESSOR_DATE => ProcessorBuilders::Date(DateProcessorBuilder::try_from(value)?), + gsub::PROCESSOR_GSUB => ProcessorBuilders::Gsub(GsubProcessorBuilder::try_from(value)?), + join::PROCESSOR_JOIN => ProcessorBuilders::Join(JoinProcessorBuilder::try_from(value)?), + letter::PROCESSOR_LETTER => { + ProcessorBuilders::Letter(LetterProcessorBuilder::try_from(value)?) + } + regex::PROCESSOR_REGEX => ProcessorBuilders::Regex(RegexProcessorBuilder::try_from(value)?), timestamp::PROCESSOR_TIMESTAMP => { - ProcessorKind::Timestamp(TimestampProcessor::try_from(value)?) + ProcessorBuilders::Timestamp(TimestampProcessorBuilder::try_from(value)?) } urlencoding::PROCESSOR_URL_ENCODING => { - ProcessorKind::UrlEncoding(UrlEncodingProcessor::try_from(value)?) + ProcessorBuilders::UrlEncoding(UrlEncodingProcessorBuilder::try_from(value)?) 
} _ => return Err(format!("unsupported {} processor", str_key)), }; @@ -301,19 +312,10 @@ where }) } -pub(crate) fn yaml_fields(v: &yaml_rust::Yaml, field: &str) -> Result { - let v = yaml_parse_strings(v, field)?; - Fields::new(v) +pub(crate) fn yaml_new_fields(v: &yaml_rust::Yaml, field: &str) -> Result { + yaml_parse_strings(v, field).map(Fields::new) } -pub(crate) fn yaml_field(v: &yaml_rust::Yaml, field: &str) -> Result { +pub(crate) fn yaml_new_field(v: &yaml_rust::Yaml, field: &str) -> Result { yaml_parse_string(v, field) } - -pub(crate) fn update_one_one_output_keys(fields: &mut Fields) { - for field in fields.iter_mut() { - field - .output_fields_index_mapping - .insert(field.get_target_field().to_string(), 0_usize); - } -} diff --git a/src/pipeline/src/etl/processor/cmcd.rs b/src/pipeline/src/etl/processor/cmcd.rs index f4e6aa9d36..1556829d65 100644 --- a/src/pipeline/src/etl/processor/cmcd.rs +++ b/src/pipeline/src/etl/processor/cmcd.rs @@ -12,14 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::BTreeMap; + use ahash::HashSet; use urlencoding::decode; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{Field, Fields, InputFieldInfo, OneInputMultiOutputField}; +use crate::etl::find_key_index; use crate::etl::processor::{ - yaml_bool, yaml_field, yaml_fields, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, Processor, ProcessorBuilder, ProcessorKind, + FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, }; -use crate::etl::value::{Map, Value}; +use crate::etl::value::Value; pub(crate) const PROCESSOR_CMCD: &str = "cmcd"; @@ -63,6 +67,178 @@ const CMCD_KEYS: [&str; 18] = [ CMCD_KEY_V, ]; +/// CmcdProcessorBuilder is a builder for CmcdProcessor +/// parse from raw yaml +#[derive(Debug, Default)] +pub struct CmcdProcessorBuilder { + fields: Fields, + output_keys: HashSet, + ignore_missing: bool, +} + +impl CmcdProcessorBuilder { + /// build_cmcd_outputs build cmcd output info + /// generate index and function for each output + pub(super) fn build_cmcd_outputs( + field: &Field, + intermediate_keys: &[String], + ) -> Result<(BTreeMap, Vec), String> { + let mut output_index = BTreeMap::new(); + let mut cmcd_field_outputs = Vec::with_capacity(CMCD_KEYS.len()); + for cmcd in CMCD_KEYS { + let final_key = generate_key(field.target_or_input_field(), cmcd); + let index = find_key_index(intermediate_keys, &final_key, "cmcd")?; + output_index.insert(final_key.clone(), index); + match cmcd { + CMCD_KEY_BS | CMCD_KEY_SU => { + let output_info = CmcdOutputInfo::new(final_key, cmcd, index, bs_su); + cmcd_field_outputs.push(output_info); + } + CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP + | CMCD_KEY_RTP | CMCD_KEY_TB => { + let output_info = CmcdOutputInfo::new(final_key, cmcd, index, br_tb); + cmcd_field_outputs.push(output_info); + } + CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID + | CMCD_KEY_ST | CMCD_KEY_V => { + let output_info = CmcdOutputInfo::new(final_key, cmcd, index, cid_v); + cmcd_field_outputs.push(output_info); + } + CMCD_KEY_NOR => { + let output_info = CmcdOutputInfo::new(final_key, cmcd, index, nor); + cmcd_field_outputs.push(output_info); + } + CMCD_KEY_PR => { + let output_info = CmcdOutputInfo::new(final_key, cmcd, index, pr); + cmcd_field_outputs.push(output_info); + } + _ => {} + } + } + Ok((output_index, cmcd_field_outputs)) + } + + /// build CmcdProcessor from CmcdProcessorBuilder + pub fn 
build(self, intermediate_keys: &[String]) -> Result { + let mut real_fields = vec![]; + let mut cmcd_outputs = Vec::with_capacity(CMCD_KEYS.len()); + for field in self.fields.into_iter() { + let input_index = find_key_index(intermediate_keys, field.input_field(), "cmcd")?; + + let input_field_info = InputFieldInfo::new(field.input_field(), input_index); + + let (_, cmcd_field_outputs) = Self::build_cmcd_outputs(&field, intermediate_keys)?; + + cmcd_outputs.push(cmcd_field_outputs); + + let real_field = OneInputMultiOutputField::new(input_field_info, field.target_field); + real_fields.push(real_field); + } + Ok(CmcdProcessor { + fields: real_fields, + cmcd_outputs, + ignore_missing: self.ignore_missing, + }) + } +} + +impl ProcessorBuilder for CmcdProcessorBuilder { + fn output_keys(&self) -> HashSet<&str> { + self.output_keys.iter().map(|s| s.as_str()).collect() + } + + fn input_keys(&self) -> HashSet<&str> { + self.fields.iter().map(|f| f.input_field()).collect() + } + + fn build(self, intermediate_keys: &[String]) -> Result { + self.build(intermediate_keys).map(ProcessorKind::Cmcd) + } +} + +fn generate_key(prefix: &str, key: &str) -> String { + format!("{}_{}", prefix, key) +} + +/// CmcdOutputInfo is a struct to store output info +#[derive(Debug)] +pub(super) struct CmcdOutputInfo { + /// {input_field}_{cmcd_key} + final_key: String, + /// cmcd key + key: &'static str, + /// index in intermediate_keys + index: usize, + /// function to resolve value + f: fn(&str, &str, Option<&str>) -> Result, +} + +impl CmcdOutputInfo { + fn new( + final_key: String, + key: &'static str, + index: usize, + f: fn(&str, &str, Option<&str>) -> Result, + ) -> Self { + Self { + final_key, + key, + index, + f, + } + } +} + +impl Default for CmcdOutputInfo { + fn default() -> Self { + Self { + final_key: String::default(), + key: "", + index: 0, + f: |_, _, _| Ok(Value::Null), + } + } +} + +/// function to resolve CMCD_KEY_BS | CMCD_KEY_SU +fn bs_su(_: &str, _: &str, _: Option<&str>) -> Result { + Ok(Value::Boolean(true)) +} + +/// function to resolve CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP | CMCD_KEY_RTP | CMCD_KEY_TB +fn br_tb(s: &str, k: &str, v: Option<&str>) -> Result { + let v = v.ok_or(format!("{k} missing value in {s}"))?; + let val: i64 = v + .parse() + .map_err(|_| format!("failed to parse {v} as i64"))?; + Ok(Value::Int64(val)) +} + +/// function to resolve CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID | CMCD_KEY_V +fn cid_v(s: &str, k: &str, v: Option<&str>) -> Result { + let v = v.ok_or(format!("{k} missing value in {s}"))?; + Ok(Value::String(v.to_string())) +} + +/// function to resolve CMCD_KEY_NOR +fn nor(s: &str, k: &str, v: Option<&str>) -> Result { + let v = v.ok_or(format!("{k} missing value in {s}"))?; + let val = match decode(v) { + Ok(val) => val.to_string(), + Err(_) => v.to_string(), + }; + Ok(Value::String(val)) +} + +/// function to resolve CMCD_KEY_PR +fn pr(s: &str, k: &str, v: Option<&str>) -> Result { + let v = v.ok_or(format!("{k} missing value in {s}"))?; + let val: f64 = v + .parse() + .map_err(|_| format!("failed to parse {v} as f64"))?; + Ok(Value::Float64(val)) +} + /// Common Media Client Data Specification: /// https://cdn.cta.tech/cta/media/media/resources/standards/pdfs/cta-5004-final.pdf /// @@ -100,98 +276,43 @@ const CMCD_KEYS: [&str; 18] = [ /// 12. Transport Layer Security SHOULD be used to protect all transmission of CMCD data. 
#[derive(Debug, Default)] pub struct CmcdProcessor { - fields: Fields, + fields: Vec, + cmcd_outputs: Vec>, ignore_missing: bool, } impl CmcdProcessor { - fn with_fields(&mut self, mut fields: Fields) { - Self::update_output_keys(&mut fields); - self.fields = fields; - } - - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; - } - fn generate_key(prefix: &str, key: &str) -> String { format!("{}_{}", prefix, key) } - fn parse(prefix: &str, s: &str) -> Result { - let mut map = Map::default(); + fn parse(&self, field_index: usize, s: &str) -> Result, String> { let parts = s.split(','); + let mut result = Vec::new(); for part in parts { let mut kv = part.split('='); let k = kv.next().ok_or(format!("{part} missing key in {s}"))?; let v = kv.next(); - let key = Self::generate_key(prefix, k); - match k { - CMCD_KEY_BS | CMCD_KEY_SU => { - map.insert(key, Value::Boolean(true)); + for cmcd_key in self.cmcd_outputs[field_index].iter() { + if cmcd_key.key == k { + let val = (cmcd_key.f)(s, k, v)?; + result.push((cmcd_key.index, val)); } - CMCD_KEY_BR | CMCD_KEY_BL | CMCD_KEY_D | CMCD_KEY_DL | CMCD_KEY_MTP - | CMCD_KEY_RTP | CMCD_KEY_TB => { - let v = v.ok_or(format!("{k} missing value in {s}"))?; - let val: i64 = v - .parse() - .map_err(|_| format!("failed to parse {v} as i64"))?; - map.insert(key, Value::Int64(val)); - } - CMCD_KEY_CID | CMCD_KEY_NRR | CMCD_KEY_OT | CMCD_KEY_SF | CMCD_KEY_SID - | CMCD_KEY_ST | CMCD_KEY_V => { - let v = v.ok_or(format!("{k} missing value in {s}"))?; - map.insert(key, Value::String(v.to_string())); - } - CMCD_KEY_NOR => { - let v = v.ok_or(format!("{k} missing value in {s}"))?; - let val = match decode(v) { - Ok(val) => val.to_string(), - Err(_) => v.to_string(), - }; - map.insert(key, Value::String(val)); - } - CMCD_KEY_PR => { - let v = v.ok_or(format!("{k} missing value in {s}"))?; - let val: f64 = v - .parse() - .map_err(|_| format!("failed to parse {v} as f64"))?; - map.insert(key, Value::Float64(val)); - } - _ => match v { - Some(v) => map.insert(key, Value::String(v.to_string())), - None => map.insert(k, Value::Boolean(true)), - }, } } - Ok(map) - } - - fn process_field(&self, val: &str, field: &Field) -> Result { - let prefix = field.get_target_field(); - - Self::parse(prefix, val) - } - - fn update_output_keys(fields: &mut Fields) { - for field in fields.iter_mut() { - for key in CMCD_KEYS.iter() { - field - .output_fields_index_mapping - .insert(Self::generate_key(field.get_target_field(), key), 0); - } - } + Ok(result) } } -impl TryFrom<&yaml_rust::yaml::Hash> for CmcdProcessor { +impl TryFrom<&yaml_rust::yaml::Hash> for CmcdProcessorBuilder { type Error = String; fn try_from(value: &yaml_rust::yaml::Hash) -> Result { - let mut processor = CmcdProcessor::default(); + let mut fields = Fields::default(); + let mut ignore_missing = false; for (k, v) in value.iter() { let key = k @@ -199,25 +320,40 @@ impl TryFrom<&yaml_rust::yaml::Hash> for CmcdProcessor { .ok_or(format!("key must be a string, but got {k:?}"))?; match key { FIELD_NAME => { - processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)); + fields = Fields::one(yaml_new_field(v, FIELD_NAME)?); } FIELDS_NAME => { - processor.with_fields(yaml_fields(v, FIELDS_NAME)?); + fields = yaml_new_fields(v, FIELDS_NAME)?; } IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?); + ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?; } _ => {} } } - Ok(processor) + let output_keys = fields + .iter() + .flat_map(|f| { + 
CMCD_KEYS + .iter() + .map(|cmcd_key| generate_key(f.target_or_input_field(), cmcd_key)) + }) + .collect(); + + let builder = CmcdProcessorBuilder { + fields, + output_keys, + ignore_missing, + }; + + Ok(builder) } } -impl crate::etl::processor::Processor for CmcdProcessor { +impl Processor for CmcdProcessor { fn kind(&self) -> &str { PROCESSOR_CMCD } @@ -226,51 +362,14 @@ impl crate::etl::processor::Processor for CmcdProcessor { self.ignore_missing } - fn fields(&self) -> &Fields { - &self.fields - } - - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn output_keys(&self) -> HashSet { - self.fields - .iter() - .map(|field| { - field - .target_field - .clone() - .unwrap_or_else(|| field.get_field_name().to_string()) - }) - .flat_map(|keys| { - CMCD_KEYS - .iter() - .map(move |key| format!("{}_{}", keys, *key)) - }) - .collect() - } - - fn exec_field(&self, val: &Value, field: &Field) -> Result { - match val { - Value::String(val) => self.process_field(val, field), - _ => Err(format!( - "{} processor: expect string value, but got {val:?}", - self.kind() - )), - } - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { - for field in self.fields.iter() { - match val.get(field.input_field.index) { + for (field_index, field) in self.fields.iter().enumerate() { + let field_value_index = field.input_index(); + match val.get(field_value_index) { Some(Value::String(v)) => { - // TODO(qtang): Let this method use the intermediate state collection directly. - let map = self.process_field(v, field)?; - for (k, v) in map.values.into_iter() { - if let Some(index) = field.output_fields_index_mapping.get(&k) { - val[*index] = v; - } + let result_list = self.parse(field_index, v)?; + for (output_index, v) in result_list { + val[output_index] = v; } } Some(Value::Null) | None => { @@ -278,7 +377,7 @@ impl crate::etl::processor::Processor for CmcdProcessor { return Err(format!( "{} processor: missing field: {}", self.kind(), - field.get_field_name() + field.input_name() )); } } @@ -299,7 +398,8 @@ mod tests { use ahash::HashMap; use urlencoding::decode; - use super::CmcdProcessor; + use super::{CmcdProcessorBuilder, CMCD_KEYS}; + use crate::etl::field::{Field, Fields}; use crate::etl::value::{Map, Value}; #[test] @@ -329,6 +429,7 @@ mod tests { ], ), ( + // we not resolve `b` key "b%2Crtp%3D15000%2Csid%3D%226e2fb550-c457-11e9-bb97-0800200c9a66%22", vec![ ( @@ -336,7 +437,6 @@ mod tests { Value::String("\"6e2fb550-c457-11e9-bb97-0800200c9a66\"".into()), ), ("prefix_rtp", Value::Int64(15000)), - ("b", Value::Boolean(true)), ], ), ( @@ -347,16 +447,17 @@ mod tests { ], ), ( + // we not resolve custom key "d%3D4004%2Ccom.example-myNumericKey%3D500%2Ccom.examplemyStringKey%3D%22myStringValue%22", vec![ - ( - "prefix_com.example-myNumericKey", - Value::String("500".into()), - ), - ( - "prefix_com.examplemyStringKey", - Value::String("\"myStringValue\"".into()), - ), + // ( + // "prefix_com.example-myNumericKey", + // Value::String("500".into()), + // ), + // ( + // "prefix_com.examplemyStringKey", + // Value::String("\"myStringValue\"".into()), + // ), ("prefix_d", Value::Int64(4004)), ], ), @@ -431,6 +532,24 @@ mod tests { ), ]; + let field = Field::new("prefix", None); + + let output_keys = CMCD_KEYS + .iter() + .map(|k| format!("prefix_{}", k)) + .collect::>(); + + let mut intermediate_keys = vec!["prefix".to_string()]; + intermediate_keys.append(&mut (output_keys.clone())); + + let builder = CmcdProcessorBuilder { + fields: Fields::new(vec![field]), + output_keys: 
output_keys.iter().map(|s| s.to_string()).collect(), + ignore_missing: false, + }; + + let processor = builder.build(&intermediate_keys).unwrap(); + for (s, vec) in ss.into_iter() { let decoded = decode(s).unwrap().to_string(); @@ -440,7 +559,12 @@ mod tests { .collect::>(); let expected = Map { values }; - let actual = CmcdProcessor::parse("prefix", &decoded).unwrap(); + let actual = processor.parse(0, &decoded).unwrap(); + let actual = actual + .into_iter() + .map(|(index, value)| (intermediate_keys[index].clone(), value)) + .collect::>(); + let actual = Map { values: actual }; assert_eq!(actual, expected); } } diff --git a/src/pipeline/src/etl/processor/csv.rs b/src/pipeline/src/etl/processor/csv.rs index 2f0750865a..fb1fca2bfb 100644 --- a/src/pipeline/src/etl/processor/csv.rs +++ b/src/pipeline/src/etl/processor/csv.rs @@ -14,17 +14,18 @@ // Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/csv-processor.html -use ahash::{HashMap, HashSet}; +use ahash::HashSet; use csv::{ReaderBuilder, Trim}; use itertools::EitherOrBoth::{Both, Left, Right}; use itertools::Itertools; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{Fields, InputFieldInfo, OneInputMultiOutputField}; +use crate::etl::find_key_index; use crate::etl::processor::{ - yaml_bool, yaml_field, yaml_fields, yaml_string, Processor, FIELDS_NAME, FIELD_NAME, - IGNORE_MISSING_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder, + ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, }; -use crate::etl::value::{Map, Value}; +use crate::etl::value::Value; pub(crate) const PROCESSOR_CSV: &str = "csv"; @@ -32,18 +33,78 @@ const SEPARATOR_NAME: &str = "separator"; const QUOTE_NAME: &str = "quote"; const TRIM_NAME: &str = "trim"; const EMPTY_VALUE_NAME: &str = "empty_value"; +const TARGET_FIELDS: &str = "target_fields"; + +#[derive(Debug, Default)] +pub struct CsvProcessorBuilder { + reader: ReaderBuilder, + + fields: Fields, + ignore_missing: bool, + + // Value used to fill empty fields, empty fields will be skipped if this is not provided. 
+ empty_value: Option, + target_fields: Vec, + // description + // if + // ignore_failure + // on_failure + // tag +} + +impl CsvProcessorBuilder { + fn build(self, intermediate_keys: &[String]) -> Result { + let mut real_fields = vec![]; + + for field in self.fields { + let input_index = find_key_index(intermediate_keys, field.input_field(), "csv")?; + + let input_field_info = InputFieldInfo::new(field.input_field(), input_index); + let real_field = OneInputMultiOutputField::new(input_field_info, None); + real_fields.push(real_field); + } + + let output_index_info = self + .target_fields + .iter() + .map(|f| find_key_index(intermediate_keys, f, "csv")) + .collect::, String>>()?; + Ok(CsvProcessor { + reader: self.reader, + fields: real_fields, + ignore_missing: self.ignore_missing, + empty_value: self.empty_value, + output_index_info, + }) + } +} + +impl ProcessorBuilder for CsvProcessorBuilder { + fn output_keys(&self) -> HashSet<&str> { + self.target_fields.iter().map(|s| s.as_str()).collect() + } + + fn input_keys(&self) -> HashSet<&str> { + self.fields.iter().map(|f| f.input_field()).collect() + } + + fn build(self, intermediate_keys: &[String]) -> Result { + self.build(intermediate_keys).map(ProcessorKind::Csv) + } +} /// only support string value #[derive(Debug)] pub struct CsvProcessor { reader: ReaderBuilder, - fields: Fields, + fields: Vec, ignore_missing: bool, // Value used to fill empty fields, empty fields will be skipped if this is not provided. empty_value: Option, + output_index_info: Vec, // description // if // ignore_failure @@ -52,81 +113,19 @@ pub struct CsvProcessor { } impl CsvProcessor { - fn new() -> Self { - let mut reader = ReaderBuilder::new(); - reader.has_headers(false); - - Self { - reader, - fields: Fields::default(), - ignore_missing: false, - empty_value: None, - } - } - - fn with_fields(&mut self, fields: Fields) { - self.fields = fields; - } - - fn try_separator(&mut self, separator: String) -> Result<(), String> { - if separator.len() != 1 { - Err(format!( - "'{}' must be a single character, but got '{}'", - SEPARATOR_NAME, separator - )) - } else { - self.reader.delimiter(separator.as_bytes()[0]); - Ok(()) - } - } - - fn try_quote(&mut self, quote: String) -> Result<(), String> { - if quote.len() != 1 { - Err(format!( - "'{}' must be a single character, but got '{}'", - QUOTE_NAME, quote - )) - } else { - self.reader.quote(quote.as_bytes()[0]); - Ok(()) - } - } - - fn with_trim(&mut self, trim: bool) { - if trim { - self.reader.trim(Trim::All); - } else { - self.reader.trim(Trim::None); - } - } - - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; - } - - fn with_empty_value(&mut self, empty_value: String) { - self.empty_value = Some(empty_value); - } - // process the csv format string to a map with target_fields as keys - fn process_field(&self, val: &str, field: &Field) -> Result { + fn process(&self, val: &str) -> Result, String> { let mut reader = self.reader.from_reader(val.as_bytes()); if let Some(result) = reader.records().next() { let record: csv::StringRecord = result.map_err(|e| e.to_string())?; - let values: HashMap = field - .target_fields - .as_ref() - .ok_or(format!( - "target fields must be set after '{}'", - field.get_field_name() - ))? 
+ let values: Vec<(usize, Value)> = self + .output_index_info .iter() - .map(|f| f.to_string()) .zip_longest(record.iter()) .filter_map(|zipped| match zipped { - Both(target_field, val) => Some((target_field, Value::String(val.into()))), + Both(target_field, val) => Some((*target_field, Value::String(val.into()))), // if target fields are more than extracted fields, fill the rest with empty value Left(target_field) => { let value = self @@ -134,69 +133,101 @@ impl CsvProcessor { .as_ref() .map(|s| Value::String(s.clone())) .unwrap_or(Value::Null); - Some((target_field, value)) + Some((*target_field, value)) } // if extracted fields are more than target fields, ignore the rest Right(_) => None, }) .collect(); - Ok(Map { values }) + Ok(values) } else { Err("expected at least one record from csv format, but got none".into()) } } - - fn update_output_keys(&mut self) { - self.fields.iter_mut().for_each(|f| { - if let Some(tfs) = f.target_fields.as_ref() { - tfs.iter().for_each(|tf| { - if !tf.is_empty() { - f.output_fields_index_mapping.insert(tf.to_string(), 0); - } - }); - } - }) - } } -impl TryFrom<&yaml_rust::yaml::Hash> for CsvProcessor { +impl TryFrom<&yaml_rust::yaml::Hash> for CsvProcessorBuilder { type Error = String; fn try_from(hash: &yaml_rust::yaml::Hash) -> Result { - let mut processor = CsvProcessor::new(); + let mut reader = ReaderBuilder::new(); + reader.has_headers(false); + + let mut fields = Fields::default(); + let mut ignore_missing = false; + let mut empty_value = None; + let mut target_fields = vec![]; + for (k, v) in hash { let key = k .as_str() .ok_or(format!("key must be a string, but got {k:?}"))?; match key { FIELD_NAME => { - processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)); + fields = Fields::one(yaml_new_field(v, FIELD_NAME)?); } FIELDS_NAME => { - processor.with_fields(yaml_fields(v, FIELDS_NAME)?); + fields = yaml_new_fields(v, FIELDS_NAME)?; + } + TARGET_FIELDS => { + target_fields = yaml_string(v, TARGET_FIELDS)? 
+ .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); } SEPARATOR_NAME => { - processor.try_separator(yaml_string(v, SEPARATOR_NAME)?)?; + let separator = yaml_string(v, SEPARATOR_NAME)?; + if separator.len() != 1 { + return Err(format!( + "'{}' must be a single character, but got '{}'", + SEPARATOR_NAME, separator + )); + } else { + reader.delimiter(separator.as_bytes()[0]); + } } QUOTE_NAME => { - processor.try_quote(yaml_string(v, QUOTE_NAME)?)?; + let quote = yaml_string(v, QUOTE_NAME)?; + if quote.len() != 1 { + return Err(format!( + "'{}' must be a single character, but got '{}'", + QUOTE_NAME, quote + )); + } else { + reader.quote(quote.as_bytes()[0]); + } } TRIM_NAME => { - processor.with_trim(yaml_bool(v, TRIM_NAME)?); + let trim = yaml_bool(v, TRIM_NAME)?; + if trim { + reader.trim(Trim::All); + } else { + reader.trim(Trim::None); + } } IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?); + ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?; } EMPTY_VALUE_NAME => { - processor.with_empty_value(yaml_string(v, EMPTY_VALUE_NAME)?); + empty_value = Some(yaml_string(v, EMPTY_VALUE_NAME)?); } _ => {} } } - processor.update_output_keys(); - Ok(processor) + let builder = { + CsvProcessorBuilder { + reader, + fields, + ignore_missing, + empty_value, + target_fields, + } + }; + + Ok(builder) } } @@ -209,41 +240,14 @@ impl Processor for CsvProcessor { self.ignore_missing } - fn fields(&self) -> &Fields { - &self.fields - } - - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn output_keys(&self) -> HashSet { - self.fields - .iter() - .flat_map(|f| f.target_fields.clone().unwrap_or_default()) - .collect() - } - - fn exec_field(&self, val: &Value, field: &Field) -> Result { - match val { - Value::String(val) => self.process_field(val, field), - _ => Err(format!( - "{} processor: expect string value, but got {val:?}", - self.kind() - )), - } - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { for field in self.fields.iter() { - match val.get(field.input_field.index) { + let index = field.input_index(); + match val.get(index) { Some(Value::String(v)) => { - // TODO(qtang): Let this method use the intermediate state collection directly. 
-                    let map = self.process_field(v, field)?;
-                    for (k, v) in map.values.into_iter() {
-                        if let Some(index) = field.output_fields_index_mapping.get(&k) {
-                            val[*index] = v;
-                        }
+                    let result_list = self.process(v)?;
+                    for (k, v) in result_list {
+                        val[k] = v;
                     }
                 }
                 Some(Value::Null) | None => {
@@ -251,7 +255,7 @@
                         return Err(format!(
                             "{} processor: missing field: {}",
                             self.kind(),
-                            field.get_field_name()
+                            field.input_name()
                         ));
                     }
                 }
@@ -267,116 +271,140 @@ impl Processor for CsvProcessor {
     }
 }

-// TODO(yuanbohan): more test cases
 #[cfg(test)]
 mod tests {
+    use ahash::HashMap;

-    use super::{CsvProcessor, Value};
-    use crate::etl::field::Fields;
-    use crate::etl::processor::Processor;
-    use crate::etl::value::Map;
+    use super::Value;
+    use crate::etl::processor::csv::CsvProcessorBuilder;

     #[test]
     fn test_equal_length() {
-        let mut processor = CsvProcessor::new();
-        let field = "data,, a, b".parse().unwrap();
-        processor.with_fields(Fields::one(field));
+        let mut reader = csv::ReaderBuilder::new();
+        reader.has_headers(false);
+        let builder = CsvProcessorBuilder {
+            reader,
+            target_fields: vec!["a".into(), "b".into()],
+            ..Default::default()
+        };

-        let values: HashMap<String, Value> = [("data".into(), Value::String("1,2".into()))]
+        let intermediate_keys = vec!["data".into(), "a".into(), "b".into()];
+
+        let processor = builder.build(&intermediate_keys).unwrap();
+        let result = processor
+            .process("1,2")
+            .unwrap()
             .into_iter()
-            .collect();
-        let mut m = Map { values };
-
-        processor.exec_map(&mut m).unwrap();
+            .map(|(k, v)| (intermediate_keys[k].clone(), v))
+            .collect::<HashMap<String, Value>>();

         let values = [
-            ("data".into(), Value::String("1,2".into())),
             ("a".into(), Value::String("1".into())),
             ("b".into(), Value::String("2".into())),
         ]
         .into_iter()
-        .collect();
-        let expected = Map { values };
+        .collect::<HashMap<String, Value>>();

-        assert_eq!(expected, m);
+        assert_eq!(result, values);
     }

     // test target_fields length larger than the record length
     #[test]
     fn test_target_fields_has_more_length() {
-        let values = [("data".into(), Value::String("1,2".into()))]
-            .into_iter()
-            .collect();
-        let mut input = Map { values };
-
         // with no empty value
         {
-            let mut processor = CsvProcessor::new();
-            let field = "data,, a,b,c".parse().unwrap();
-            processor.with_fields(Fields::one(field));
+            let mut reader = csv::ReaderBuilder::new();
+            reader.has_headers(false);
+            let builder = CsvProcessorBuilder {
+                reader,
+                target_fields: vec!["a".into(), "b".into(), "c".into()],
+                ..Default::default()
+            };

-            processor.exec_map(&mut input).unwrap();
+            let intermediate_keys = vec!["data".into(), "a".into(), "b".into(), "c".into()];
+
+            let processor = builder.build(&intermediate_keys).unwrap();
+            let result = processor
+                .process("1,2")
+                .unwrap()
+                .into_iter()
+                .map(|(k, v)| (intermediate_keys[k].clone(), v))
+                .collect::<HashMap<String, Value>>();

             let values = [
-                ("data".into(), Value::String("1,2".into())),
                 ("a".into(), Value::String("1".into())),
                 ("b".into(), Value::String("2".into())),
                 ("c".into(), Value::Null),
             ]
             .into_iter()
-            .collect();
-            let expected = Map { values };
+            .collect::<HashMap<String, Value>>();

-            assert_eq!(expected, input);
+            assert_eq!(result, values);
         }

         // with empty value
         {
-            let mut processor = CsvProcessor::new();
-            let field = "data,, a,b,c".parse().unwrap();
-            processor.with_fields(Fields::one(field));
-            processor.with_empty_value("default".into());
+            let mut reader = csv::ReaderBuilder::new();
+            reader.has_headers(false);
+            let builder = CsvProcessorBuilder {
+                reader,
+                target_fields: vec!["a".into(), "b".into(), "c".into()],
+                empty_value:
Some("default".into()), + ..Default::default() + }; - processor.exec_map(&mut input).unwrap(); + let intermediate_keys = vec!["data".into(), "a".into(), "b".into(), "c".into()]; + + let processor = builder.build(&intermediate_keys).unwrap(); + let result = processor + .process("1,2") + .unwrap() + .into_iter() + .map(|(k, v)| (intermediate_keys[k].clone(), v)) + .collect::>(); let values = [ - ("data".into(), Value::String("1,2".into())), ("a".into(), Value::String("1".into())), ("b".into(), Value::String("2".into())), ("c".into(), Value::String("default".into())), ] .into_iter() .collect(); - let expected = Map { values }; - assert_eq!(expected, input); + assert_eq!(result, values); } } // test record has larger length #[test] fn test_target_fields_has_less_length() { - let values = [("data".into(), Value::String("1,2,3".into()))] + let mut reader = csv::ReaderBuilder::new(); + reader.has_headers(false); + let builder = CsvProcessorBuilder { + reader, + target_fields: vec!["a".into(), "b".into()], + empty_value: Some("default".into()), + ..Default::default() + }; + + let intermediate_keys = vec!["data".into(), "a".into(), "b".into()]; + + let processor = builder.build(&intermediate_keys).unwrap(); + let result = processor + .process("1,2") + .unwrap() .into_iter() - .collect(); - let mut input = Map { values }; - - let mut processor = CsvProcessor::new(); - let field = "data,,a,b".parse().unwrap(); - processor.with_fields(Fields::one(field)); - - processor.exec_map(&mut input).unwrap(); + .map(|(k, v)| (intermediate_keys[k].clone(), v)) + .collect::>(); let values = [ - ("data".into(), Value::String("1,2,3".into())), ("a".into(), Value::String("1".into())), ("b".into(), Value::String("2".into())), ] .into_iter() .collect(); - let expected = Map { values }; - assert_eq!(expected, input); + assert_eq!(result, values); } } diff --git a/src/pipeline/src/etl/processor/date.rs b/src/pipeline/src/etl/processor/date.rs index 3230c497f4..b9bfcf3b6c 100644 --- a/src/pipeline/src/etl/processor/date.rs +++ b/src/pipeline/src/etl/processor/date.rs @@ -19,12 +19,12 @@ use chrono::{DateTime, NaiveDateTime}; use chrono_tz::Tz; use lazy_static::lazy_static; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{Fields, OneInputOneOutputField}; use crate::etl::processor::{ - update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, yaml_strings, - Processor, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, + ProcessorBuilder, ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, }; -use crate::etl::value::{Map, Timestamp, Value}; +use crate::etl::value::{Timestamp, Value}; pub(crate) const PROCESSOR_DATE: &str = "date"; @@ -57,9 +57,15 @@ lazy_static! 
{
         .collect();
 }

-#[derive(Debug, Default)]
+#[derive(Debug)]
 struct Formats(Vec<Arc<String>>);

+impl Default for Formats {
+    fn default() -> Self {
+        Formats(DEFAULT_FORMATS.clone())
+    }
+}
+
 impl Formats {
     fn new(mut formats: Vec<Arc<String>>) -> Self {
         formats.sort();
@@ -76,16 +82,119 @@ impl std::ops::Deref for Formats {
     }
 }

+#[derive(Debug, Default)]
+pub struct DateProcessorBuilder {
+    fields: Fields,
+    formats: Formats,
+    timezone: Option<Arc<String>>,
+    locale: Option<Arc<String>>,
+    ignore_missing: bool,
+}
+
+impl ProcessorBuilder for DateProcessorBuilder {
+    fn output_keys(&self) -> HashSet<&str> {
+        self.fields
+            .iter()
+            .map(|f| f.target_or_input_field())
+            .collect()
+    }
+
+    fn input_keys(&self) -> HashSet<&str> {
+        self.fields.iter().map(|f| f.input_field()).collect()
+    }
+
+    fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind, String> {
+        self.build(intermediate_keys).map(ProcessorKind::Date)
+    }
+}
+
+impl DateProcessorBuilder {
+    pub fn build(self, intermediate_keys: &[String]) -> Result<DateProcessor, String> {
+        let mut real_fields = vec![];
+        for field in self.fields.into_iter() {
+            let input = OneInputOneOutputField::build(
+                "date",
+                intermediate_keys,
+                field.input_field(),
+                field.target_or_input_field(),
+            )?;
+            real_fields.push(input);
+        }
+        Ok(DateProcessor {
+            fields: real_fields,
+            formats: self.formats,
+            timezone: self.timezone,
+            locale: self.locale,
+            ignore_missing: self.ignore_missing,
+        })
+    }
+}
+
+impl TryFrom<&yaml_rust::yaml::Hash> for DateProcessorBuilder {
+    type Error = String;
+
+    fn try_from(hash: &yaml_rust::yaml::Hash) -> Result<Self, Self::Error> {
+        let mut fields = Fields::default();
+        let mut formats = Formats::default();
+        let mut timezone = None;
+        let mut locale = None;
+        let mut ignore_missing = false;
+
+        for (k, v) in hash {
+            let key = k
+                .as_str()
+                .ok_or(format!("key must be a string, but got {k:?}"))?;
+
+            match key {
+                FIELD_NAME => {
+                    fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
+                }
+                FIELDS_NAME => {
+                    fields = yaml_new_fields(v, FIELDS_NAME)?;
+                }
+
+                FORMATS_NAME => {
+                    let format_strs = yaml_strings(v, FORMATS_NAME)?;
+                    if format_strs.is_empty() {
+                        formats = Formats::new(DEFAULT_FORMATS.clone());
+                    } else {
+                        formats = Formats::new(format_strs.into_iter().map(Arc::new).collect());
+                    }
+                }
+                TIMEZONE_NAME => {
+                    timezone = Some(Arc::new(yaml_string(v, TIMEZONE_NAME)?));
+                }
+                LOCALE_NAME => {
+                    locale = Some(Arc::new(yaml_string(v, LOCALE_NAME)?));
+                }
+                IGNORE_MISSING_NAME => {
+                    ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
+                }
+
+                _ => {}
+            }
+        }
+
+        let builder = DateProcessorBuilder {
+            fields,
+            formats,
+            timezone,
+            locale,
+            ignore_missing,
+        };

+        Ok(builder)
+    }
+}
+
 /// Deprecated; it should be removed in the future.
 /// Reserved for compatibility only.
 #[derive(Debug, Default)]
 pub struct DateProcessor {
-    fields: Fields,
-
+    fields: Vec<OneInputOneOutputField>,
     formats: Formats,
     timezone: Option<Arc<String>>,
     locale: Option<Arc<String>>, // to support locale
-    output_format: Option<Arc<String>>,

     ignore_missing: bool,
     // description
@@ -96,43 +205,6 @@ pub struct DateProcessor {
 }

 impl DateProcessor {
-    fn with_fields(&mut self, mut fields: Fields) {
-        update_one_one_output_keys(&mut fields);
-        self.fields = fields
-    }
-
-    fn with_formats(&mut self, v: Option<Vec<Arc<String>>>) {
-        let v = match v {
-            Some(v) if !v.is_empty() => v,
-            _ => DEFAULT_FORMATS.clone(),
-        };
-
-        let formats = Formats::new(v);
-        self.formats = formats;
-    }
-
-    fn with_timezone(&mut self, timezone: String) {
-        if !timezone.is_empty() {
-            self.timezone = Some(Arc::new(timezone));
-        }
-    }
-
-    fn with_locale(&mut self, locale: String) {
-        if !locale.is_empty() {
-            self.locale =
Some(Arc::new(locale)); - } - } - - fn with_output_format(&mut self, output_format: String) { - if !output_format.is_empty() { - self.output_format = Some(Arc::new(output_format)); - } - } - - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; - } - fn parse(&self, val: &str) -> Result { let mut tz = Tz::UTC; if let Some(timezone) = &self.timezone { @@ -147,61 +219,6 @@ impl DateProcessor { Err(format!("{} processor: failed to parse {val}", self.kind(),)) } - - fn process_field(&self, val: &str, field: &Field) -> Result { - let key = field.get_target_field(); - - Ok(Map::one(key, Value::Timestamp(self.parse(val)?))) - } -} - -impl TryFrom<&yaml_rust::yaml::Hash> for DateProcessor { - type Error = String; - - fn try_from(hash: &yaml_rust::yaml::Hash) -> Result { - let mut processor = DateProcessor::default(); - - let mut formats_opt = None; - - for (k, v) in hash { - let key = k - .as_str() - .ok_or(format!("key must be a string, but got {k:?}"))?; - - match key { - FIELD_NAME => { - processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)); - } - FIELDS_NAME => { - processor.with_fields(yaml_fields(v, FIELDS_NAME)?); - } - - FORMATS_NAME => { - let formats = yaml_strings(v, FORMATS_NAME)?; - formats_opt = Some(formats.into_iter().map(Arc::new).collect()); - } - TIMEZONE_NAME => { - processor.with_timezone(yaml_string(v, TIMEZONE_NAME)?); - } - LOCALE_NAME => { - processor.with_locale(yaml_string(v, LOCALE_NAME)?); - } - OUTPUT_FORMAT_NAME => { - processor.with_output_format(yaml_string(v, OUTPUT_FORMAT_NAME)?); - } - - IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?); - } - - _ => {} - } - } - - processor.with_formats(formats_opt); - - Ok(processor) - } } impl Processor for DateProcessor { @@ -213,53 +230,21 @@ impl Processor for DateProcessor { self.ignore_missing } - fn fields(&self) -> &Fields { - &self.fields - } - - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn output_keys(&self) -> HashSet { - self.fields - .iter() - .map(|f| f.get_target_field().to_string()) - .collect() - } - - fn exec_field(&self, val: &Value, field: &Field) -> Result { - match val { - Value::String(s) => self.process_field(s, field), - _ => Err(format!( - "{} processor: expect string value, but got {val:?}", - self.kind() - )), - } - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { - for field in self.fields().iter() { - let index = field.input_field.index; + for field in self.fields.iter() { + let index = field.input_index(); match val.get(index) { Some(Value::String(s)) => { - // TODO(qtang): Let this method use the intermediate state collection directly. 
- let mut map = self.process_field(s, field)?; - field - .output_fields_index_mapping - .iter() - .for_each(|(k, output_index)| { - if let Some(v) = map.remove(k) { - val[*output_index] = v; - } - }); + let timestamp = self.parse(s)?; + let output_index = field.output_index(); + val[output_index] = Value::Timestamp(timestamp); } Some(Value::Null) | None => { if !self.ignore_missing { return Err(format!( "{} processor: missing field: {}", self.kind(), - field.get_field_name() + field.input_name() )); } } @@ -318,8 +303,7 @@ mod tests { #[test] fn test_parse() { - let mut processor = DateProcessor::default(); - processor.with_formats(None); + let processor = DateProcessor::default(); let values: Vec<&str> = vec![ "2014-5-17T12:34:56", @@ -340,7 +324,6 @@ mod tests { #[test] fn test_parse_with_formats() { - let mut processor = DateProcessor::default(); let formats = vec![ "%Y-%m-%dT%H:%M:%S%:z", "%Y-%m-%dT%H:%M:%S%.3f%:z", @@ -349,8 +332,11 @@ mod tests { ] .into_iter() .map(|s| Arc::new(s.to_string())) - .collect(); - processor.with_formats(Some(formats)); + .collect::>(); + let processor = DateProcessor { + formats: super::Formats(formats), + ..Default::default() + }; let values: Vec<&str> = vec![ "2014-5-17T12:34:56", @@ -371,9 +357,10 @@ mod tests { #[test] fn test_parse_with_timezone() { - let mut processor = DateProcessor::default(); - processor.with_formats(None); - processor.with_timezone("Asia/Tokyo".to_string()); + let processor = DateProcessor { + timezone: Some(Arc::new("Asia/Tokyo".to_string())), + ..Default::default() + }; let values: Vec<&str> = vec![ "2014-5-17T12:34:56", diff --git a/src/pipeline/src/etl/processor/dissect.rs b/src/pipeline/src/etl/processor/dissect.rs index ae544f5c43..9a4b8a966e 100644 --- a/src/pipeline/src/etl/processor/dissect.rs +++ b/src/pipeline/src/etl/processor/dissect.rs @@ -12,16 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::ops::Deref; + use ahash::{HashMap, HashMapExt, HashSet, HashSetExt}; use common_telemetry::warn; use itertools::Itertools; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{Fields, InputFieldInfo, OneInputMultiOutputField}; +use crate::etl::find_key_index; use crate::etl::processor::{ - yaml_bool, yaml_field, yaml_fields, yaml_parse_string, yaml_parse_strings, yaml_string, - Processor, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERNS_NAME, PATTERN_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, yaml_parse_string, yaml_parse_strings, yaml_string, + Processor, ProcessorBuilder, ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, + PATTERNS_NAME, PATTERN_NAME, }; -use crate::etl::value::{Map, Value}; +use crate::etl::value::Value; pub(crate) const PROCESSOR_DISSECT: &str = "dissect"; @@ -59,13 +63,13 @@ impl std::fmt::Display for EndModifier { } #[derive(Debug, PartialEq, Default)] -struct Name { +struct NameInfo { name: String, start_modifier: Option, end_modifier: Option, } -impl Name { +impl NameInfo { fn is_name_empty(&self) -> bool { self.name.is_empty() } @@ -125,18 +129,87 @@ impl Name { } } +impl std::fmt::Display for NameInfo { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.name) + } +} + +impl From<&str> for NameInfo { + fn from(value: &str) -> Self { + NameInfo { + name: value.to_string(), + start_modifier: None, + end_modifier: None, + } + } +} + +#[derive(Debug, PartialEq, Default)] +struct Name { + name: String, + index: usize, + start_modifier: Option, + end_modifier: Option, +} + impl std::fmt::Display for Name { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}", self.name) } } -impl From<&str> for Name { - fn from(value: &str) -> Self { +impl From for Name { + fn from(value: NameInfo) -> Self { Name { - name: value.to_string(), - start_modifier: None, - end_modifier: None, + name: value.name, + index: 0, + start_modifier: value.start_modifier, + end_modifier: value.end_modifier, + } + } +} + +impl Name { + fn is_name_empty(&self) -> bool { + self.name.is_empty() + } + + fn is_empty(&self) -> bool { + self.name.is_empty() && self.start_modifier.is_none() && self.end_modifier.is_none() + } + + fn is_end_modifier_set(&self) -> bool { + self.end_modifier.is_some() + } +} + +#[derive(Debug, PartialEq)] +enum PartInfo { + Split(String), + Name(NameInfo), +} + +impl PartInfo { + fn is_empty(&self) -> bool { + match self { + PartInfo::Split(v) => v.is_empty(), + PartInfo::Name(v) => v.is_empty(), + } + } + + fn empty_split() -> Self { + PartInfo::Split(String::new()) + } + + fn empty_name() -> Self { + PartInfo::Name(NameInfo::default()) + } + + fn push(&mut self, ch: char) { + match self { + PartInfo::Split(v) => v.push(ch), + PartInfo::Name(v) => v.name.push(ch), } } } @@ -162,11 +235,13 @@ impl Part { fn empty_name() -> Self { Part::Name(Name::default()) } +} - fn push(&mut self, ch: char) { - match self { - Part::Split(v) => v.push(ch), - Part::Name(v) => v.name.push(ch), +impl From for Part { + fn from(value: PartInfo) -> Self { + match value { + PartInfo::Split(v) => Part::Split(v), + PartInfo::Name(v) => Part::Name(v.into()), } } } @@ -177,7 +252,7 @@ struct Pattern { parts: Vec, } -impl std::ops::Deref for Pattern { +impl Deref for Pattern { type Target = Vec; fn deref(&self) -> &Self::Target { @@ -185,18 +260,42 @@ impl std::ops::Deref for Pattern { } } -impl std::ops::DerefMut for Pattern { +impl From for Pattern { + fn from(value: PatternInfo) -> Self { + 
let parts = value.parts.into_iter().map(|x| x.into()).collect(); + Pattern { + origin: value.origin, + parts, + } + } +} + +#[derive(Debug, Default)] +struct PatternInfo { + origin: String, + parts: Vec, +} + +impl std::ops::Deref for PatternInfo { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.parts + } +} + +impl std::ops::DerefMut for PatternInfo { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.parts } } -impl std::str::FromStr for Pattern { +impl std::str::FromStr for PatternInfo { type Err = String; fn from_str(s: &str) -> Result { let mut parts = vec![]; - let mut cursor = Part::empty_split(); + let mut cursor = PartInfo::empty_split(); let origin = s.to_string(); let chars: Vec = origin.chars().collect(); @@ -206,27 +305,27 @@ impl std::str::FromStr for Pattern { let ch = chars[pos]; match (ch, &mut cursor) { // if cursor is Split part, and found %{, then ready to start a Name part - ('%', Part::Split(_)) if matches!(chars.get(pos + 1), Some('{')) => { + ('%', PartInfo::Split(_)) if matches!(chars.get(pos + 1), Some('{')) => { if !cursor.is_empty() { parts.push(cursor); } - cursor = Part::empty_name(); + cursor = PartInfo::empty_name(); pos += 1; // skip '{' } // if cursor is Split part, and not found % or {, then continue the Split part - (_, Part::Split(_)) => { + (_, PartInfo::Split(_)) => { cursor.push(ch); } // if cursor is Name part, and found }, then end the Name part, start the next Split part - ('}', Part::Name(_)) => { + ('}', PartInfo::Name(_)) => { parts.push(cursor); - cursor = Part::empty_split(); + cursor = PartInfo::empty_split(); } - ('+', Part::Name(name)) if !name.is_start_modifier_set() => { + ('+', PartInfo::Name(name)) if !name.is_start_modifier_set() => { name.try_start_modifier(StartModifier::Append(None))?; } - ('/', Part::Name(name)) if name.is_append_modifier_set() => { + ('/', PartInfo::Name(name)) if name.is_append_modifier_set() => { let mut order = 0; let mut j = pos + 1; while j < chars.len() { @@ -248,16 +347,16 @@ impl std::str::FromStr for Pattern { name.try_append_order(order)?; pos = j - 1; // this will change the position to the last digit of the order } - ('?', Part::Name(name)) if !name.is_start_modifier_set() => { + ('?', PartInfo::Name(name)) if !name.is_start_modifier_set() => { name.try_start_modifier(StartModifier::NamedSkip)?; } - ('*', Part::Name(name)) if !name.is_start_modifier_set() => { + ('*', PartInfo::Name(name)) if !name.is_start_modifier_set() => { name.try_start_modifier(StartModifier::MapKey)?; } - ('&', Part::Name(name)) if !name.is_start_modifier_set() => { + ('&', PartInfo::Name(name)) if !name.is_start_modifier_set() => { name.try_start_modifier(StartModifier::MapVal)?; } - ('-', Part::Name(name)) if !name.is_end_modifier_set() => { + ('-', PartInfo::Name(name)) if !name.is_end_modifier_set() => { if let Some('>') = chars.get(pos + 1) { } else { return Err(format!( @@ -273,7 +372,7 @@ impl std::str::FromStr for Pattern { name.try_end_modifier()?; pos += 1; // only skip '>', the next loop will skip '}' } - (_, Part::Name(name)) if !is_valid_char(ch) => { + (_, PartInfo::Name(name)) if !is_valid_char(ch) => { let tail: String = if name.is_name_empty() { format!("Invalid '{ch}'") } else { @@ -281,7 +380,7 @@ impl std::str::FromStr for Pattern { }; return Err(format!("Invalid Pattern: '{s}'. 
{tail}")); } - (_, Part::Name(_)) => { + (_, PartInfo::Name(_)) => { cursor.push(ch); } } @@ -290,8 +389,8 @@ impl std::str::FromStr for Pattern { } match cursor { - Part::Split(ref split) if !split.is_empty() => parts.push(cursor), - Part::Name(name) if !name.is_empty() => { + PartInfo::Split(ref split) if !split.is_empty() => parts.push(cursor), + PartInfo::Name(name) if !name.is_empty() => { return Err(format!("Invalid Pattern: '{s}'. '{name}' is not closed")) } _ => {} @@ -303,7 +402,7 @@ impl std::str::FromStr for Pattern { } } -impl Pattern { +impl PatternInfo { fn check(&self) -> Result<(), String> { if self.len() == 0 { return Err("Empty pattern is not allowed".to_string()); @@ -316,19 +415,19 @@ impl Pattern { let this_part = &self[i]; let next_part = self.get(i + 1); match (this_part, next_part) { - (Part::Split(split), _) if split.is_empty() => { + (PartInfo::Split(split), _) if split.is_empty() => { return Err(format!( "Invalid Pattern: '{}'. Empty split is not allowed", self.origin )); } - (Part::Name(name1), Some(Part::Name(name2))) => { + (PartInfo::Name(name1), Some(PartInfo::Name(name2))) => { return Err(format!( "Invalid Pattern: '{}'. consecutive names are not allowed: '{}' '{}'", self.origin, name1, name2 )); } - (Part::Name(name), _) if name.is_name_empty() => { + (PartInfo::Name(name), _) if name.is_name_empty() => { if let Some(ref m) = name.start_modifier { return Err(format!( "Invalid Pattern: '{}'. only '{}' modifier is invalid", @@ -336,7 +435,7 @@ impl Pattern { )); } } - (Part::Name(name), _) => match name.start_modifier { + (PartInfo::Name(name), _) => match name.start_modifier { Some(StartModifier::MapKey) => { if map_keys.contains(&name.name) { return Err(format!( @@ -379,15 +478,131 @@ impl Pattern { } } -impl std::fmt::Display for Pattern { +impl std::fmt::Display for PatternInfo { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}", self.origin) } } #[derive(Debug, Default)] -pub struct DissectProcessor { +pub struct DissectProcessorBuilder { fields: Fields, + patterns: Vec, + ignore_missing: bool, + append_separator: Option, + output_keys: HashSet, +} + +impl DissectProcessorBuilder { + fn build_output_keys(patterns: &[PatternInfo]) -> HashSet { + patterns + .iter() + .flat_map(|pattern| pattern.iter()) + .filter_map(|p| match p { + PartInfo::Name(name) => { + if !name.is_empty() + && (name.start_modifier.is_none() + || name + .start_modifier + .as_ref() + .is_some_and(|x| matches!(x, StartModifier::Append(_)))) + { + Some(name.to_string()) + } else { + None + } + } + _ => None, + }) + .collect() + } + + fn part_info_to_part( + part_info: PartInfo, + intermediate_keys: &[String], + ) -> Result { + match part_info { + PartInfo::Split(s) => Ok(Part::Split(s)), + PartInfo::Name(n) => match n.start_modifier { + None | Some(StartModifier::Append(_)) => { + let index = find_key_index(intermediate_keys, &n.name, "dissect")?; + Ok(Part::Name(Name { + name: n.name, + index, + start_modifier: n.start_modifier, + end_modifier: n.end_modifier, + })) + } + _ => Ok(Part::Name(Name { + name: n.name, + index: usize::MAX, + start_modifier: n.start_modifier, + end_modifier: n.end_modifier, + })), + }, + } + } + + fn pattern_info_to_pattern( + pattern_info: PatternInfo, + intermediate_keys: &[String], + ) -> Result { + let original = pattern_info.origin; + let pattern = pattern_info + .parts + .into_iter() + .map(|part_info| Self::part_info_to_part(part_info, intermediate_keys)) + .collect::, String>>()?; + Ok(Pattern { + origin: original, + 
parts: pattern, + }) + } + + fn build_patterns_from_pattern_infos( + patterns: Vec, + intermediate_keys: &[String], + ) -> Result, String> { + patterns + .into_iter() + .map(|pattern_info| Self::pattern_info_to_pattern(pattern_info, intermediate_keys)) + .collect() + } +} + +impl ProcessorBuilder for DissectProcessorBuilder { + fn output_keys(&self) -> HashSet<&str> { + self.output_keys.iter().map(|s| s.as_str()).collect() + } + + fn input_keys(&self) -> HashSet<&str> { + self.fields.iter().map(|f| f.input_field()).collect() + } + + fn build(self, intermediate_keys: &[String]) -> Result { + let mut real_fields = vec![]; + for field in self.fields.into_iter() { + let input_index = find_key_index(intermediate_keys, field.input_field(), "dissect")?; + + let input_field_info = InputFieldInfo::new(field.input_field(), input_index); + + let real_field = OneInputMultiOutputField::new(input_field_info, field.target_field); + real_fields.push(real_field); + } + let patterns = Self::build_patterns_from_pattern_infos(self.patterns, intermediate_keys)?; + let processor = DissectProcessor { + fields: real_fields, + patterns, + ignore_missing: self.ignore_missing, + append_separator: self.append_separator, + }; + Ok(ProcessorKind::Dissect(processor)) + } +} + +#[derive(Debug, Default)] +pub struct DissectProcessor { + fields: Vec, patterns: Vec, ignore_missing: bool, @@ -396,59 +611,51 @@ pub struct DissectProcessor { } impl DissectProcessor { - fn with_fields(&mut self, fields: Fields) { - self.fields = fields; - } - - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; - } - - fn with_patterns(&mut self, patterns: Vec) { - self.patterns = patterns; - } - - fn with_append_separator(&mut self, append_separator: String) { - self.append_separator = Some(append_separator); - } - - fn process_pattern(&self, chs: &[char], pattern: &Pattern) -> Result { - let mut map = Map::default(); + fn process_pattern( + &self, + chs: &[char], + pattern: &Pattern, + ) -> Result, String> { + let mut map = Vec::new(); let mut pos = 0; - let mut appends: HashMap> = HashMap::new(); - let mut maps: HashMap = HashMap::new(); + let mut appends: HashMap> = HashMap::new(); + // let mut maps: HashMap = HashMap::new(); let mut process_name_value = |name: &Name, value: String| { - let name_str = name.to_string(); + let name_index = name.index; match name.start_modifier { Some(StartModifier::NamedSkip) => { // do nothing, ignore this match } Some(StartModifier::Append(order)) => { appends - .entry(name_str) + .entry(name_index) .or_default() .push((value, order.unwrap_or_default())); } - Some(StartModifier::MapKey) => match maps.get(&name_str) { - Some(map_val) => { - map.insert(value, Value::String(map_val.to_string())); - } - None => { - maps.insert(name_str, value); - } - }, - Some(StartModifier::MapVal) => match maps.get(&name_str) { - Some(map_key) => { - map.insert(map_key, Value::String(value)); - } - None => { - maps.insert(name_str, value); - } - }, + // Some(StartModifier::MapKey) => match maps.get(&name_index) { + // Some(map_val) => { + // map.insert(value, Value::String(map_val.to_string())); + // } + // None => { + // maps.insert(name_index, value); + // } + // }, + // Some(StartModifier::MapVal) => match maps.get(&name_index) { + // Some(map_key) => { + // map.insert(map_key, Value::String(value)); + // } + // None => { + // maps.insert(name_index, value); + // } + // }, + Some(_) => { + // do nothing, ignore MapKey and MapVal + // because transform can know the key name + } 
None => { - map.insert(name.to_string(), Value::String(value)); + map.push((name_index, Value::String(value))); } } }; @@ -524,60 +731,37 @@ impl DissectProcessor { for (name, mut values) in appends { values.sort_by(|a, b| a.1.cmp(&b.1)); let value = values.into_iter().map(|(a, _)| a).join(sep); - map.insert(name, Value::String(value)); + map.push((name, Value::String(value))); } } Ok(map) } - fn process(&self, val: &str) -> Result { + fn process(&self, val: &str) -> Result, String> { let chs = val.chars().collect::>(); for pattern in &self.patterns { - if let Ok(map) = self.process_pattern(&chs, pattern) { - return Ok(map); + match self.process_pattern(&chs, pattern) { + Ok(map) => return Ok(map), + Err(e) => { + warn!("dissect processor: {}", e); + } } } Err("No matching pattern found".to_string()) } - - /// Update the output keys for each field. - fn update_output_keys(&mut self) { - // every pattern had been checked, so we can get all the output keys - let output_keys = self - .patterns - .iter() - .flat_map(|pattern| pattern.iter()) - .filter_map(|p| match p { - Part::Name(name) => { - if !name.is_empty() - && !name.start_modifier.as_ref().is_some_and(|x| { - *x == StartModifier::NamedSkip || *x == StartModifier::MapVal - }) - { - Some(name) - } else { - None - } - } - _ => None, - }) - .collect::>(); - for field in self.fields.iter_mut() { - for k in &output_keys { - field.output_fields_index_mapping.insert(k.to_string(), 0); - } - } - } } -impl TryFrom<&yaml_rust::yaml::Hash> for DissectProcessor { +impl TryFrom<&yaml_rust::yaml::Hash> for DissectProcessorBuilder { type Error = String; fn try_from(value: &yaml_rust::yaml::Hash) -> Result { - let mut processor = Self::default(); + let mut fields = Fields::default(); + let mut patterns = vec![]; + let mut ignore_missing = false; + let mut append_separator = None; for (k, v) in value.iter() { let key = k @@ -585,27 +769,38 @@ impl TryFrom<&yaml_rust::yaml::Hash> for DissectProcessor { .ok_or(format!("key must be a string, but got '{k:?}'"))?; match key { - FIELD_NAME => processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)), - FIELDS_NAME => processor.with_fields(yaml_fields(v, FIELDS_NAME)?), + FIELD_NAME => { + fields = Fields::one(yaml_new_field(v, FIELD_NAME)?); + } + FIELDS_NAME => { + fields = yaml_new_fields(v, FIELDS_NAME)?; + } PATTERN_NAME => { - let pattern: Pattern = yaml_parse_string(v, PATTERN_NAME)?; - processor.with_patterns(vec![pattern]); + let pattern: PatternInfo = yaml_parse_string(v, PATTERN_NAME)?; + patterns = vec![pattern]; } PATTERNS_NAME => { - let patterns = yaml_parse_strings(v, PATTERNS_NAME)?; - processor.with_patterns(patterns); + patterns = yaml_parse_strings(v, PATTERNS_NAME)?; } IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?) + ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?; } APPEND_SEPARATOR_NAME => { - processor.with_append_separator(yaml_string(v, APPEND_SEPARATOR_NAME)?) 
+ append_separator = Some(yaml_string(v, APPEND_SEPARATOR_NAME)?); } _ => {} } } - processor.update_output_keys(); - Ok(processor) + let output_keys = Self::build_output_keys(&patterns); + let builder = DissectProcessorBuilder { + fields, + patterns, + ignore_missing, + append_separator, + output_keys, + }; + + Ok(builder) } } @@ -618,59 +813,15 @@ impl Processor for DissectProcessor { self.ignore_missing } - fn fields(&self) -> &Fields { - &self.fields - } - - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn output_keys(&self) -> HashSet { - let mut result = HashSet::with_capacity(30); - for pattern in &self.patterns { - for part in pattern.iter() { - if let Part::Name(name) = part { - if !name.is_empty() { - result.insert(name.to_string()); - } - } - } - } - result - } - - fn exec_field(&self, val: &Value, _field: &Field) -> Result { - match val { - Value::String(val) => match self.process(val) { - Ok(map) => Ok(map), - Err(e) => { - warn!("dissect processor: {}", e); - Ok(Map::default()) - } - }, - _ => Err(format!( - "{} processor: expect string value, but got {val:?}", - self.kind() - )), - } - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { for field in self.fields.iter() { - let index = field.input_field.index; + let index = field.input_index(); match val.get(index) { - // TODO(qtang): Let this method use the intermediate state collection directly. Some(Value::String(val_str)) => match self.process(val_str) { - Ok(mut map) => { - field - .output_fields_index_mapping - .iter() - .for_each(|(k, output_index)| { - if let Some(v) = map.remove(k) { - val[*output_index] = v - } - }); + Ok(r) => { + for (k, v) in r { + val[k] = v; + } } Err(e) => { warn!("dissect processor: {}", e); @@ -681,7 +832,7 @@ impl Processor for DissectProcessor { return Err(format!( "{} processor: missing field: {}", self.kind(), - field.get_field_name() + field.input_name() )); } } @@ -705,17 +856,29 @@ fn is_valid_char(ch: char) -> bool { mod tests { use ahash::HashMap; - use super::{DissectProcessor, EndModifier, Name, Part, Pattern, StartModifier}; - use crate::etl::value::{Map, Value}; + use super::{DissectProcessor, EndModifier, NameInfo, PartInfo, PatternInfo, StartModifier}; + use crate::etl::processor::dissect::DissectProcessorBuilder; + use crate::etl::value::Value; fn assert(pattern_str: &str, input: &str, expected: HashMap) { let chs = input.chars().collect::>(); - let pattern = pattern_str.parse().unwrap(); + let pattern_infos: Vec = vec![pattern_str.parse().unwrap()]; + let output_keys: Vec = DissectProcessorBuilder::build_output_keys(&pattern_infos) + .into_iter() + .collect(); + let pattern = + DissectProcessorBuilder::build_patterns_from_pattern_infos(pattern_infos, &output_keys) + .unwrap(); let processor = DissectProcessor::default(); - let map = processor.process_pattern(&chs, &pattern).unwrap(); + let result: HashMap = processor + .process_pattern(&chs, &pattern[0]) + .unwrap() + .into_iter() + .map(|(k, v)| (output_keys[k].to_string(), v)) + .collect(); - assert_eq!(map, Map::from(expected), "pattern: {}", pattern_str); + assert_eq!(result, expected, "pattern: {}", pattern_str); } #[test] @@ -723,28 +886,28 @@ mod tests { let cases = [( "%{clientip} %{ident} %{auth} [%{timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{status} %{size}", vec![ - Part::Name("clientip".into()), - Part::Split(" ".into()), - Part::Name("ident".into()), - Part::Split(" ".into()), - Part::Name("auth".into()), - Part::Split(" [".into()), - Part::Name("timestamp".into()), - 
Part::Split("] \"".into()), - Part::Name("verb".into()), - Part::Split(" ".into()), - Part::Name("request".into()), - Part::Split(" HTTP/".into()), - Part::Name("httpversion".into()), - Part::Split("\" ".into()), - Part::Name("status".into()), - Part::Split(" ".into()), - Part::Name("size".into()), + PartInfo::Name("clientip".into()), + PartInfo::Split(" ".into()), + PartInfo::Name("ident".into()), + PartInfo::Split(" ".into()), + PartInfo::Name("auth".into()), + PartInfo::Split(" [".into()), + PartInfo::Name("timestamp".into()), + PartInfo::Split("] \"".into()), + PartInfo::Name("verb".into()), + PartInfo::Split(" ".into()), + PartInfo::Name("request".into()), + PartInfo::Split(" HTTP/".into()), + PartInfo::Name("httpversion".into()), + PartInfo::Split("\" ".into()), + PartInfo::Name("status".into()), + PartInfo::Split(" ".into()), + PartInfo::Name("size".into()), ], )]; for (pattern, expected) in cases.into_iter() { - let p: Pattern = pattern.parse().unwrap(); + let p: PatternInfo = pattern.parse().unwrap(); assert_eq!(p.parts, expected); } } @@ -755,13 +918,13 @@ mod tests { ( "%{} %{}", vec![ - Part::Name(Name { + PartInfo::Name(NameInfo { name: "".into(), start_modifier: None, end_modifier: None, }), - Part::Split(" ".into()), - Part::Name(Name { + PartInfo::Split(" ".into()), + PartInfo::Name(NameInfo { name: "".into(), start_modifier: None, end_modifier: None, @@ -771,61 +934,61 @@ mod tests { ( "%{ts->} %{level}", vec![ - Part::Name(Name { + PartInfo::Name(NameInfo { name: "ts".into(), start_modifier: None, end_modifier: Some(EndModifier), }), - Part::Split(" ".into()), - Part::Name("level".into()), + PartInfo::Split(" ".into()), + PartInfo::Name("level".into()), ], ), ( "[%{ts}]%{->}[%{level}]", vec![ - Part::Split("[".into()), - Part::Name(Name { + PartInfo::Split("[".into()), + PartInfo::Name(NameInfo { name: "ts".into(), start_modifier: None, end_modifier: None, }), - Part::Split("]".into()), - Part::Name(Name { + PartInfo::Split("]".into()), + PartInfo::Name(NameInfo { name: "".into(), start_modifier: None, end_modifier: Some(EndModifier), }), - Part::Split("[".into()), - Part::Name(Name { + PartInfo::Split("[".into()), + PartInfo::Name(NameInfo { name: "level".into(), start_modifier: None, end_modifier: None, }), - Part::Split("]".into()), + PartInfo::Split("]".into()), ], ), ( "%{+name} %{+name} %{+name} %{+name}", vec![ - Part::Name(Name { + PartInfo::Name(NameInfo { name: "name".into(), start_modifier: Some(StartModifier::Append(None)), end_modifier: None, }), - Part::Split(" ".into()), - Part::Name(Name { + PartInfo::Split(" ".into()), + PartInfo::Name(NameInfo { name: "name".into(), start_modifier: Some(StartModifier::Append(None)), end_modifier: None, }), - Part::Split(" ".into()), - Part::Name(Name { + PartInfo::Split(" ".into()), + PartInfo::Name(NameInfo { name: "name".into(), start_modifier: Some(StartModifier::Append(None)), end_modifier: None, }), - Part::Split(" ".into()), - Part::Name(Name { + PartInfo::Split(" ".into()), + PartInfo::Name(NameInfo { name: "name".into(), start_modifier: Some(StartModifier::Append(None)), end_modifier: None, @@ -835,25 +998,25 @@ mod tests { ( "%{+name/2} %{+name/4} %{+name/3} %{+name/1}", vec![ - Part::Name(Name { + PartInfo::Name(NameInfo { name: "name".into(), start_modifier: Some(StartModifier::Append(Some(2))), end_modifier: None, }), - Part::Split(" ".into()), - Part::Name(Name { + PartInfo::Split(" ".into()), + PartInfo::Name(NameInfo { name: "name".into(), start_modifier: Some(StartModifier::Append(Some(4))), end_modifier: 
None, }), - Part::Split(" ".into()), - Part::Name(Name { + PartInfo::Split(" ".into()), + PartInfo::Name(NameInfo { name: "name".into(), start_modifier: Some(StartModifier::Append(Some(3))), end_modifier: None, }), - Part::Split(" ".into()), - Part::Name(Name { + PartInfo::Split(" ".into()), + PartInfo::Name(NameInfo { name: "name".into(), start_modifier: Some(StartModifier::Append(Some(1))), end_modifier: None, @@ -863,67 +1026,67 @@ mod tests { ( "%{clientip} %{?ident} %{?auth} [%{timestamp}]", vec![ - Part::Name(Name { + PartInfo::Name(NameInfo { name: "clientip".into(), start_modifier: None, end_modifier: None, }), - Part::Split(" ".into()), - Part::Name(Name { + PartInfo::Split(" ".into()), + PartInfo::Name(NameInfo { name: "ident".into(), start_modifier: Some(StartModifier::NamedSkip), end_modifier: None, }), - Part::Split(" ".into()), - Part::Name(Name { + PartInfo::Split(" ".into()), + PartInfo::Name(NameInfo { name: "auth".into(), start_modifier: Some(StartModifier::NamedSkip), end_modifier: None, }), - Part::Split(" [".into()), - Part::Name(Name { + PartInfo::Split(" [".into()), + PartInfo::Name(NameInfo { name: "timestamp".into(), start_modifier: None, end_modifier: None, }), - Part::Split("]".into()), + PartInfo::Split("]".into()), ], ), ( "[%{ts}] [%{level}] %{*p1}:%{&p1} %{*p2}:%{&p2}", vec![ - Part::Split("[".into()), - Part::Name(Name { + PartInfo::Split("[".into()), + PartInfo::Name(NameInfo { name: "ts".into(), start_modifier: None, end_modifier: None, }), - Part::Split("] [".into()), - Part::Name(Name { + PartInfo::Split("] [".into()), + PartInfo::Name(NameInfo { name: "level".into(), start_modifier: None, end_modifier: None, }), - Part::Split("] ".into()), - Part::Name(Name { + PartInfo::Split("] ".into()), + PartInfo::Name(NameInfo { name: "p1".into(), start_modifier: Some(StartModifier::MapKey), end_modifier: None, }), - Part::Split(":".into()), - Part::Name(Name { + PartInfo::Split(":".into()), + PartInfo::Name(NameInfo { name: "p1".into(), start_modifier: Some(StartModifier::MapVal), end_modifier: None, }), - Part::Split(" ".into()), - Part::Name(Name { + PartInfo::Split(" ".into()), + PartInfo::Name(NameInfo { name: "p2".into(), start_modifier: Some(StartModifier::MapKey), end_modifier: None, }), - Part::Split(":".into()), - Part::Name(Name { + PartInfo::Split(":".into()), + PartInfo::Name(NameInfo { name: "p2".into(), start_modifier: Some(StartModifier::MapVal), end_modifier: None, @@ -933,13 +1096,13 @@ mod tests { ( "%{&p1}:%{*p1}", vec![ - Part::Name(Name { + PartInfo::Name(NameInfo { name: "p1".into(), start_modifier: Some(StartModifier::MapVal), end_modifier: None, }), - Part::Split(":".into()), - Part::Name(Name { + PartInfo::Split(":".into()), + PartInfo::Name(NameInfo { name: "p1".into(), start_modifier: Some(StartModifier::MapKey), end_modifier: None, @@ -949,7 +1112,7 @@ mod tests { ]; for (pattern, expected) in cases.into_iter() { - let p: Pattern = pattern.parse().unwrap(); + let p: PatternInfo = pattern.parse().unwrap(); assert_eq!(p.parts, expected); } } @@ -1029,7 +1192,7 @@ mod tests { ]; for (pattern, expected) in cases.into_iter() { - let err = pattern.parse::().unwrap_err(); + let err = pattern.parse::().unwrap_err(); assert_eq!(err, expected); } } @@ -1164,45 +1327,4 @@ mod tests { ); } } - - #[test] - fn test_dissect_reference_keys() { - let cases = [ - ( - "[%{ts}] [%{level}] %{*p1}:%{&p1} %{*p2}:%{&p2}", - "[2018-08-10T17:15:42,466] [ERR] ip:1.2.3.4 error:REFUSED", - [ - ("ts", "2018-08-10T17:15:42,466"), - ("level", "ERR"), - ("ip", 
"1.2.3.4"), - ("error", "REFUSED"), - ], - ), - ( - "[%{ts}] [%{level}] %{&p1}:%{*p1} %{*p2}:%{&p2}", - "[2018-08-10T17:15:42,466] [ERR] ip:1.2.3.4 error:REFUSED", - [ - ("ts", "2018-08-10T17:15:42,466"), - ("level", "ERR"), - ("1.2.3.4", "ip"), - ("error", "REFUSED"), - ], - ), - ] - .into_iter() - .map(|(pattern, input, expected)| { - let map = expected - .into_iter() - .map(|(k, v)| (k.to_string(), Value::String(v.to_string()))); - (pattern, input, map) - }); - - for (pattern_str, input, expected) in cases { - assert( - pattern_str, - input, - expected.collect::>(), - ); - } - } } diff --git a/src/pipeline/src/etl/processor/epoch.rs b/src/pipeline/src/etl/processor/epoch.rs index 7af075bdb0..32c7d61786 100644 --- a/src/pipeline/src/etl/processor/epoch.rs +++ b/src/pipeline/src/etl/processor/epoch.rs @@ -14,17 +14,17 @@ use ahash::HashSet; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{Fields, OneInputOneOutputField}; use crate::etl::processor::{ - update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, Processor, - FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder, + ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, }; use crate::etl::value::time::{ MICROSECOND_RESOLUTION, MICRO_RESOLUTION, MILLISECOND_RESOLUTION, MILLI_RESOLUTION, MS_RESOLUTION, NANOSECOND_RESOLUTION, NANO_RESOLUTION, NS_RESOLUTION, SECOND_RESOLUTION, SEC_RESOLUTION, S_RESOLUTION, US_RESOLUTION, }; -use crate::etl::value::{Map, Timestamp, Value}; +use crate::etl::value::{Timestamp, Value}; pub(crate) const PROCESSOR_EPOCH: &str = "epoch"; const RESOLUTION_NAME: &str = "resolution"; @@ -52,12 +52,56 @@ impl TryFrom<&str> for Resolution { } } +#[derive(Debug, Default)] +pub struct EpochProcessorBuilder { + fields: Fields, + resolution: Resolution, + ignore_missing: bool, +} + +impl ProcessorBuilder for EpochProcessorBuilder { + fn output_keys(&self) -> HashSet<&str> { + self.fields + .iter() + .map(|f| f.target_or_input_field()) + .collect() + } + + fn input_keys(&self) -> HashSet<&str> { + self.fields.iter().map(|f| f.input_field()).collect() + } + + fn build(self, intermediate_keys: &[String]) -> Result { + self.build(intermediate_keys).map(ProcessorKind::Epoch) + } +} + +impl EpochProcessorBuilder { + pub fn build(self, intermediate_keys: &[String]) -> Result { + let mut real_fields = vec![]; + for field in self.fields.into_iter() { + let input = OneInputOneOutputField::build( + "epoch", + intermediate_keys, + field.input_field(), + field.target_or_input_field(), + )?; + real_fields.push(input); + } + Ok(EpochProcessor { + fields: real_fields, + resolution: self.resolution, + ignore_missing: self.ignore_missing, + }) + } +} + /// support string, integer, float, time, epoch /// deprecated it should be removed in the future /// Reserved for compatibility only #[derive(Debug, Default)] pub struct EpochProcessor { - fields: Fields, + fields: Vec, resolution: Resolution, ignore_missing: bool, // description @@ -68,19 +112,6 @@ pub struct EpochProcessor { } impl EpochProcessor { - fn with_fields(&mut self, mut fields: Fields) { - update_one_one_output_keys(&mut fields); - self.fields = fields - } - - fn with_resolution(&mut self, resolution: Resolution) { - self.resolution = resolution; - } - - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; - } - fn parse(&self, val: &Value) -> Result { let t: i64 = match val { Value::String(s) => s @@ 
-117,19 +148,15 @@ impl EpochProcessor { Resolution::Nano => Ok(Timestamp::Nanosecond(t)), } } - - fn process_field(&self, val: &Value, field: &Field) -> Result { - let key = field.get_target_field(); - - Ok(Map::one(key, Value::Timestamp(self.parse(val)?))) - } } -impl TryFrom<&yaml_rust::yaml::Hash> for EpochProcessor { +impl TryFrom<&yaml_rust::yaml::Hash> for EpochProcessorBuilder { type Error = String; fn try_from(hash: &yaml_rust::yaml::Hash) -> Result { - let mut processor = EpochProcessor::default(); + let mut fields = Fields::default(); + let mut resolution = Resolution::default(); + let mut ignore_missing = false; for (k, v) in hash { let key = k @@ -138,24 +165,29 @@ impl TryFrom<&yaml_rust::yaml::Hash> for EpochProcessor { match key { FIELD_NAME => { - processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)); + fields = Fields::one(yaml_new_field(v, FIELD_NAME)?); } FIELDS_NAME => { - processor.with_fields(yaml_fields(v, FIELDS_NAME)?); + fields = yaml_new_fields(v, FIELDS_NAME)?; } RESOLUTION_NAME => { let s = yaml_string(v, RESOLUTION_NAME)?.as_str().try_into()?; - processor.with_resolution(s); + resolution = s; } IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?); + ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?; } _ => {} } } + let builder = EpochProcessorBuilder { + fields, + resolution, + ignore_missing, + }; - Ok(processor) + Ok(builder) } } @@ -168,49 +200,23 @@ impl Processor for EpochProcessor { self.ignore_missing } - fn fields(&self) -> &Fields { - &self.fields - } - - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn output_keys(&self) -> HashSet { - self.fields - .iter() - .map(|f| f.get_target_field().to_string()) - .collect() - } - - fn exec_field(&self, val: &Value, field: &Field) -> Result { - self.process_field(val, field) - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { for field in self.fields.iter() { - let index = field.input_field.index; + let index = field.input_index(); match val.get(index) { Some(Value::Null) | None => { if !self.ignore_missing { return Err(format!( "{} processor: missing field: {}", self.kind(), - field.get_field_name() + field.input_name() )); } } Some(v) => { - // TODO(qtang): Let this method use the intermediate state collection directly. 
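// Sketch (an assumption about crate::etl::field internals, for orientation
// only): OneInputOneOutputField::build plausibly resolves both key names to
// positions in the shared intermediate-value vector once, at
// pipeline-construction time, along the lines of:
//
//   let input_index  = find_key_index(intermediate_keys, input_field, "epoch")?;
//   let output_index = find_key_index(intermediate_keys, target_field, "epoch")?;
//   // keep (name, index) for both sides so exec_mut can assign by index
//
// That precomputation is what lets the rewritten arm below write
// val[output_index] directly instead of building a temporary Map per row,
// as the removed code here did.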
- let mut map = self.process_field(v, field)?; - field - .output_fields_index_mapping - .iter() - .for_each(|(k, output_index)| { - if let Some(v) = map.remove(k) { - val[*output_index] = v; - } - }); + let timestamp = self.parse(v)?; + let output_index = field.output_index(); + val[output_index] = Value::Timestamp(timestamp); } } } @@ -225,8 +231,10 @@ mod tests { #[test] fn test_parse_epoch() { - let mut processor = EpochProcessor::default(); - processor.with_resolution(super::Resolution::Second); + let processor = EpochProcessor { + resolution: super::Resolution::Second, + ..Default::default() + }; let values = [ Value::String("1573840000".into()), diff --git a/src/pipeline/src/etl/processor/gsub.rs b/src/pipeline/src/etl/processor/gsub.rs index 9129dc1a0f..1b8e581e6a 100644 --- a/src/pipeline/src/etl/processor/gsub.rs +++ b/src/pipeline/src/etl/processor/gsub.rs @@ -15,45 +15,43 @@ use ahash::HashSet; use regex::Regex; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{Fields, OneInputOneOutputField}; use crate::etl::processor::{ - update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, Processor, - FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder, + ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME, }; -use crate::etl::value::{Array, Map, Value}; +use crate::etl::value::Value; pub(crate) const PROCESSOR_GSUB: &str = "gsub"; const REPLACEMENT_NAME: &str = "replacement"; -/// A processor to replace all matches of a pattern in string by a replacement, only support string value, and array string value #[derive(Debug, Default)] -pub struct GsubProcessor { +pub struct GsubProcessorBuilder { fields: Fields, pattern: Option, replacement: Option, ignore_missing: bool, } -impl GsubProcessor { - fn with_fields(&mut self, mut fields: Fields) { - update_one_one_output_keys(&mut fields); - self.fields = fields; +impl ProcessorBuilder for GsubProcessorBuilder { + fn output_keys(&self) -> HashSet<&str> { + self.fields + .iter() + .map(|f| f.target_or_input_field()) + .collect() } - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; + fn input_keys(&self) -> HashSet<&str> { + self.fields.iter().map(|f| f.input_field()).collect() } - fn try_pattern(&mut self, pattern: &str) -> Result<(), String> { - self.pattern = Some(Regex::new(pattern).map_err(|e| e.to_string())?); - Ok(()) - } - - fn with_replacement(&mut self, replacement: impl Into) { - self.replacement = Some(replacement.into()); + fn build(self, intermediate_keys: &[String]) -> Result { + self.build(intermediate_keys).map(ProcessorKind::Gsub) } +} +impl GsubProcessorBuilder { fn check(self) -> Result { if self.pattern.is_none() { return Err("pattern is required".to_string()); @@ -66,7 +64,49 @@ impl GsubProcessor { Ok(self) } - fn process_string_field(&self, val: &str, field: &Field) -> Result { + fn build(self, intermediate_keys: &[String]) -> Result { + let mut real_fields = vec![]; + for field in self.fields.into_iter() { + let input = OneInputOneOutputField::build( + "gsub", + intermediate_keys, + field.input_field(), + field.target_or_input_field(), + )?; + real_fields.push(input); + } + Ok(GsubProcessor { + fields: real_fields, + pattern: self.pattern, + replacement: self.replacement, + ignore_missing: self.ignore_missing, + }) + } +} + +/// A processor to replace all matches of a pattern in string by a replacement, only support string 
value, and array string value +#[derive(Debug, Default)] +pub struct GsubProcessor { + fields: Vec, + pattern: Option, + replacement: Option, + ignore_missing: bool, +} + +impl GsubProcessor { + fn check(self) -> Result { + if self.pattern.is_none() { + return Err("pattern is required".to_string()); + } + + if self.replacement.is_none() { + return Err("replacement is required".to_string()); + } + + Ok(self) + } + + fn process_string(&self, val: &str) -> Result { let replacement = self.replacement.as_ref().unwrap(); let new_val = self .pattern @@ -76,42 +116,28 @@ impl GsubProcessor { .to_string(); let val = Value::String(new_val); - let key = field.get_target_field(); - - Ok(Map::one(key, val)) + Ok(val) } - fn process_array_field(&self, arr: &Array, field: &Field) -> Result { - let key = field.get_target_field(); - - let re = self.pattern.as_ref().unwrap(); - let replacement = self.replacement.as_ref().unwrap(); - - let mut result = Array::default(); - for val in arr.iter() { - match val { - Value::String(haystack) => { - let new_val = re.replace_all(haystack, replacement).to_string(); - result.push(Value::String(new_val)); - } - _ => { - return Err(format!( - "{} processor: expect string or array string, but got {val:?}", - self.kind() - )) - } - } + fn process(&self, val: &Value) -> Result { + match val { + Value::String(val) => self.process_string(val), + _ => Err(format!( + "{} processor: expect string or array string, but got {val:?}", + self.kind() + )), } - - Ok(Map::one(key, Value::Array(result))) } } -impl TryFrom<&yaml_rust::yaml::Hash> for GsubProcessor { +impl TryFrom<&yaml_rust::yaml::Hash> for GsubProcessorBuilder { type Error = String; fn try_from(value: &yaml_rust::yaml::Hash) -> Result { - let mut processor = GsubProcessor::default(); + let mut fields = Fields::default(); + let mut ignore_missing = false; + let mut pattern = None; + let mut replacement = None; for (k, v) in value.iter() { let key = k @@ -119,27 +145,36 @@ impl TryFrom<&yaml_rust::yaml::Hash> for GsubProcessor { .ok_or(format!("key must be a string, but got {k:?}"))?; match key { FIELD_NAME => { - processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)); + fields = Fields::one(yaml_new_field(v, FIELD_NAME)?); } FIELDS_NAME => { - processor.with_fields(yaml_fields(v, FIELDS_NAME)?); + fields = yaml_new_fields(v, FIELDS_NAME)?; } PATTERN_NAME => { - processor.try_pattern(&yaml_string(v, PATTERN_NAME)?)?; + let pattern_str = yaml_string(v, PATTERN_NAME)?; + pattern = Some(Regex::new(&pattern_str).map_err(|e| e.to_string())?); } REPLACEMENT_NAME => { - processor.with_replacement(yaml_string(v, REPLACEMENT_NAME)?); + let replacement_str = yaml_string(v, REPLACEMENT_NAME)?; + replacement = Some(replacement_str); } IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?); + ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?; } _ => {} } } - processor.check() + let builder = GsubProcessorBuilder { + fields, + pattern, + replacement, + ignore_missing, + }; + + builder.check() } } @@ -152,56 +187,23 @@ impl crate::etl::processor::Processor for GsubProcessor { self.ignore_missing } - fn fields(&self) -> &Fields { - &self.fields - } - - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn output_keys(&self) -> HashSet { - self.fields - .iter() - .map(|f| f.get_target_field().to_string()) - .collect() - } - - fn exec_field(&self, val: &Value, field: &Field) -> Result { - match val { - Value::String(val) => self.process_string_field(val, field), - 
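// Illustrative usage of the new single-value path (hypothetical input,
// mirroring test_string_value further down; not part of the patch):
//
//   let p = GsubProcessor {
//       pattern: Some(regex::Regex::new(r"\d+").unwrap()),
//       replacement: Some("xxx".to_string()),
//       ..Default::default()
//   };
//   assert_eq!(p.process(&Value::String("a1b22".into())).unwrap(),
//              Value::String("axxxbxxx".into()));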
Value::Array(arr) => self.process_array_field(arr, field), - _ => Err(format!( - "{} processor: expect string or array string, but got {val:?}", - self.kind() - )), - } - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { for field in self.fields.iter() { - let index = field.input_field.index; + let index = field.input_index(); match val.get(index) { Some(Value::Null) | None => { if !self.ignore_missing { return Err(format!( "{} processor: missing field: {}", self.kind(), - field.get_field_name() + field.input_name() )); } } Some(v) => { - // TODO(qtang): Let this method use the intermediate state collection directly. - let mut map = self.exec_field(v, field)?; - field - .output_fields_index_mapping - .iter() - .for_each(|(k, output_index)| { - if let Some(v) = map.remove(k) { - val[*output_index] = v; - } - }); + let result = self.process(v)?; + let output_index = field.output_index(); + val[output_index] = result; } } } @@ -211,55 +213,20 @@ impl crate::etl::processor::Processor for GsubProcessor { #[cfg(test)] mod tests { - use crate::etl::field::Field; use crate::etl::processor::gsub::GsubProcessor; - use crate::etl::processor::Processor; - use crate::etl::value::{Map, Value}; + use crate::etl::value::Value; #[test] fn test_string_value() { - let mut processor = GsubProcessor::default(); - processor.try_pattern(r"\d+").unwrap(); - processor.with_replacement("xxx"); + let processor = GsubProcessor { + pattern: Some(regex::Regex::new(r"\d+").unwrap()), + replacement: Some("xxx".to_string()), + ..Default::default() + }; - let field = Field::new("message"); let val = Value::String("123".to_string()); - let result = processor.exec_field(&val, &field).unwrap(); + let result = processor.process(&val).unwrap(); - assert_eq!( - result, - Map::one("message", Value::String("xxx".to_string())) - ); - } - - #[test] - fn test_array_string_value() { - let mut processor = GsubProcessor::default(); - processor.try_pattern(r"\d+").unwrap(); - processor.with_replacement("xxx"); - - let field = Field::new("message"); - let val = Value::Array( - vec![ - Value::String("123".to_string()), - Value::String("456".to_string()), - ] - .into(), - ); - let result = processor.exec_field(&val, &field).unwrap(); - - assert_eq!( - result, - Map::one( - "message", - Value::Array( - vec![ - Value::String("xxx".to_string()), - Value::String("xxx".to_string()) - ] - .into() - ) - ) - ); + assert_eq!(result, Value::String("xxx".to_string())); } } diff --git a/src/pipeline/src/etl/processor/join.rs b/src/pipeline/src/etl/processor/join.rs index b1ab620b66..d4b309d5c2 100644 --- a/src/pipeline/src/etl/processor/join.rs +++ b/src/pipeline/src/etl/processor/join.rs @@ -14,40 +14,78 @@ use ahash::HashSet; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{Fields, OneInputOneOutputField}; use crate::etl::processor::{ - update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, Processor, - FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, SEPARATOR_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder, + ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, SEPARATOR_NAME, }; -use crate::etl::value::{Array, Map, Value}; +use crate::etl::value::{Array, Value}; pub(crate) const PROCESSOR_JOIN: &str = "join"; -/// A processor to join each element of an array into a single string using a separator string between each element #[derive(Debug, Default)] -pub struct JoinProcessor { +pub struct JoinProcessorBuilder { fields: Fields, separator: Option, 
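// Illustrative note (not part of the patch): JoinProcessor::process below
// concatenates the string forms of an array's elements with the configured
// separator, e.g. ["a", "b"] with separator "-" becomes "a-b", exactly as
// exercised by test_join_processor at the end of this file.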
ignore_missing: bool, } +impl ProcessorBuilder for JoinProcessorBuilder { + fn output_keys(&self) -> HashSet<&str> { + self.fields + .iter() + .map(|f| f.target_or_input_field()) + .collect() + } + + fn input_keys(&self) -> HashSet<&str> { + self.fields.iter().map(|f| f.input_field()).collect() + } + + fn build(self, intermediate_keys: &[String]) -> Result { + self.build(intermediate_keys).map(ProcessorKind::Join) + } +} + +impl JoinProcessorBuilder { + fn check(self) -> Result { + if self.separator.is_none() { + return Err("separator is required".to_string()); + } + + Ok(self) + } + + pub fn build(self, intermediate_keys: &[String]) -> Result { + let mut real_fields = vec![]; + for field in self.fields.into_iter() { + let input = OneInputOneOutputField::build( + "join", + intermediate_keys, + field.input_field(), + field.target_or_input_field(), + )?; + real_fields.push(input); + } + + Ok(JoinProcessor { + fields: real_fields, + separator: self.separator, + ignore_missing: self.ignore_missing, + }) + } +} + +/// A processor to join each element of an array into a single string using a separator string between each element +#[derive(Debug, Default)] +pub struct JoinProcessor { + fields: Vec, + separator: Option, + ignore_missing: bool, +} + impl JoinProcessor { - fn with_fields(&mut self, mut fields: Fields) { - update_one_one_output_keys(&mut fields); - self.fields = fields; - } - - fn with_separator(&mut self, separator: impl Into) { - self.separator = Some(separator.into()); - } - - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; - } - - fn process_field(&self, arr: &Array, field: &Field) -> Result { - let key = field.get_target_field(); - + fn process(&self, arr: &Array) -> Result { let sep = self.separator.as_ref().unwrap(); let val = arr .iter() @@ -55,7 +93,7 @@ impl JoinProcessor { .collect::>() .join(sep); - Ok(Map::one(key, Value::String(val))) + Ok(Value::String(val)) } fn check(self) -> Result { @@ -67,11 +105,13 @@ impl JoinProcessor { } } -impl TryFrom<&yaml_rust::yaml::Hash> for JoinProcessor { +impl TryFrom<&yaml_rust::yaml::Hash> for JoinProcessorBuilder { type Error = String; fn try_from(value: &yaml_rust::yaml::Hash) -> Result { - let mut processor = JoinProcessor::default(); + let mut fields = Fields::default(); + let mut separator = None; + let mut ignore_missing = false; for (k, v) in value.iter() { let key = k @@ -79,30 +119,31 @@ impl TryFrom<&yaml_rust::yaml::Hash> for JoinProcessor { .ok_or(format!("key must be a string, but got {k:?}"))?; match key { FIELD_NAME => { - processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)); + fields = Fields::one(yaml_new_field(v, FIELD_NAME)?); } FIELDS_NAME => { - processor.with_fields(yaml_fields(v, FIELDS_NAME)?); + fields = yaml_new_fields(v, FIELDS_NAME)?; } SEPARATOR_NAME => { - processor.with_separator(yaml_string(v, SEPARATOR_NAME)?); + separator = Some(yaml_string(v, SEPARATOR_NAME)?); } IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?); + ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?; } _ => {} } } - processor.check() + let builder = JoinProcessorBuilder { + fields, + separator, + ignore_missing, + }; + builder.check() } } impl Processor for JoinProcessor { - fn fields(&self) -> &Fields { - &self.fields - } - fn kind(&self) -> &str { PROCESSOR_JOIN } @@ -111,49 +152,21 @@ impl Processor for JoinProcessor { self.ignore_missing } - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn 
output_keys(&self) -> HashSet { - self.fields - .iter() - .map(|f| f.get_target_field().to_string()) - .collect() - } - - fn exec_field(&self, val: &Value, field: &Field) -> Result { - match val { - Value::Array(arr) => self.process_field(arr, field), - _ => Err(format!( - "{} processor: expect array value, but got {val:?}", - self.kind() - )), - } - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { for field in self.fields.iter() { - let index = field.input_field.index; + let index = field.input_index(); match val.get(index) { Some(Value::Array(arr)) => { - // TODO(qtang): Let this method use the intermediate state collection directly. - let mut map = self.process_field(arr, field)?; - field - .output_fields_index_mapping - .iter() - .for_each(|(k, output_index)| { - if let Some(v) = map.remove(k) { - val[*output_index] = v; - } - }); + let result = self.process(arr)?; + let output_index = field.output_index(); + val[output_index] = result; } Some(Value::Null) | None => { if !self.ignore_missing { return Err(format!( "{} processor: missing field: {}", self.kind(), - field.get_field_name() + field.input_name() )); } } @@ -173,25 +186,22 @@ impl Processor for JoinProcessor { #[cfg(test)] mod tests { - use crate::etl::field::Field; use crate::etl::processor::join::JoinProcessor; - use crate::etl::processor::Processor; - use crate::etl::value::{Map, Value}; + use crate::etl::value::Value; #[test] fn test_join_processor() { - let mut processor = JoinProcessor::default(); - processor.with_separator("-"); + let processor = JoinProcessor { + separator: Some("-".to_string()), + ..Default::default() + }; - let field = Field::new("test"); - let arr = Value::Array( - vec![ - Value::String("a".to_string()), - Value::String("b".to_string()), - ] - .into(), - ); - let result = processor.exec_field(&arr, &field).unwrap(); - assert_eq!(result, Map::one("test", Value::String("a-b".to_string()))); + let arr = vec![ + Value::String("a".to_string()), + Value::String("b".to_string()), + ] + .into(); + let result = processor.process(&arr).unwrap(); + assert_eq!(result, Value::String("a-b".to_string())); } } diff --git a/src/pipeline/src/etl/processor/letter.rs b/src/pipeline/src/etl/processor/letter.rs index 6502d6f221..f388b5a2a9 100644 --- a/src/pipeline/src/etl/processor/letter.rs +++ b/src/pipeline/src/etl/processor/letter.rs @@ -14,12 +14,12 @@ use ahash::HashSet; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{Fields, OneInputOneOutputField}; use crate::etl::processor::{ - update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, Processor, - FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, METHOD_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder, + ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, METHOD_NAME, }; -use crate::etl::value::{Map, Value}; +use crate::etl::value::Value; pub(crate) const PROCESSOR_LETTER: &str = "letter"; @@ -54,29 +54,61 @@ impl std::str::FromStr for Method { } } -/// only support string value #[derive(Debug, Default)] -pub struct LetterProcessor { +pub struct LetterProcessorBuilder { fields: Fields, method: Method, ignore_missing: bool, } +impl ProcessorBuilder for LetterProcessorBuilder { + fn output_keys(&self) -> HashSet<&str> { + self.fields + .iter() + .map(|f| f.target_or_input_field()) + .collect() + } + + fn input_keys(&self) -> HashSet<&str> { + self.fields.iter().map(|f| f.input_field()).collect() + } + + fn build(self, intermediate_keys: &[String]) -> Result { + 
self.build(intermediate_keys).map(ProcessorKind::Letter) + } +} + +impl LetterProcessorBuilder { + pub fn build(self, intermediate_keys: &[String]) -> Result { + let mut real_fields = vec![]; + for field in self.fields.into_iter() { + let input = OneInputOneOutputField::build( + "letter", + intermediate_keys, + field.input_field(), + field.target_or_input_field(), + )?; + real_fields.push(input); + } + + Ok(LetterProcessor { + fields: real_fields, + method: self.method, + ignore_missing: self.ignore_missing, + }) + } +} + +/// only support string value +#[derive(Debug, Default)] +pub struct LetterProcessor { + fields: Vec, + method: Method, + ignore_missing: bool, +} + impl LetterProcessor { - fn with_fields(&mut self, mut fields: Fields) { - update_one_one_output_keys(&mut fields); - self.fields = fields; - } - - fn with_method(&mut self, method: Method) { - self.method = method; - } - - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; - } - - fn process_field(&self, val: &str, field: &Field) -> Result { + fn process_field(&self, val: &str) -> Result { let processed = match self.method { Method::Upper => val.to_uppercase(), Method::Lower => val.to_lowercase(), @@ -84,17 +116,17 @@ impl LetterProcessor { }; let val = Value::String(processed); - let key = field.get_target_field(); - - Ok(Map::one(key, val)) + Ok(val) } } -impl TryFrom<&yaml_rust::yaml::Hash> for LetterProcessor { +impl TryFrom<&yaml_rust::yaml::Hash> for LetterProcessorBuilder { type Error = String; fn try_from(value: &yaml_rust::yaml::Hash) -> Result { - let mut processor = LetterProcessor::default(); + let mut fields = Fields::default(); + let mut method = Method::Lower; + let mut ignore_missing = false; for (k, v) in value.iter() { let key = k @@ -102,23 +134,26 @@ impl TryFrom<&yaml_rust::yaml::Hash> for LetterProcessor { .ok_or(format!("key must be a string, but got {k:?}"))?; match key { FIELD_NAME => { - processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)); + fields = Fields::one(yaml_new_field(v, FIELD_NAME)?); } FIELDS_NAME => { - processor.with_fields(yaml_fields(v, FIELDS_NAME)?); + fields = yaml_new_fields(v, FIELDS_NAME)?; } METHOD_NAME => { - let method = yaml_string(v, METHOD_NAME)?; - processor.with_method(method.parse()?); + method = yaml_string(v, METHOD_NAME)?.parse()?; } IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?); + ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?; } _ => {} } } - Ok(processor) + Ok(LetterProcessorBuilder { + fields, + method, + ignore_missing, + }) } } @@ -131,53 +166,21 @@ impl Processor for LetterProcessor { self.ignore_missing } - fn fields(&self) -> &Fields { - &self.fields - } - - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn output_keys(&self) -> HashSet { - self.fields - .iter() - .map(|f| f.get_target_field().to_string()) - .collect() - } - - fn exec_field(&self, val: &Value, field: &Field) -> Result { - match val { - Value::String(val) => self.process_field(val, field), - _ => Err(format!( - "{} processor: expect string value, but got {val:?}", - self.kind() - )), - } - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { for field in self.fields.iter() { - let index = field.input_field.index; + let index = field.input_index(); match val.get(index) { Some(Value::String(s)) => { - // TODO(qtang): Let this method use the intermediate state collection directly. 
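// Illustrative behaviour of process_field (hypothetical inputs, mirroring
// the rewritten tests at the bottom of this file; not part of the patch):
//
//   Method::Upper:   "pipeline" -> "PIPELINE"
//   Method::Lower:   "Pipeline" -> "pipeline"
//   Method::Capital: "pipeline" -> "Pipeline"  (via the capitalize helper)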
- let mut processed = self.process_field(s, field)?; - field - .output_fields_index_mapping - .iter() - .for_each(|(k, output_index)| { - if let Some(v) = processed.remove(k) { - val[*output_index] = v; - } - }); + let result = self.process_field(s)?; + let (_, output_index) = field.output(); + val[*output_index] = result; } Some(Value::Null) | None => { if !self.ignore_missing { return Err(format!( "{} processor: missing field: {}", self.kind(), - field.get_field_name() + &field.input().name )); } } @@ -204,33 +207,36 @@ fn capitalize(s: &str) -> String { #[cfg(test)] mod tests { - use crate::etl::field::Fields; use crate::etl::processor::letter::{LetterProcessor, Method}; - use crate::etl::value::{Map, Value}; + use crate::etl::value::Value; #[test] fn test_process() { - let field = "letter"; - let ff: crate::etl::processor::Field = field.parse().unwrap(); - let mut processor = LetterProcessor::default(); - processor.with_fields(Fields::one(ff.clone())); - { - processor.with_method(Method::Upper); - let processed = processor.process_field("pipeline", &ff).unwrap(); - assert_eq!(Map::one(field, Value::String("PIPELINE".into())), processed) + let processor = LetterProcessor { + method: Method::Upper, + ..Default::default() + }; + let processed = processor.process_field("pipeline").unwrap(); + assert_eq!(Value::String("PIPELINE".into()), processed) } { - processor.with_method(Method::Lower); - let processed = processor.process_field("Pipeline", &ff).unwrap(); - assert_eq!(Map::one(field, Value::String("pipeline".into())), processed) + let processor = LetterProcessor { + method: Method::Lower, + ..Default::default() + }; + let processed = processor.process_field("Pipeline").unwrap(); + assert_eq!(Value::String("pipeline".into()), processed) } { - processor.with_method(Method::Capital); - let processed = processor.process_field("pipeline", &ff).unwrap(); - assert_eq!(Map::one(field, Value::String("Pipeline".into())), processed) + let processor = LetterProcessor { + method: Method::Capital, + ..Default::default() + }; + let processed = processor.process_field("pipeline").unwrap(); + assert_eq!(Value::String("Pipeline".into()), processed) } } } diff --git a/src/pipeline/src/etl/processor/regex.rs b/src/pipeline/src/etl/processor/regex.rs index e5af339585..a1de2ea76d 100644 --- a/src/pipeline/src/etl/processor/regex.rs +++ b/src/pipeline/src/etl/processor/regex.rs @@ -18,16 +18,17 @@ const PATTERNS_NAME: &str = "patterns"; pub(crate) const PROCESSOR_REGEX: &str = "regex"; -use ahash::HashSet; +use ahash::{HashSet, HashSetExt}; use lazy_static::lazy_static; use regex::Regex; -use crate::etl::field::Fields; +use crate::etl::field::{Fields, InputFieldInfo, OneInputMultiOutputField}; +use crate::etl::find_key_index; use crate::etl::processor::{ - yaml_bool, yaml_field, yaml_fields, yaml_string, yaml_strings, Field, Processor, FIELDS_NAME, - FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, + ProcessorBuilder, ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, PATTERN_NAME, }; -use crate::etl::value::{Map, Value}; +use crate::etl::value::Value; lazy_static! 
{ static ref GROUPS_NAME_REGEX: Regex = Regex::new(r"\(\?P?<([[:word:]]+)>.+?\)").unwrap(); @@ -40,6 +41,10 @@ fn get_regex_group_names(s: &str) -> Vec { .collect() } +fn generate_key(prefix: &str, group: &str) -> String { + format!("{prefix}_{group}") +} + #[derive(Debug)] struct GroupRegex { origin: String, @@ -72,34 +77,29 @@ impl std::str::FromStr for GroupRegex { } } -/// only support string value -/// if no value found from a pattern, the target_field will be ignored #[derive(Debug, Default)] -pub struct RegexProcessor { +pub struct RegexProcessorBuilder { fields: Fields, patterns: Vec, ignore_missing: bool, + output_keys: HashSet, } -impl RegexProcessor { - fn with_fields(&mut self, fields: Fields) { - self.fields = fields; +impl ProcessorBuilder for RegexProcessorBuilder { + fn output_keys(&self) -> HashSet<&str> { + self.output_keys.iter().map(|k| k.as_str()).collect() } - fn try_with_patterns(&mut self, patterns: Vec) -> Result<(), String> { - let mut rs = vec![]; - for pattern in patterns { - let gr = pattern.parse()?; - rs.push(gr); - } - self.patterns = rs; - Ok(()) + fn input_keys(&self) -> HashSet<&str> { + self.fields.iter().map(|f| f.input_field()).collect() } - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; + fn build(self, intermediate_keys: &[String]) -> Result { + self.build(intermediate_keys).map(ProcessorKind::Regex) } +} +impl RegexProcessorBuilder { fn check(self) -> Result { if self.fields.is_empty() { return Err(format!( @@ -118,47 +118,78 @@ impl RegexProcessor { Ok(self) } - fn generate_key(prefix: &str, group: &str) -> String { - format!("{prefix}_{group}") + fn build_group_output_info( + group_regex: &GroupRegex, + om_field: &OneInputMultiOutputField, + intermediate_keys: &[String], + ) -> Result, String> { + group_regex + .groups + .iter() + .map(|g| { + let key = generate_key(om_field.target_prefix(), g); + let index = find_key_index(intermediate_keys, &key, "regex"); + index.map(|index| OutPutInfo { + final_key: key, + group_name: g.to_string(), + index, + }) + }) + .collect::, String>>() } - fn process_field(&self, val: &str, field: &Field, gr: &GroupRegex) -> Result { - let mut map = Map::default(); - - if let Some(captures) = gr.regex.captures(val) { - for group in &gr.groups { - if let Some(capture) = captures.name(group) { - let value = capture.as_str().to_string(); - let prefix = field.get_target_field(); - - let key = Self::generate_key(prefix, group); - - map.insert(key, Value::String(value)); - } - } - } - - Ok(map) + fn build_group_output_infos( + patterns: &[GroupRegex], + om_field: &OneInputMultiOutputField, + intermediate_keys: &[String], + ) -> Result>, String> { + patterns + .iter() + .map(|group_regex| { + Self::build_group_output_info(group_regex, om_field, intermediate_keys) + }) + .collect::, String>>() } - fn update_output_keys(&mut self) { - for field in self.fields.iter_mut() { - for gr in &self.patterns { - for group in &gr.groups { - field - .output_fields_index_mapping - .insert(Self::generate_key(field.get_target_field(), group), 0_usize); - } - } + fn build_output_info( + real_fields: &[OneInputMultiOutputField], + patterns: &[GroupRegex], + intermediate_keys: &[String], + ) -> Result { + let inner = real_fields + .iter() + .map(|om_field| Self::build_group_output_infos(patterns, om_field, intermediate_keys)) + .collect::, String>>(); + inner.map(|inner| RegexProcessorOutputInfo { inner }) + } + + fn build(self, intermediate_keys: &[String]) -> Result { + let mut real_fields = 
vec![]; + for field in self.fields.into_iter() { + let input_index = find_key_index(intermediate_keys, field.input_field(), "regex")?; + let input_field_info = InputFieldInfo::new(field.input_field(), input_index); + + let input = OneInputMultiOutputField::new(input_field_info, field.target_field); + real_fields.push(input); } + let output_info = Self::build_output_info(&real_fields, &self.patterns, intermediate_keys)?; + Ok(RegexProcessor { + // fields: Fields::one(Field::new("test".to_string())), + fields: real_fields, + patterns: self.patterns, + output_info, + ignore_missing: self.ignore_missing, + }) } } -impl TryFrom<&yaml_rust::yaml::Hash> for RegexProcessor { +impl TryFrom<&yaml_rust::yaml::Hash> for RegexProcessorBuilder { type Error = String; fn try_from(value: &yaml_rust::yaml::Hash) -> Result { - let mut processor = RegexProcessor::default(); + let mut fields = Fields::default(); + let mut patterns: Vec = vec![]; + let mut ignore_missing = false; for (k, v) in value.iter() { let key = k @@ -166,28 +197,113 @@ impl TryFrom<&yaml_rust::yaml::Hash> for RegexProcessor { .ok_or(format!("key must be a string, but got {k:?}"))?; match key { FIELD_NAME => { - processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)); + fields = Fields::one(yaml_new_field(v, FIELD_NAME)?); } FIELDS_NAME => { - processor.with_fields(yaml_fields(v, FIELDS_NAME)?); + fields = yaml_new_fields(v, FIELDS_NAME)?; } PATTERN_NAME => { - processor.try_with_patterns(vec![yaml_string(v, PATTERN_NAME)?])?; + let pattern = yaml_string(v, PATTERN_NAME)?; + let gr = pattern.parse()?; + patterns.push(gr); } PATTERNS_NAME => { - processor.try_with_patterns(yaml_strings(v, PATTERNS_NAME)?)?; + for pattern in yaml_strings(v, PATTERNS_NAME)? { + let gr = pattern.parse()?; + patterns.push(gr); + } } IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?); + ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?; } _ => {} } } - processor.check().map(|mut p| { - p.update_output_keys(); - p - }) + let pattern_output_keys = patterns + .iter() + .flat_map(|pattern| pattern.groups.iter()) + .collect::>(); + let mut output_keys = HashSet::new(); + for field in fields.iter() { + for x in pattern_output_keys.iter() { + output_keys.insert(generate_key(field.target_or_input_field(), x)); + } + } + + let processor_builder = RegexProcessorBuilder { + fields, + patterns, + ignore_missing, + output_keys, + }; + + processor_builder.check() + } +} + +#[derive(Debug, Default)] +struct OutPutInfo { + final_key: String, + group_name: String, + index: usize, +} + +#[derive(Debug, Default)] +struct RegexProcessorOutputInfo { + pub inner: Vec>>, +} + +impl RegexProcessorOutputInfo { + fn get_output_index( + &self, + field_index: usize, + pattern_index: usize, + group_index: usize, + ) -> usize { + self.inner[field_index][pattern_index][group_index].index + } +} +/// only support string value +/// if no value found from a pattern, the target_field will be ignored +#[derive(Debug, Default)] +pub struct RegexProcessor { + fields: Vec, + output_info: RegexProcessorOutputInfo, + patterns: Vec, + ignore_missing: bool, +} + +impl RegexProcessor { + fn try_with_patterns(&mut self, patterns: Vec) -> Result<(), String> { + let mut rs = vec![]; + for pattern in patterns { + let gr = pattern.parse()?; + rs.push(gr); + } + self.patterns = rs; + Ok(()) + } + + fn process( + &self, + val: &str, + gr: &GroupRegex, + index: (usize, usize), + ) -> Result, String> { + let mut result = Vec::new(); + if let Some(captures) = 
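// Illustrative note (not part of the patch): output positions are
// precomputed per (field, pattern, group), so the lookup just below is a
// plain triple index, self.output_info.inner[field][pattern][group].index.
// The intermediate keys follow generate_key, e.g. a field whose target
// prefix is "breadcrumbs" and a named group "parent" write to the key
// "breadcrumbs_parent".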
gr.regex.captures(val) { + for (group_index, group) in gr.groups.iter().enumerate() { + if let Some(capture) = captures.name(group) { + let value = capture.as_str().to_string(); + let index = self + .output_info + .get_output_index(index.0, index.1, group_index); + result.push((index, Value::String(value))); + } + } + } + Ok(result) } } @@ -200,71 +316,40 @@ impl Processor for RegexProcessor { self.ignore_missing } - fn fields(&self) -> &Fields { - &self.fields - } - - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn output_keys(&self) -> HashSet { - self.fields - .iter() - .flat_map(|f| { - self.patterns.iter().flat_map(move |p| { - p.groups - .iter() - .map(move |g| Self::generate_key(&f.input_field.name, g)) - }) - }) - .collect() - } - - fn exec_field(&self, val: &Value, field: &Field) -> Result { - match val { - Value::String(val) => { - let mut map = Map::default(); - for gr in &self.patterns { - let m = self.process_field(val, field, gr)?; - map.extend(m); - } - Ok(map) - } - _ => Err(format!( - "{} processor: expect string value, but got {val:?}", - self.kind() - )), - } - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { - for field in self.fields.iter() { - let index = field.input_field.index; + for (field_index, field) in self.fields.iter().enumerate() { + let index = field.input_index(); + let mut result_list = None; match val.get(index) { Some(Value::String(s)) => { - let mut map = Map::default(); - for gr in &self.patterns { - // TODO(qtang): Let this method use the intermediate state collection directly. - let m = self.process_field(s, field, gr)?; - map.extend(m); - } - - field - .output_fields_index_mapping - .iter() - .for_each(|(k, output_index)| { - if let Some(v) = map.remove(k) { - val[*output_index] = v; + // we get rust borrow checker error here + // for (gr_index, gr) in self.patterns.iter().enumerate() { + // let result_list = self.process(s.as_str(), gr, (field_index, gr_index))?; + // for (output_index, result) in result_list { + //cannot borrow `*val` as mutable because it is also borrowed as immutable mutable borrow occurs here + // val[output_index] = result; + // } + // } + for (gr_index, gr) in self.patterns.iter().enumerate() { + let result = self.process(s.as_str(), gr, (field_index, gr_index))?; + if !result.is_empty() { + match result_list.as_mut() { + None => { + result_list = Some(result); + } + Some(result_list) => { + result_list.extend(result); + } } - }); + } + } } Some(Value::Null) | None => { if !self.ignore_missing { return Err(format!( "{} processor: missing field: {}", self.kind(), - field.get_field_name() + field.input_name() )); } } @@ -275,6 +360,15 @@ impl Processor for RegexProcessor { )); } } + // safety here + match result_list { + None => {} + Some(result_list) => { + for (output_index, result) in result_list { + val[output_index] = result; + } + } + } } Ok(()) @@ -282,37 +376,42 @@ impl Processor for RegexProcessor { } #[cfg(test)] mod tests { + use ahash::{HashMap, HashMapExt}; use itertools::Itertools; - use super::RegexProcessor; - use crate::etl::field::Fields; - use crate::etl::processor::Processor; + use crate::etl::processor::regex::RegexProcessorBuilder; use crate::etl::value::{Map, Value}; #[test] fn test_simple_parse() { - let mut processor = RegexProcessor::default(); + let pipeline_str = r#"fields: ["a"] +patterns: ['(?\d)'] +ignore_missing: false"#; + + let processor_yaml = yaml_rust::YamlLoader::load_from_str(pipeline_str) + .unwrap() + .pop() + .unwrap(); + let processor_yaml_hash = 
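// Aside (illustrative, not part of the patch): the buffered result_list in
// exec_mut above exists because `s` borrows `val` immutably through
// val.get(index), so assigning val[output_index] inside the same loop would
// require a second, mutable borrow, as the commented-out code notes. The
// generic shape of the workaround:
//
//   let pending: Vec<(usize, Value)> = /* compute while borrowing &val */;
//   // the immutable borrow ends here
//   for (idx, v) in pending { val[idx] = v; }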
processor_yaml.as_hash().unwrap(); + let builder = RegexProcessorBuilder::try_from(processor_yaml_hash).unwrap(); + let intermediate_keys = ["a".to_string(), "a_ar".to_string()]; + let processor = builder.build(&intermediate_keys).unwrap(); // single field (with prefix), multiple patterns - let f = ["a"].iter().map(|f| f.parse().unwrap()).collect(); - processor.with_fields(Fields::new(f).unwrap()); - let ar = "(?\\d)"; + let result = processor + .process("123", &processor.patterns[0], (0, 0)) + .unwrap() + .into_iter() + .map(|(k, v)| (intermediate_keys[k].clone(), v)) + .collect(); - let patterns = [ar].iter().map(|p| p.to_string()).collect(); - processor.try_with_patterns(patterns).unwrap(); - - let mut map = Map::default(); - map.insert("a", Value::String("123".to_string())); - processor.exec_map(&mut map).unwrap(); + let map = Map { values: result }; let v = Map { - values: vec![ - ("a_ar".to_string(), Value::String("1".to_string())), - ("a".to_string(), Value::String("123".to_string())), - ] - .into_iter() - .collect(), + values: vec![("a_ar".to_string(), Value::String("1".to_string()))] + .into_iter() + .collect(), }; assert_eq!(v, map); @@ -320,17 +419,14 @@ mod tests { #[test] fn test_process() { - let mut processor = RegexProcessor::default(); - let cc = "[c=c,n=US_CA_SANJOSE,o=55155]"; let cg = "[a=12.34.567.89,b=12345678,c=g,n=US_CA_SANJOSE,o=20940]"; let co = "[a=987.654.321.09,c=o]"; let cp = "[c=p,n=US_CA_SANJOSE,o=55155]"; let cw = "[c=w,n=US_CA_SANJOSE,o=55155]"; - let breadcrumbs = Value::String([cc, cg, co, cp, cw].iter().join(",")); + let breadcrumbs_str = [cc, cg, co, cp, cw].iter().join(","); let values = [ - ("breadcrumbs", breadcrumbs.clone()), ("breadcrumbs_parent", Value::String(cc.to_string())), ("breadcrumbs_edge", Value::String(cg.to_string())), ("breadcrumbs_origin", Value::String(co.to_string())), @@ -340,61 +436,141 @@ mod tests { .into_iter() .map(|(k, v)| (k.to_string(), v)) .collect(); - let mut temporary_map = Map { values }; + let temporary_map = Map { values }; { // single field (with prefix), multiple patterns - let ff = ["breadcrumbs, breadcrumbs"] - .iter() - .map(|f| f.parse().unwrap()) - .collect(); - processor.with_fields(Fields::new(ff).unwrap()); - let ccr = "(?\\[[^\\[]*c=c[^\\]]*\\])"; - let cgr = "(?\\[[^\\[]*c=g[^\\]]*\\])"; - let cor = "(?\\[[^\\[]*c=o[^\\]]*\\])"; - let cpr = "(?\\[[^\\[]*c=p[^\\]]*\\])"; - let cwr = "(?\\[[^\\[]*c=w[^\\]]*\\])"; - let patterns = [ccr, cgr, cor, cpr, cwr] - .iter() - .map(|p| p.to_string()) - .collect(); - processor.try_with_patterns(patterns).unwrap(); + let pipeline_str = r#"fields: ["breadcrumbs"] +patterns: + - '(?\[[^\[]*c=c[^\]]*\])' + - '(?\[[^\[]*c=g[^\]]*\])' + - '(?\[[^\[]*c=o[^\]]*\])' + - '(?\[[^\[]*c=p[^\]]*\])' + - '(?\[[^\[]*c=w[^\]]*\])' +ignore_missing: false"#; - let mut map = Map::default(); - map.insert("breadcrumbs", breadcrumbs.clone()); - processor.exec_map(&mut map).unwrap(); - - assert_eq!(map, temporary_map); + let processor_yaml = yaml_rust::YamlLoader::load_from_str(pipeline_str) + .unwrap() + .pop() + .unwrap(); + let processor_yaml_hash = processor_yaml.as_hash().unwrap(); + let builder = RegexProcessorBuilder::try_from(processor_yaml_hash).unwrap(); + let intermediate_keys = [ + "breadcrumbs", + "breadcrumbs_parent", + "breadcrumbs_edge", + "breadcrumbs_origin", + "breadcrumbs_peer", + "breadcrumbs_wrapper", + ] + .iter() + .map(|k| k.to_string()) + .collect_vec(); + let processor = builder.build(&intermediate_keys).unwrap(); + let mut result = HashMap::new(); + for (index, 
pattern) in processor.patterns.iter().enumerate() { + let r = processor + .process(&breadcrumbs_str, pattern, (0, index)) + .unwrap() + .into_iter() + .map(|(k, v)| (intermediate_keys[k].clone(), v)) + .collect::>(); + result.extend(r); + } + let map = Map { values: result }; + assert_eq!(temporary_map, map); } { // multiple fields (with prefix), multiple patterns - let ff = [ - "breadcrumbs_parent, parent", - "breadcrumbs_edge, edge", - "breadcrumbs_origin, origin", - "breadcrumbs_peer, peer", - "breadcrumbs_wrapper, wrapper", - ] - .iter() - .map(|f| f.parse().unwrap()) - .collect(); - processor.with_fields(Fields::new(ff).unwrap()); - let patterns = [ - "a=(?[^,\\]]+)", - "b=(?[^,\\]]+)", - "k=(?[^,\\]]+)", - "l=(?[^,\\]]+)", - "m=(?[^,\\]]+)", - "n=(?[^,\\]]+)", - "o=(?[^,\\]]+)", + let pipeline_str = r#"fields: + - breadcrumbs_parent, parent + - breadcrumbs_edge, edge + - breadcrumbs_origin, origin + - breadcrumbs_peer, peer + - breadcrumbs_wrapper, wrapper +patterns: + - 'a=(?[^,\]]+)' + - 'b=(?[^,\]]+)' + - 'k=(?[^,\]]+)' + - 'l=(?[^,\]]+)' + - 'm=(?[^,\]]+)' + - 'n=(?[^,\]]+)' + - 'o=(?[^,\]]+)' +ignore_missing: false"#; + + let processor_yaml = yaml_rust::YamlLoader::load_from_str(pipeline_str) + .unwrap() + .pop() + .unwrap(); + let processor_yaml_hash = processor_yaml.as_hash().unwrap(); + let builder = RegexProcessorBuilder::try_from(processor_yaml_hash).unwrap(); + + let intermediate_keys = [ + "breadcrumbs_parent", + "breadcrumbs_edge", + "breadcrumbs_origin", + "breadcrumbs_peer", + "breadcrumbs_wrapper", + "edge_ip", + "edge_request_id", + "edge_request_end_time", + "edge_turn_around_time", + "edge_dns_lookup_time", + "edge_geo", + "edge_asn", + "origin_ip", + "origin_request_id", + "origin_request_end_time", + "origin_turn_around_time", + "origin_dns_lookup_time", + "origin_geo", + "origin_asn", + "peer_ip", + "peer_request_id", + "peer_request_end_time", + "peer_turn_around_time", + "peer_dns_lookup_time", + "peer_geo", + "peer_asn", + "parent_ip", + "parent_request_id", + "parent_request_end_time", + "parent_turn_around_time", + "parent_dns_lookup_time", + "parent_geo", + "parent_asn", + "wrapper_ip", + "wrapper_request_id", + "wrapper_request_end_time", + "wrapper_turn_around_time", + "wrapper_dns_lookup_time", + "wrapper_geo", + "wrapper_asn", ] .iter() - .map(|p| p.to_string()) - .collect(); - processor.try_with_patterns(patterns).unwrap(); + .map(|k| k.to_string()) + .collect_vec(); + let processor = builder.build(&intermediate_keys).unwrap(); + + let mut result = HashMap::new(); + for (field_index, field) in processor.fields.iter().enumerate() { + for (pattern_index, pattern) in processor.patterns.iter().enumerate() { + let s = temporary_map + .get(field.input_name()) + .unwrap() + .to_str_value(); + let r = processor + .process(&s, pattern, (field_index, pattern_index)) + .unwrap() + .into_iter() + .map(|(k, v)| (intermediate_keys[k].clone(), v)) + .collect::>(); + result.extend(r); + } + } let new_values = vec![ ("edge_ip", Value::String("12.34.567.89".to_string())), @@ -413,11 +589,7 @@ mod tests { .map(|(k, v)| (k.to_string(), v)) .collect(); - let mut expected_map = temporary_map.clone(); - processor.exec_map(&mut temporary_map).unwrap(); - expected_map.extend(Map { values: new_values }); - - assert_eq!(expected_map, temporary_map); + assert_eq!(result, new_values); } } } diff --git a/src/pipeline/src/etl/processor/timestamp.rs b/src/pipeline/src/etl/processor/timestamp.rs index 1be9177a8a..7ab9571101 100644 --- a/src/pipeline/src/etl/processor/timestamp.rs +++ 
b/src/pipeline/src/etl/processor/timestamp.rs @@ -19,18 +19,17 @@ use chrono::{DateTime, NaiveDateTime}; use chrono_tz::Tz; use lazy_static::lazy_static; -use super::yaml_strings; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{Fields, OneInputOneOutputField}; use crate::etl::processor::{ - update_one_one_output_keys, yaml_bool, yaml_field, yaml_fields, yaml_string, Processor, - FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, Processor, + ProcessorBuilder, ProcessorKind, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, }; use crate::etl::value::time::{ MICROSECOND_RESOLUTION, MICRO_RESOLUTION, MILLISECOND_RESOLUTION, MILLI_RESOLUTION, MS_RESOLUTION, NANOSECOND_RESOLUTION, NANO_RESOLUTION, NS_RESOLUTION, SECOND_RESOLUTION, SEC_RESOLUTION, S_RESOLUTION, US_RESOLUTION, }; -use crate::etl::value::{Map, Timestamp, Value}; +use crate::etl::value::{Timestamp, Value}; pub(crate) const PROCESSOR_TIMESTAMP: &str = "timestamp"; const RESOLUTION_NAME: &str = "resolution"; @@ -108,10 +107,56 @@ impl std::ops::Deref for Formats { } } +#[derive(Debug)] +pub struct TimestampProcessorBuilder { + fields: Fields, + formats: Formats, + resolution: Resolution, + ignore_missing: bool, +} + +impl ProcessorBuilder for TimestampProcessorBuilder { + fn output_keys(&self) -> HashSet<&str> { + self.fields + .iter() + .map(|f| f.target_or_input_field()) + .collect() + } + + fn input_keys(&self) -> HashSet<&str> { + self.fields.iter().map(|f| f.input_field()).collect() + } + + fn build(self, intermediate_keys: &[String]) -> Result { + self.build(intermediate_keys).map(ProcessorKind::Timestamp) + } +} + +impl TimestampProcessorBuilder { + pub fn build(self, intermediate_keys: &[String]) -> Result { + let mut real_fields = vec![]; + for field in self.fields.into_iter() { + let input = OneInputOneOutputField::build( + "timestamp", + intermediate_keys, + field.input_field(), + field.target_or_input_field(), + )?; + real_fields.push(input); + } + Ok(TimestampProcessor { + fields: real_fields, + formats: self.formats, + resolution: self.resolution, + ignore_missing: self.ignore_missing, + }) + } +} + /// support string, integer, float, time, epoch #[derive(Debug, Default)] pub struct TimestampProcessor { - fields: Fields, + fields: Vec, formats: Formats, resolution: Resolution, ignore_missing: bool, @@ -123,29 +168,6 @@ pub struct TimestampProcessor { } impl TimestampProcessor { - fn with_fields(&mut self, mut fields: Fields) { - update_one_one_output_keys(&mut fields); - self.fields = fields - } - - fn with_resolution(&mut self, resolution: Resolution) { - self.resolution = resolution; - } - - fn with_formats(&mut self, v: Option, Tz)>>) { - let v = match v { - Some(v) if !v.is_empty() => v, - _ => DEFAULT_FORMATS.clone(), - }; - - let formats = Formats::new(v); - self.formats = formats; - } - - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; - } - /// try to parse val with timezone first, if failed, parse without timezone fn try_parse(val: &str, fmt: &str, tz: Tz) -> Result { if let Ok(dt) = DateTime::parse_from_str(val, fmt) { @@ -212,12 +234,6 @@ impl TimestampProcessor { Resolution::Nano => Ok(Timestamp::Nanosecond(t)), } } - - fn process_field(&self, val: &Value, field: &Field) -> Result { - let key = field.get_target_field(); - - Ok(Map::one(key, Value::Timestamp(self.parse(val)?))) - } } fn parse_formats(yaml: &yaml_rust::yaml::Yaml) -> Result, Tz)>, String> { @@ -250,11 +266,14 @@ 
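// Illustrative note (not part of the patch): try_parse above prefers
// DateTime::parse_from_str, which only succeeds when the format carries an
// offset (e.g. "%Y-%m-%d %H:%M:%S%z"); otherwise it falls back to
// NaiveDateTime::parse_from_str and resolves the naive timestamp in the
// format's associated Tz. A hedged chrono sketch of that shape:
//
//   if let Ok(dt) = DateTime::parse_from_str(val, fmt) {
//       Ok(dt.timestamp_nanos())               // offset parsed from the input
//   } else {
//       NaiveDateTime::parse_from_str(val, fmt)
//           .map_err(|e| e.to_string())
//           .and_then(|naive| tz.from_local_datetime(&naive)
//               .single()
//               .map(|dt| dt.timestamp_nanos())
//               .ok_or_else(|| "ambiguous local time".to_string()))
//   }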
fn parse_formats(yaml: &yaml_rust::yaml::Yaml) -> Result, Tz)>, }; } -impl TryFrom<&yaml_rust::yaml::Hash> for TimestampProcessor { +impl TryFrom<&yaml_rust::yaml::Hash> for TimestampProcessorBuilder { type Error = String; fn try_from(hash: &yaml_rust::yaml::Hash) -> Result { - let mut processor = TimestampProcessor::default(); + let mut fields = Fields::default(); + let mut formats = Formats::default(); + let mut resolution = Resolution::default(); + let mut ignore_missing = false; for (k, v) in hash { let key = k @@ -263,28 +282,33 @@ impl TryFrom<&yaml_rust::yaml::Hash> for TimestampProcessor { match key { FIELD_NAME => { - processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)); + fields = Fields::one(yaml_new_field(v, FIELD_NAME)?); } FIELDS_NAME => { - processor.with_fields(yaml_fields(v, FIELDS_NAME)?); + fields = yaml_new_fields(v, FIELDS_NAME)?; } FORMATS_NAME => { - let formats = parse_formats(v)?; - processor.with_formats(Some(formats)); + let formats_vec = parse_formats(v)?; + formats = Formats::new(formats_vec); } RESOLUTION_NAME => { - let s = yaml_string(v, RESOLUTION_NAME)?.as_str().try_into()?; - processor.with_resolution(s); + resolution = yaml_string(v, RESOLUTION_NAME)?.as_str().try_into()?; } IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?); + ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?; } - _ => {} } } - Ok(processor) + let processor_builder = TimestampProcessorBuilder { + fields, + formats, + resolution, + ignore_missing, + }; + + Ok(processor_builder) } } @@ -297,49 +321,23 @@ impl Processor for TimestampProcessor { self.ignore_missing } - fn fields(&self) -> &Fields { - &self.fields - } - - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn output_keys(&self) -> HashSet { - self.fields - .iter() - .map(|f| f.get_target_field().to_string()) - .collect() - } - - fn exec_field(&self, val: &Value, field: &Field) -> Result { - self.process_field(val, field) - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { for field in self.fields.iter() { - let index = field.input_field.index; + let index = field.input().index; match val.get(index) { Some(Value::Null) | None => { if !self.ignore_missing { return Err(format!( "{} processor: missing field: {}", self.kind(), - field.get_field_name() + &field.input().name )); } } Some(v) => { - // TODO(qtang): Let this method use the intermediate state collection directly. 
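// Illustrative note (not part of the patch): parse yields an i64 and the
// configured resolution selects the Timestamp variant, e.g. with
// Resolution::Second the string "1573840000" becomes
// Timestamp::Second(1573840000), matching the epoch processor tests earlier
// in this series.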
- let mut map = self.process_field(v, field)?; - field - .output_fields_index_mapping - .iter() - .for_each(|(k, output_index)| { - if let Some(v) = map.remove(k) { - val[*output_index] = v; - } - }); + let result = self.parse(v)?; + let (_, index) = field.output(); + val[*index] = Value::Timestamp(result); } } } @@ -351,9 +349,18 @@ impl Processor for TimestampProcessor { mod tests { use yaml_rust::YamlLoader; - use super::TimestampProcessor; + use super::{TimestampProcessor, TimestampProcessorBuilder}; use crate::etl::value::{Timestamp, Value}; + fn builder_to_native_processor(builder: TimestampProcessorBuilder) -> TimestampProcessor { + TimestampProcessor { + fields: vec![], + formats: builder.formats, + resolution: builder.resolution, + ignore_missing: builder.ignore_missing, + } + } + #[test] fn test_parse_epoch() { let processor_yaml_str = r#"fields: @@ -367,7 +374,9 @@ formats: "#; let yaml = &YamlLoader::load_from_str(processor_yaml_str).unwrap()[0]; let timestamp_yaml = yaml.as_hash().unwrap(); - let processor = TimestampProcessor::try_from(timestamp_yaml).unwrap(); + let processor = builder_to_native_processor( + TimestampProcessorBuilder::try_from(timestamp_yaml).unwrap(), + ); let values = [ ( @@ -419,7 +428,9 @@ formats: "#; let yaml = &YamlLoader::load_from_str(processor_yaml_str).unwrap()[0]; let timestamp_yaml = yaml.as_hash().unwrap(); - let processor = TimestampProcessor::try_from(timestamp_yaml).unwrap(); + let processor = builder_to_native_processor( + TimestampProcessorBuilder::try_from(timestamp_yaml).unwrap(), + ); let values: Vec<&str> = vec![ "2014-5-17T12:34:56", diff --git a/src/pipeline/src/etl/processor/urlencoding.rs b/src/pipeline/src/etl/processor/urlencoding.rs index 67a9ff9ecc..7db9d092f2 100644 --- a/src/pipeline/src/etl/processor/urlencoding.rs +++ b/src/pipeline/src/etl/processor/urlencoding.rs @@ -15,12 +15,12 @@ use ahash::HashSet; use urlencoding::{decode, encode}; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{Fields, OneInputOneOutputField}; use crate::etl::processor::{ - yaml_bool, yaml_field, yaml_fields, yaml_string, FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, - METHOD_NAME, + yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, ProcessorBuilder, ProcessorKind, + FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, METHOD_NAME, }; -use crate::etl::value::{Map, Value}; +use crate::etl::value::Value; pub(crate) const PROCESSOR_URL_ENCODING: &str = "urlencoding"; @@ -52,54 +52,76 @@ impl std::str::FromStr for Method { } } -/// only support string value #[derive(Debug, Default)] -pub struct UrlEncodingProcessor { +pub struct UrlEncodingProcessorBuilder { fields: Fields, method: Method, ignore_missing: bool, } +impl ProcessorBuilder for UrlEncodingProcessorBuilder { + fn output_keys(&self) -> HashSet<&str> { + self.fields + .iter() + .map(|f| f.target_or_input_field()) + .collect() + } + + fn input_keys(&self) -> HashSet<&str> { + self.fields.iter().map(|f| f.input_field()).collect() + } + + fn build(self, intermediate_keys: &[String]) -> Result { + self.build(intermediate_keys) + .map(ProcessorKind::UrlEncoding) + } +} + +impl UrlEncodingProcessorBuilder { + fn build(self, intermediate_keys: &[String]) -> Result { + let mut real_fields = vec![]; + for field in self.fields.into_iter() { + let input = OneInputOneOutputField::build( + "urlencoding", + intermediate_keys, + field.input_field(), + field.target_or_input_field(), + )?; + real_fields.push(input); + } + Ok(UrlEncodingProcessor { + fields: real_fields, + method: self.method, 
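// Illustrative round-trip (hypothetical values, matching test_decode_url at
// the end of this file; not part of the patch):
//
//   encode("//BC/[a=6.7.8.9,c=g,k=0,l=1]")
//     == "%2F%2FBC%2F%5Ba%3D6.7.8.9%2Cc%3Dg%2Ck%3D0%2Cl%3D1%5D"
//   decode("%2F%2FBC%2F%5Ba%3D6.7.8.9%2Cc%3Dg%2Ck%3D0%2Cl%3D1%5D").unwrap()
//     == "//BC/[a=6.7.8.9,c=g,k=0,l=1]"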
+ ignore_missing: self.ignore_missing, + }) + } +} + +/// only support string value +#[derive(Debug, Default)] +pub struct UrlEncodingProcessor { + fields: Vec, + method: Method, + ignore_missing: bool, +} + impl UrlEncodingProcessor { - fn with_fields(&mut self, mut fields: Fields) { - Self::update_output_keys(&mut fields); - self.fields = fields; - } - - fn with_ignore_missing(&mut self, ignore_missing: bool) { - self.ignore_missing = ignore_missing; - } - - fn with_method(&mut self, method: Method) { - self.method = method; - } - - fn process_field(&self, val: &str, field: &Field) -> Result { + fn process_field(&self, val: &str) -> Result { let processed = match self.method { Method::Encode => encode(val).to_string(), Method::Decode => decode(val).map_err(|e| e.to_string())?.into_owned(), }; - let val = Value::String(processed); - - let key = field.get_target_field(); - - Ok(Map::one(key, val)) - } - - fn update_output_keys(fields: &mut Fields) { - for field in fields.iter_mut() { - field - .output_fields_index_mapping - .insert(field.get_target_field().to_string(), 0_usize); - } + Ok(Value::String(processed)) } } -impl TryFrom<&yaml_rust::yaml::Hash> for UrlEncodingProcessor { +impl TryFrom<&yaml_rust::yaml::Hash> for UrlEncodingProcessorBuilder { type Error = String; fn try_from(value: &yaml_rust::yaml::Hash) -> Result { - let mut processor = UrlEncodingProcessor::default(); + let mut fields = Fields::default(); + let mut method = Method::Decode; + let mut ignore_missing = false; for (k, v) in value.iter() { let key = k @@ -107,24 +129,29 @@ impl TryFrom<&yaml_rust::yaml::Hash> for UrlEncodingProcessor { .ok_or(format!("key must be a string, but got {k:?}"))?; match key { FIELD_NAME => { - processor.with_fields(Fields::one(yaml_field(v, FIELD_NAME)?)); + fields = Fields::one(yaml_new_field(v, FIELD_NAME)?); } FIELDS_NAME => { - processor.with_fields(yaml_fields(v, FIELDS_NAME)?); + fields = yaml_new_fields(v, FIELDS_NAME)?; } IGNORE_MISSING_NAME => { - processor.with_ignore_missing(yaml_bool(v, IGNORE_MISSING_NAME)?); + ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?; } METHOD_NAME => { - let method = yaml_string(v, METHOD_NAME)?; - processor.with_method(method.parse()?); + let method_str = yaml_string(v, METHOD_NAME)?; + method = method_str.parse()?; } _ => {} } } + let processor = UrlEncodingProcessorBuilder { + fields, + method, + ignore_missing, + }; Ok(processor) } @@ -139,52 +166,21 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor { self.ignore_missing } - fn fields(&self) -> &Fields { - &self.fields - } - - fn fields_mut(&mut self) -> &mut Fields { - &mut self.fields - } - - fn output_keys(&self) -> HashSet { - self.fields - .iter() - .map(|f| f.get_target_field().to_string()) - .collect() - } - - fn exec_field(&self, val: &Value, field: &Field) -> Result { - match val { - Value::String(val) => self.process_field(val, field), - _ => Err(format!( - "{} processor: expect string value, but got {val:?}", - self.kind() - )), - } - } - fn exec_mut(&self, val: &mut Vec) -> Result<(), String> { for field in self.fields.iter() { - let index = field.input_field.index; + let index = field.input_index(); match val.get(index) { Some(Value::String(s)) => { - let mut map = self.process_field(s, field)?; - field - .output_fields_index_mapping - .iter() - .for_each(|(k, output_index)| { - if let Some(v) = map.remove(k) { - val[*output_index] = v; - } - }); + let result = self.process_field(s)?; + let output_index = field.output_index(); + val[output_index] = result; } 
Some(Value::Null) | None => { if !self.ignore_missing { return Err(format!( "{} processor: missing field: {}", self.kind(), - field.get_field_name() + field.output_name() )); } } @@ -202,29 +198,28 @@ impl crate::etl::processor::Processor for UrlEncodingProcessor { #[cfg(test)] mod tests { - use crate::etl::field::{Field, Fields}; + use crate::etl::processor::urlencoding::UrlEncodingProcessor; - use crate::etl::value::{Map, Value}; + use crate::etl::value::Value; #[test] fn test_decode_url() { - let field = "url"; - let ff: Field = field.parse().unwrap(); - let decoded = "//BC/[a=6.7.8.9,c=g,k=0,l=1]"; let encoded = "%2F%2FBC%2F%5Ba%3D6.7.8.9%2Cc%3Dg%2Ck%3D0%2Cl%3D1%5D"; - let mut processor = UrlEncodingProcessor::default(); - processor.with_fields(Fields::one(ff.clone())); - { - let result = processor.process_field(encoded, &ff).unwrap(); - assert_eq!(Map::one(field, Value::String(decoded.into())), result) + let processor = UrlEncodingProcessor::default(); + let result = processor.process_field(encoded).unwrap(); + assert_eq!(Value::String(decoded.into()), result) } { - processor.with_method(super::Method::Encode); - let result = processor.process_field(decoded, &ff).unwrap(); - assert_eq!(Map::one(field, Value::String(encoded.into())), result) + let processor = UrlEncodingProcessor { + fields: vec![], + method: super::Method::Encode, + ignore_missing: false, + }; + let result = processor.process_field(decoded).unwrap(); + assert_eq!(Value::String(encoded.into()), result) } } } diff --git a/src/pipeline/src/etl/transform.rs b/src/pipeline/src/etl/transform.rs index f6becad872..15d1bf3378 100644 --- a/src/pipeline/src/etl/transform.rs +++ b/src/pipeline/src/etl/transform.rs @@ -17,8 +17,8 @@ pub mod transformer; use itertools::Itertools; -use crate::etl::field::Fields; -use crate::etl::processor::{update_one_one_output_keys, yaml_field, yaml_fields, yaml_string}; +use crate::etl::find_key_index; +use crate::etl::processor::yaml_string; use crate::etl::transform::index::Index; use crate::etl::value::Value; @@ -31,6 +31,9 @@ const TRANSFORM_ON_FAILURE: &str = "on_failure"; pub use transformer::greptime::GreptimeTransformer; +use super::field::{Fields, InputFieldInfo, OneInputOneOutputField}; +use super::processor::{yaml_new_field, yaml_new_fields}; + pub trait Transformer: std::fmt::Display + Sized + Send + Sync + 'static { type Output; type VecOutput; @@ -39,12 +42,11 @@ pub trait Transformer: std::fmt::Display + Sized + Send + Sync + 'static { fn schemas(&self) -> &Vec; fn transforms(&self) -> &Transforms; fn transforms_mut(&mut self) -> &mut Transforms; - fn transform(&self, val: Value) -> Result; fn transform_mut(&self, val: &mut Vec) -> Result; } /// On Failure behavior when transform fails -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Copy)] pub enum OnFailure { // Return None if transform fails #[default] @@ -74,12 +76,18 @@ impl std::fmt::Display for OnFailure { } } } +#[derive(Debug, Default, Clone)] +pub struct TransformBuilders { + pub(crate) builders: Vec, + pub(crate) output_keys: Vec, + pub(crate) required_keys: Vec, +} #[derive(Debug, Default, Clone)] pub struct Transforms { - transforms: Vec, - output_keys: Vec, - required_keys: Vec, + pub(crate) transforms: Vec, + pub(crate) output_keys: Vec, + pub(crate) required_keys: Vec, } impl Transforms { @@ -130,7 +138,7 @@ impl std::ops::DerefMut for Transforms { } } -impl TryFrom<&Vec> for Transforms { +impl TryFrom<&Vec> for TransformBuilders { type Error = String; fn try_from(docs: &Vec) -> Result { @@ -138,41 
+146,78 @@ impl TryFrom<&Vec> for Transforms { let mut all_output_keys: Vec = Vec::with_capacity(100); let mut all_required_keys = Vec::with_capacity(100); for doc in docs { - let transform: Transform = doc + let transform_builder: TransformBuilder = doc .as_hash() .ok_or("transform element must be a map".to_string())? .try_into()?; - let mut transform_output_keys = transform + let mut transform_output_keys = transform_builder .fields .iter() - .map(|f| f.get_target_field().to_string()) + .map(|f| f.target_or_input_field().to_string()) .collect(); all_output_keys.append(&mut transform_output_keys); - let mut transform_required_keys = transform + let mut transform_required_keys = transform_builder .fields .iter() - .map(|f| f.input_field.name.clone()) + .map(|f| f.input_field().to_string()) .collect(); all_required_keys.append(&mut transform_required_keys); - transforms.push(transform); + transforms.push(transform_builder); } all_required_keys.sort(); - Ok(Transforms { - transforms, + Ok(TransformBuilders { + builders: transforms, output_keys: all_output_keys, required_keys: all_required_keys, }) } } +#[derive(Debug, Clone)] +pub struct TransformBuilder { + fields: Fields, + type_: Value, + default: Option, + index: Option, + on_failure: Option, +} + +impl TransformBuilder { + pub fn build( + self, + intermediate_keys: &[String], + output_keys: &[String], + ) -> Result { + let mut real_fields = vec![]; + for field in self.fields { + let input_index = find_key_index(intermediate_keys, field.input_field(), "transform")?; + let input_field_info = InputFieldInfo::new(field.input_field(), input_index); + let output_index = + find_key_index(output_keys, field.target_or_input_field(), "transform")?; + let input = OneInputOneOutputField::new( + input_field_info, + (field.target_or_input_field().to_string(), output_index), + ); + real_fields.push(input); + } + Ok(Transform { + real_fields, + type_: self.type_, + default: self.default, + index: self.index, + on_failure: self.on_failure, + }) + } +} + /// only field is required #[derive(Debug, Clone)] pub struct Transform { - pub fields: Fields, + pub real_fields: Vec, pub type_: Value, @@ -192,7 +237,7 @@ impl std::fmt::Display for Transform { }; let type_ = format!("type: {}", self.type_); - let fields = format!("field(s): {}", self.fields); + let fields = format!("field(s): {:?}", self.real_fields); let default = if let Some(default) = &self.default { format!(", default: {}", default) } else { @@ -212,7 +257,7 @@ impl std::fmt::Display for Transform { impl Default for Transform { fn default() -> Self { Transform { - fields: Fields::default(), + real_fields: Vec::new(), type_: Value::Null, default: None, index: None, @@ -222,40 +267,6 @@ impl Default for Transform { } impl Transform { - fn with_fields(&mut self, mut fields: Fields) { - update_one_one_output_keys(&mut fields); - self.fields = fields; - } - - fn with_type(&mut self, type_: Value) { - self.type_ = type_; - } - - fn try_default(&mut self, default: Value) -> Result<(), String> { - match (&self.type_, &default) { - (Value::Null, _) => Err(format!( - "transform {} type MUST BE set before default {}", - self.fields, &default, - )), - (_, Value::Null) => Ok(()), // if default is not set, then it will be regarded as default null - (_, _) => { - let target = self - .type_ - .parse_str_value(default.to_str_value().as_str())?; - self.default = Some(target); - Ok(()) - } - } - } - - fn with_index(&mut self, index: Index) { - self.index = Some(index); - } - - fn with_on_failure(&mut self, 
on_failure: OnFailure) { - self.on_failure = Some(on_failure); - } - pub(crate) fn get_default(&self) -> Option<&Value> { self.default.as_ref() } @@ -265,52 +276,74 @@ impl Transform { } } -impl TryFrom<&yaml_rust::yaml::Hash> for Transform { +impl TryFrom<&yaml_rust::yaml::Hash> for TransformBuilder { type Error = String; fn try_from(hash: &yaml_rust::yaml::Hash) -> Result { - let mut transform = Transform::default(); - - let mut default_opt = None; + let mut fields = Fields::default(); + let mut type_ = Value::Null; + let mut default = None; + let mut index = None; + let mut on_failure = None; for (k, v) in hash { let key = k.as_str().ok_or("key must be a string")?; match key { TRANSFORM_FIELD => { - transform.with_fields(Fields::one(yaml_field(v, TRANSFORM_FIELD)?)); + fields = Fields::one(yaml_new_field(v, TRANSFORM_FIELD)?); } TRANSFORM_FIELDS => { - transform.with_fields(yaml_fields(v, TRANSFORM_FIELDS)?); + fields = yaml_new_fields(v, TRANSFORM_FIELDS)?; } TRANSFORM_TYPE => { let t = yaml_string(v, TRANSFORM_TYPE)?; - transform.with_type(Value::parse_str_type(&t)?); + type_ = Value::parse_str_type(&t)?; } TRANSFORM_INDEX => { - let index = yaml_string(v, TRANSFORM_INDEX)?; - transform.with_index(index.try_into()?); + let index_str = yaml_string(v, TRANSFORM_INDEX)?; + index = Some(index_str.try_into()?); } TRANSFORM_DEFAULT => { - default_opt = Some(Value::try_from(v)?); + default = Some(Value::try_from(v)?); } TRANSFORM_ON_FAILURE => { - let on_failure = yaml_string(v, TRANSFORM_ON_FAILURE)?; - transform.with_on_failure(on_failure.parse()?); + let on_failure_str = yaml_string(v, TRANSFORM_ON_FAILURE)?; + on_failure = Some(on_failure_str.parse()?); } _ => {} } } + let mut final_default = None; - if let Some(default) = default_opt { - transform.try_default(default)?; + if let Some(default_value) = default { + match (&type_, &default_value) { + (Value::Null, _) => { + return Err(format!( + "transform {:?} type MUST BE set before default {}", + fields, &default_value, + )); + } + (_, Value::Null) => {} // if default is not set, then it will be regarded as default null + (_, _) => { + let target = type_.parse_str_value(default_value.to_str_value().as_str())?; + final_default = Some(target); + } + } } + let builder = TransformBuilder { + fields, + type_, + default: final_default, + index, + on_failure, + }; - Ok(transform) + Ok(builder) } } diff --git a/src/pipeline/src/etl/transform/transformer/greptime.rs b/src/pipeline/src/etl/transform/transformer/greptime.rs index d9eaec2920..9753b01004 100644 --- a/src/pipeline/src/etl/transform/transformer/greptime.rs +++ b/src/pipeline/src/etl/transform/transformer/greptime.rs @@ -20,10 +20,10 @@ use coerce::{coerce_columns, coerce_value}; use greptime_proto::v1::{ColumnSchema, Row, Rows, Value as GreptimeValue}; use itertools::Itertools; -use crate::etl::field::{Field, Fields}; +use crate::etl::field::{InputFieldInfo, OneInputOneOutputField}; use crate::etl::transform::index::Index; use crate::etl::transform::{Transform, Transformer, Transforms}; -use crate::etl::value::{Array, Map, Timestamp, Value}; +use crate::etl::value::{Timestamp, Value}; const DEFAULT_GREPTIME_TIMESTAMP_COLUMN: &str = "greptime_timestamp"; @@ -36,23 +36,41 @@ pub struct GreptimeTransformer { } impl GreptimeTransformer { - fn default_greptime_timestamp_column() -> Transform { + /// Add a default timestamp column to the transforms + fn add_greptime_timestamp_column(transforms: &mut Transforms) { let ns = chrono::Utc::now().timestamp_nanos_opt().unwrap_or(0); let type_ = 
Value::Timestamp(Timestamp::Nanosecond(ns)); let default = Some(type_.clone()); - let mut field = Field::new(DEFAULT_GREPTIME_TIMESTAMP_COLUMN); - field.insert_output_index(DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string(), 0); - let fields = Fields::new(vec![field]).unwrap(); - Transform { - fields, + let transform = Transform { + real_fields: vec![OneInputOneOutputField::new( + InputFieldInfo { + name: DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string(), + index: usize::MAX, + }, + ( + DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string(), + transforms + .transforms + .iter() + .map(|x| x.real_fields.len()) + .sum(), + ), + )], type_, default, index: Some(Index::Time), on_failure: Some(crate::etl::transform::OnFailure::Default), - } + }; + let required_keys = transforms.required_keys_mut(); + required_keys.push(DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string()); + + let output_keys = transforms.output_keys_mut(); + output_keys.push(DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string()); + transforms.push(transform); } + /// Generate the schema for the GreptimeTransformer fn schemas(transforms: &Transforms) -> Result, String> { let mut schema = vec![]; for transform in transforms.iter() { @@ -60,53 +78,6 @@ impl GreptimeTransformer { } Ok(schema) } - - fn transform_map(&self, map: &Map) -> Result { - let mut values = vec![GreptimeValue { value_data: None }; self.schema.len()]; - for transform in self.transforms.iter() { - for field in transform.fields.iter() { - let value_data = match map.get(field.get_field_name()) { - Some(val) => coerce_value(val, transform)?, - None => { - let default = transform.get_default(); - match default { - Some(default) => coerce_value(default, transform)?, - None => None, - } - } - }; - if let Some(i) = field - .output_fields_index_mapping - .iter() - .next() - .map(|kv| kv.1) - { - values[*i] = GreptimeValue { value_data } - } else { - return Err(format!( - "field: {} output_fields is empty.", - field.get_field_name() - )); - } - } - } - - Ok(Row { values }) - } - - fn transform_array(&self, arr: &Array) -> Result, String> { - let mut rows = Vec::with_capacity(arr.len()); - for v in arr.iter() { - match v { - Value::Map(map) => { - let row = self.transform_map(map)?; - rows.push(row); - } - _ => return Err(format!("Expected map, found: {v:?}")), - } - } - Ok(rows) - } } impl std::fmt::Display for GreptimeTransformer { @@ -129,9 +100,9 @@ impl Transformer for GreptimeTransformer { for transform in transforms.iter() { let target_fields_set = transform - .fields + .real_fields .iter() - .map(|f| f.get_target_field()) + .map(|f| f.output_name()) .collect::>(); let intersections: Vec<_> = column_names_set.intersection(&target_fields_set).collect(); @@ -146,12 +117,15 @@ impl Transformer for GreptimeTransformer { if let Some(idx) = transform.index { if idx == Index::Time { - match transform.fields.len() { - 1 => timestamp_columns.push(transform.fields.first().unwrap().get_field_name()), - _ => return Err(format!( - "Illegal to set multiple timestamp Index columns, please set only one: {}", - transform.fields.get_target_fields().join(", ") - )), + match transform.real_fields.len() { + 1 => timestamp_columns + .push(transform.real_fields.first().unwrap().input_name()), + _ => { + return Err(format!( + "Illegal to set multiple timestamp Index columns, please set only one: {}", + transform.real_fields.iter().map(|x|x.input_name()).join(", ") + )) + } } } } @@ -159,13 +133,7 @@ impl Transformer for GreptimeTransformer { match timestamp_columns.len() { 0 => { - 
transforms.push(GreptimeTransformer::default_greptime_timestamp_column()); - - let required_keys = transforms.required_keys_mut(); - required_keys.push(DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string()); - - let output_keys = transforms.output_keys_mut(); - output_keys.push(DEFAULT_GREPTIME_TIMESTAMP_COLUMN.to_string()); + GreptimeTransformer::add_greptime_timestamp_column(&mut transforms); let schema = GreptimeTransformer::schemas(&transforms)?; Ok(GreptimeTransformer { transforms, schema }) @@ -184,54 +152,26 @@ impl Transformer for GreptimeTransformer { } } - fn transform(&self, value: Value) -> Result { - match value { - Value::Map(map) => { - let rows = vec![self.transform_map(&map)?]; - Ok(Rows { - schema: self.schema.clone(), - rows, - }) - } - Value::Array(arr) => { - let rows = self.transform_array(&arr)?; - Ok(Rows { - schema: self.schema.clone(), - rows, - }) - } - _ => Err(format!("Expected map or array, found: {}", value)), - } - } - fn transform_mut(&self, val: &mut Vec) -> Result { let mut values = vec![GreptimeValue { value_data: None }; self.schema.len()]; for transform in self.transforms.iter() { - for field in transform.fields.iter() { - let index = field.input_field.index; + for field in transform.real_fields.iter() { + let index = field.input_index(); + let output_index = field.output_index(); match val.get(index) { Some(v) => { let value_data = coerce_value(v, transform) - .map_err(|e| format!("{} processor: {}", field.get_field_name(), e))?; + .map_err(|e| format!("{} processor: {}", field.input_name(), e))?; // every transform fields has only one output field - if let Some(i) = field - .output_fields_index_mapping - .iter() - .next() - .map(|kv| kv.1) - { - values[*i] = GreptimeValue { value_data } - } else { - return Err(format!( - "field: {} output_fields is empty.", - field.get_field_name() - )); - } + values[output_index] = GreptimeValue { value_data }; } - _ => { - return Err(format!( - "Get field not in the array field: {field:?}, {val:?}" - )) + None => { + let default = transform.get_default(); + let value_data = match default { + Some(default) => coerce_value(default, transform)?, + None => None, + }; + values[output_index] = GreptimeValue { value_data }; } } } diff --git a/src/pipeline/src/etl/transform/transformer/greptime/coerce.rs b/src/pipeline/src/etl/transform/transformer/greptime/coerce.rs index 4e83d0b203..8c7efef22f 100644 --- a/src/pipeline/src/etl/transform/transformer/greptime/coerce.rs +++ b/src/pipeline/src/etl/transform/transformer/greptime/coerce.rs @@ -66,8 +66,8 @@ impl TryFrom for ValueData { pub(crate) fn coerce_columns(transform: &Transform) -> Result, String> { let mut columns = Vec::new(); - for field in transform.fields.iter() { - let column_name = field.get_target_field().to_string(); + for field in transform.real_fields.iter() { + let column_name = field.output_name().to_string(); let datatype = coerce_type(transform)? 
as i32; @@ -134,7 +134,7 @@ fn coerce_type(transform: &Transform) -> Result { Value::Null => Err(format!( "Null type not supported when to coerce '{}' type", - transform.fields + transform.type_.to_str_type() )), } } @@ -144,15 +144,18 @@ pub(crate) fn coerce_value( transform: &Transform, ) -> Result, String> { match val { - Value::Null => match transform.on_failure { - Some(OnFailure::Ignore) => Ok(None), - Some(OnFailure::Default) => transform - .get_default() - .map(|default| coerce_value(default, transform)) - .unwrap_or_else(|| { - coerce_value(transform.get_type_matched_default_val(), transform) - }), - None => Ok(None), + Value::Null => match &transform.default { + Some(default) => coerce_value(default, transform), + None => match transform.on_failure { + Some(OnFailure::Ignore) => Ok(None), + Some(OnFailure::Default) => transform + .get_default() + .map(|default| coerce_value(default, transform)) + .unwrap_or_else(|| { + coerce_value(transform.get_type_matched_default_val(), transform) + }), + None => Ok(None), + }, }, Value::Int8(n) => coerce_i64_value(*n as i64, transform), @@ -404,12 +407,11 @@ fn coerce_string_value(s: &String, transform: &Transform) -> Result Rows { - let input_value: Value = serde_json::from_str::(input_str) - .expect("failed to parse into json") - .try_into() - .expect("failed to convert into value"); + let input_value = serde_json::from_str::(input_str).unwrap(); let yaml_content = Content::Yaml(pipeline_yaml.into()); let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline"); + let mut result = pipeline.init_intermediate_state(); - pipeline.exec(input_value).expect("failed to exec pipeline") + let schema = pipeline.schemas().clone(); + + let mut rows = Vec::new(); + + match input_value { + serde_json::Value::Array(array) => { + for value in array { + pipeline.prepare(value, &mut result).unwrap(); + let row = pipeline + .exec_mut(&mut result) + .expect("failed to exec pipeline"); + rows.push(row); + pipeline.reset_intermediate_state(&mut result); + } + } + serde_json::Value::Object(_) => { + pipeline.prepare(input_value, &mut result).unwrap(); + let row = pipeline + .exec_mut(&mut result) + .expect("failed to exec pipeline"); + rows.push(row); + } + _ => { + panic!("invalid input value"); + } + } + + Rows { schema, rows } } /// test util function to create column schema diff --git a/src/pipeline/tests/dissect.rs b/src/pipeline/tests/dissect.rs index 10f9e27996..82ce63399c 100644 --- a/src/pipeline/tests/dissect.rs +++ b/src/pipeline/tests/dissect.rs @@ -157,7 +157,7 @@ transform: fn test_modifier() { let empty_str = r#" { - "str": "key1 key2 key3 key4 key5 key6 key7 key8" + "str": "key1 key2 key3 key4 key5 key6" }"#; let pipeline_yaml = r#" @@ -165,7 +165,7 @@ processors: - dissect: field: str patterns: - - "%{key1} %{key2} %{+key3} %{+key3/2} %{key5->} %{?key6} %{*key_7} %{&key_7}" + - "%{key1} %{key2} %{+key3} %{+key3/2} %{key5->} %{?key6}" transform: - fields: @@ -173,7 +173,6 @@ transform: - key2 - key3 - key5 - - key7 type: string "#; @@ -184,7 +183,6 @@ transform: make_string_column_schema("key2".to_string()), make_string_column_schema("key3".to_string()), make_string_column_schema("key5".to_string()), - make_string_column_schema("key7".to_string()), common::make_column_schema( "greptime_timestamp".to_string(), ColumnDataType::TimestampNanosecond, @@ -209,10 +207,6 @@ transform: output.rows[0].values[3].value_data, Some(StringValue("key5".to_string())) ); - assert_eq!( - output.rows[0].values[4].value_data, - 
Some(StringValue("key8".to_string())) - ); } #[test] diff --git a/src/pipeline/tests/pipeline.rs b/src/pipeline/tests/pipeline.rs index af3b5a8c20..d5712eaedd 100644 --- a/src/pipeline/tests/pipeline.rs +++ b/src/pipeline/tests/pipeline.rs @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +use api::v1::Rows; use common_telemetry::tracing::info; use greptime_proto::v1::value::ValueData::{ BoolValue, F64Value, StringValue, TimestampNanosecondValue, TimestampSecondValue, U32Value, U64Value, U8Value, }; use greptime_proto::v1::Value as GreptimeValue; -use pipeline::{parse, Content, GreptimeTransformer, Pipeline, Value}; +use pipeline::{parse, Content, GreptimeTransformer, Pipeline}; #[test] fn test_complex_data() { let input_value_str = r#" - [ { "version": 1, "streamId": "12345", @@ -73,12 +73,9 @@ fn test_complex_data() { "ewExecutionInfo": "c:4380:7:161:162:161:n:::12473:200|C:4380:3:0:4:0:n:::6967:200|R:4380:20:99:99:1:n:::35982:200", "customField": "any-custom-value" } - ] "#; - let input_value: Value = serde_json::from_str::(input_value_str) - .expect("failed to parse input value") - .try_into() - .expect("failed to convert input value"); + let input_value = serde_json::from_str::(input_value_str) + .expect("failed to parse input value"); let pipeline_yaml = r#" --- @@ -422,7 +419,19 @@ transform: let yaml_content = Content::Yaml(pipeline_yaml.into()); let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline"); - let output = pipeline.exec(input_value).expect("failed to exec pipeline"); + let mut stats = pipeline.init_intermediate_state(); + pipeline + .prepare(input_value, &mut stats) + .expect("failed to prepare pipeline"); + + let row = pipeline + .exec_mut(&mut stats) + .expect("failed to exec pipeline"); + + let output = Rows { + schema: pipeline.schemas().clone(), + rows: vec![row], + }; assert_eq!(output.rows.len(), 1); let values = output.rows.first().unwrap().values.clone(); @@ -464,10 +473,7 @@ fn test_simple_data() { "line": "2024-05-25 20:16:37.217 hello world" } "#; - let input_value: Value = serde_json::from_str::(input_value_str) - .unwrap() - .try_into() - .unwrap(); + let input_value = serde_json::from_str::(input_value_str).unwrap(); let pipeline_yaml = r#" processors: @@ -493,11 +499,13 @@ transform: let yaml_content = Content::Yaml(pipeline_yaml.into()); let pipeline: Pipeline = parse(&yaml_content).unwrap(); - let output = pipeline.exec(input_value).unwrap(); - let r = output - .rows + + let mut status = pipeline.init_intermediate_state(); + pipeline.prepare(input_value, &mut status).unwrap(); + let row = pipeline.exec_mut(&mut status).unwrap(); + let r = row + .values .into_iter() - .flat_map(|v| v.values) .map(|v| v.value_data.unwrap()) .collect::>(); From e88465840d2e11bf65eb4f3907aaf95ee04c0878 Mon Sep 17 00:00:00 2001 From: jeremyhi Date: Fri, 6 Sep 2024 16:29:20 +0800 Subject: [PATCH 5/8] feat: add extension field to HeartbeatRequest (#4688) * feat: add extension field to HeartbeatRequest * chore: extension to extensions * chore: upgrade proto --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/api/src/region.rs | 6 +++--- src/common/meta/src/ddl/alter_logical_tables.rs | 2 +- src/common/meta/src/ddl/create_logical_tables.rs | 2 +- src/datanode/src/heartbeat.rs | 4 +++- src/datanode/src/region_server.rs | 8 ++++---- src/meta-srv/src/handler/failure_handler.rs | 1 + src/meta-srv/src/handler/node_stat.rs | 5 ++++- src/meta-srv/src/handler/region_lease_handler.rs | 1 + 
src/meta-srv/src/procedure/utils.rs | 2 +- src/meta-srv/src/selector/weight_compute.rs | 3 +++ src/metric-engine/src/engine.rs | 2 +- src/operator/src/flow.rs | 2 +- 14 files changed, 26 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a433b41841..6391920f47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4300,7 +4300,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=c437b55725b7f5224fe9d46db21072b4a682ee4b#c437b55725b7f5224fe9d46db21072b4a682ee4b" +source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=157cfdb52709e489cf1f3ce8e3042ed4ee8a524a#157cfdb52709e489cf1f3ce8e3042ed4ee8a524a" dependencies = [ "prost 0.12.6", "serde", diff --git a/Cargo.toml b/Cargo.toml index e4a04c1f47..93ea8db134 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,7 +120,7 @@ etcd-client = { version = "0.13" } fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "c437b55725b7f5224fe9d46db21072b4a682ee4b" } +greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "157cfdb52709e489cf1f3ce8e3042ed4ee8a524a" } humantime = "2.1" humantime-serde = "1.1" itertools = "0.10" diff --git a/src/api/src/region.rs b/src/api/src/region.rs index 0493378213..d752382534 100644 --- a/src/api/src/region.rs +++ b/src/api/src/region.rs @@ -21,14 +21,14 @@ use greptime_proto::v1::region::RegionResponse as RegionResponseV1; #[derive(Debug)] pub struct RegionResponse { pub affected_rows: AffectedRows, - pub extension: HashMap>, + pub extensions: HashMap>, } impl RegionResponse { pub fn from_region_response(region_response: RegionResponseV1) -> Self { Self { affected_rows: region_response.affected_rows as _, - extension: region_response.extension, + extensions: region_response.extensions, } } @@ -36,7 +36,7 @@ impl RegionResponse { pub fn new(affected_rows: AffectedRows) -> Self { Self { affected_rows, - extension: Default::default(), + extensions: Default::default(), } } } diff --git a/src/common/meta/src/ddl/alter_logical_tables.rs b/src/common/meta/src/ddl/alter_logical_tables.rs index 48d34b4307..3af359ef6e 100644 --- a/src/common/meta/src/ddl/alter_logical_tables.rs +++ b/src/common/meta/src/ddl/alter_logical_tables.rs @@ -131,7 +131,7 @@ impl AlterLogicalTablesProcedure { let phy_raw_schemas = future::join_all(alter_region_tasks) .await .into_iter() - .map(|res| res.map(|mut res| res.extension.remove(ALTER_PHYSICAL_EXTENSION_KEY))) + .map(|res| res.map(|mut res| res.extensions.remove(ALTER_PHYSICAL_EXTENSION_KEY))) .collect::>>()?; if phy_raw_schemas.is_empty() { diff --git a/src/common/meta/src/ddl/create_logical_tables.rs b/src/common/meta/src/ddl/create_logical_tables.rs index 5095b7c32e..4b867147be 100644 --- a/src/common/meta/src/ddl/create_logical_tables.rs +++ b/src/common/meta/src/ddl/create_logical_tables.rs @@ -157,7 +157,7 @@ impl CreateLogicalTablesProcedure { let phy_raw_schemas = join_all(create_region_tasks) .await .into_iter() - .map(|res| res.map(|mut res| res.extension.remove(ALTER_PHYSICAL_EXTENSION_KEY))) + .map(|res| res.map(|mut res| res.extensions.remove(ALTER_PHYSICAL_EXTENSION_KEY))) .collect::>>()?; if phy_raw_schemas.is_empty() { diff --git a/src/datanode/src/heartbeat.rs b/src/datanode/src/heartbeat.rs index 04e9d9ac5b..68b4637fce 100644 --- a/src/datanode/src/heartbeat.rs +++ 
b/src/datanode/src/heartbeat.rs @@ -324,10 +324,12 @@ impl HeartbeatTask { region_id: stat.region_id.as_u64(), engine: stat.engine, role: RegionRole::from(stat.role).into(), - // TODO(jeremy): w/rcus + // TODO(weny): w/rcus rcus: 0, wcus: 0, approximate_bytes: region_server.region_disk_usage(stat.region_id).unwrap_or(0), + // TODO(weny): add extensions + extensions: Default::default(), }) .collect() } diff --git a/src/datanode/src/region_server.rs b/src/datanode/src/region_server.rs index f6cc479d6a..56068a38c3 100644 --- a/src/datanode/src/region_server.rs +++ b/src/datanode/src/region_server.rs @@ -366,10 +366,10 @@ impl RegionServerHandler for RegionServer { // merge results by sum up affected rows and merge extensions. let mut affected_rows = 0; - let mut extension = HashMap::new(); + let mut extensions = HashMap::new(); for result in results { affected_rows += result.affected_rows; - extension.extend(result.extension); + extensions.extend(result.extensions); } Ok(RegionResponseV1 { @@ -380,7 +380,7 @@ impl RegionServerHandler for RegionServer { }), }), affected_rows: affected_rows as _, - extension, + extensions, }) } } @@ -708,7 +708,7 @@ impl RegionServerInner { .await?; Ok(RegionResponse { affected_rows: result.affected_rows, - extension: result.extension, + extensions: result.extensions, }) } Err(err) => { diff --git a/src/meta-srv/src/handler/failure_handler.rs b/src/meta-srv/src/handler/failure_handler.rs index f8acdd75c2..ebeeaf6b7f 100644 --- a/src/meta-srv/src/handler/failure_handler.rs +++ b/src/meta-srv/src/handler/failure_handler.rs @@ -93,6 +93,7 @@ mod tests { approximate_bytes: 0, engine: default_engine().to_string(), role: RegionRole::Follower, + extensions: Default::default(), } } acc.stat = Some(Stat { diff --git a/src/meta-srv/src/handler/node_stat.rs b/src/meta-srv/src/handler/node_stat.rs index b7fe55a0f4..5f1ec1cc2b 100644 --- a/src/meta-srv/src/handler/node_stat.rs +++ b/src/meta-srv/src/handler/node_stat.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use api::v1::meta::HeartbeatRequest; use common_meta::ClusterId; @@ -57,6 +57,8 @@ pub struct RegionStat { pub engine: String, /// The region role. 
pub role: RegionRole, + /// The extension info of this region + pub extensions: HashMap>, } impl Stat { @@ -142,6 +144,7 @@ impl TryFrom for RegionStat { approximate_bytes: value.approximate_bytes, engine: value.engine.to_string(), role: RegionRole::from(value.role()), + extensions: value.extensions, }) } } diff --git a/src/meta-srv/src/handler/region_lease_handler.rs b/src/meta-srv/src/handler/region_lease_handler.rs index 2481e86c8f..28ddb436e0 100644 --- a/src/meta-srv/src/handler/region_lease_handler.rs +++ b/src/meta-srv/src/handler/region_lease_handler.rs @@ -135,6 +135,7 @@ mod test { wcus: 0, approximate_bytes: 0, engine: String::new(), + extensions: Default::default(), } } diff --git a/src/meta-srv/src/procedure/utils.rs b/src/meta-srv/src/procedure/utils.rs index 09f0400ba1..c4e1688de0 100644 --- a/src/meta-srv/src/procedure/utils.rs +++ b/src/meta-srv/src/procedure/utils.rs @@ -100,7 +100,7 @@ pub mod mock { }), }), affected_rows: 0, - extension: Default::default(), + extensions: Default::default(), }) } } diff --git a/src/meta-srv/src/selector/weight_compute.rs b/src/meta-srv/src/selector/weight_compute.rs index a87a1b3b7f..c8c555d204 100644 --- a/src/meta-srv/src/selector/weight_compute.rs +++ b/src/meta-srv/src/selector/weight_compute.rs @@ -199,6 +199,7 @@ mod tests { approximate_bytes: 1, engine: "mito2".to_string(), role: RegionRole::Leader, + extensions: Default::default(), }], ..Default::default() } @@ -215,6 +216,7 @@ mod tests { approximate_bytes: 1, engine: "mito2".to_string(), role: RegionRole::Leader, + extensions: Default::default(), }], ..Default::default() } @@ -231,6 +233,7 @@ mod tests { approximate_bytes: 1, engine: "mito2".to_string(), role: RegionRole::Leader, + extensions: Default::default(), }], ..Default::default() } diff --git a/src/metric-engine/src/engine.rs b/src/metric-engine/src/engine.rs index f4e386a053..08414a97e4 100644 --- a/src/metric-engine/src/engine.rs +++ b/src/metric-engine/src/engine.rs @@ -162,7 +162,7 @@ impl RegionEngine for MetricEngine { result.map_err(BoxedError::new).map(|rows| RegionResponse { affected_rows: rows, - extension: extension_return_value, + extensions: extension_return_value, }) } diff --git a/src/operator/src/flow.rs b/src/operator/src/flow.rs index d6344e278d..1c82fcf00a 100644 --- a/src/operator/src/flow.rs +++ b/src/operator/src/flow.rs @@ -119,7 +119,7 @@ impl FlowServiceOperator { if let Some(prev) = &mut final_result { prev.affected_rows = res.affected_rows; prev.affected_flows.extend(res.affected_flows); - prev.extension.extend(res.extension); + prev.extensions.extend(res.extensions); } else { final_result = Some(res); } From 5d9f8a3be74702abfeab7c7afb6eeee1e8f20a09 Mon Sep 17 00:00:00 2001 From: localhost Date: Fri, 6 Sep 2024 16:36:49 +0800 Subject: [PATCH 6/8] feat: add test pipeline api (#4667) * chore: add test pipeline api * chore: add test for test pipeline api * chore: fix taplo check * chore: change pipeline dryrun api path * chore: add more info for pipeline dryrun api --- Cargo.lock | 2 + src/datatypes/Cargo.toml | 2 + src/datatypes/src/value.rs | 168 ++++++++++++++++++++++++++++++++ src/servers/src/http.rs | 1 + src/servers/src/http/event.rs | 116 +++++++++++++++++++++- tests-integration/tests/http.rs | 166 +++++++++++++++++++++++++++++++ 6 files changed, 453 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6391920f47..d483ec7088 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3156,6 +3156,7 @@ dependencies = [ "arrow", "arrow-array", "arrow-schema", + "base64 0.21.7", 
"common-base", "common-decimal", "common-error", @@ -3164,6 +3165,7 @@ dependencies = [ "common-time", "datafusion-common", "enum_dispatch", + "greptime-proto", "num", "num-traits", "ordered-float 3.9.2", diff --git a/src/datatypes/Cargo.toml b/src/datatypes/Cargo.toml index 281057ce80..b10ea682dd 100644 --- a/src/datatypes/Cargo.toml +++ b/src/datatypes/Cargo.toml @@ -15,6 +15,7 @@ workspace = true arrow.workspace = true arrow-array.workspace = true arrow-schema.workspace = true +base64.workspace = true common-base.workspace = true common-decimal.workspace = true common-error.workspace = true @@ -23,6 +24,7 @@ common-telemetry.workspace = true common-time.workspace = true datafusion-common.workspace = true enum_dispatch = "0.3" +greptime-proto.workspace = true num = "0.4" num-traits = "0.2" ordered-float = { version = "3.0", features = ["serde"] } diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs index 15aa028f4f..6c49154e40 100644 --- a/src/datatypes/src/value.rs +++ b/src/datatypes/src/value.rs @@ -18,6 +18,8 @@ use std::sync::Arc; use arrow::datatypes::{DataType as ArrowDataType, Field}; use arrow_array::{Array, ListArray}; +use base64::engine::general_purpose::URL_SAFE; +use base64::Engine as _; use common_base::bytes::{Bytes, StringBytes}; use common_decimal::Decimal128; use common_telemetry::error; @@ -28,8 +30,10 @@ use common_time::time::Time; use common_time::timestamp::{TimeUnit, Timestamp}; use common_time::{Duration, Interval, Timezone}; use datafusion_common::ScalarValue; +use greptime_proto::v1::value::ValueData; pub use ordered_float::OrderedFloat; use serde::{Deserialize, Serialize, Serializer}; +use serde_json::{Number, Value as JsonValue}; use snafu::{ensure, ResultExt}; use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Error, Result, TryFromValueSnafu}; @@ -1364,15 +1368,179 @@ impl<'a> ValueRef<'a> { } } +pub fn column_data_to_json(data: ValueData) -> JsonValue { + match data { + ValueData::BinaryValue(b) => JsonValue::String(URL_SAFE.encode(b)), + ValueData::BoolValue(b) => JsonValue::Bool(b), + ValueData::U8Value(i) => JsonValue::Number(i.into()), + ValueData::U16Value(i) => JsonValue::Number(i.into()), + ValueData::U32Value(i) => JsonValue::Number(i.into()), + ValueData::U64Value(i) => JsonValue::Number(i.into()), + ValueData::I8Value(i) => JsonValue::Number(i.into()), + ValueData::I16Value(i) => JsonValue::Number(i.into()), + ValueData::I32Value(i) => JsonValue::Number(i.into()), + ValueData::I64Value(i) => JsonValue::Number(i.into()), + ValueData::F32Value(f) => Number::from_f64(f as f64) + .map(JsonValue::Number) + .unwrap_or(JsonValue::Null), + ValueData::F64Value(f) => Number::from_f64(f) + .map(JsonValue::Number) + .unwrap_or(JsonValue::Null), + ValueData::StringValue(s) => JsonValue::String(s), + ValueData::DateValue(d) => JsonValue::String(Date::from(d).to_string()), + ValueData::DatetimeValue(d) => JsonValue::String(DateTime::from(d).to_string()), + ValueData::TimeSecondValue(d) => JsonValue::String(Time::new_second(d).to_iso8601_string()), + ValueData::TimeMillisecondValue(d) => { + JsonValue::String(Time::new_millisecond(d).to_iso8601_string()) + } + ValueData::TimeMicrosecondValue(d) => { + JsonValue::String(Time::new_microsecond(d).to_iso8601_string()) + } + ValueData::TimeNanosecondValue(d) => { + JsonValue::String(Time::new_nanosecond(d).to_iso8601_string()) + } + ValueData::TimestampMicrosecondValue(d) => { + JsonValue::String(Timestamp::new_microsecond(d).to_iso8601_string()) + } + ValueData::TimestampMillisecondValue(d) 
=> { + JsonValue::String(Timestamp::new_millisecond(d).to_iso8601_string()) + } + ValueData::TimestampNanosecondValue(d) => { + JsonValue::String(Timestamp::new_nanosecond(d).to_iso8601_string()) + } + ValueData::TimestampSecondValue(d) => { + JsonValue::String(Timestamp::new_second(d).to_iso8601_string()) + } + ValueData::IntervalYearMonthValue(d) => JsonValue::String(format!("interval year [{}]", d)), + ValueData::IntervalMonthDayNanoValue(d) => JsonValue::String(format!( + "interval month [{}][{}][{}]", + d.months, d.days, d.nanoseconds + )), + ValueData::IntervalDayTimeValue(d) => JsonValue::String(format!("interval day [{}]", d)), + ValueData::Decimal128Value(d) => { + JsonValue::String(format!("decimal128 [{}][{}]", d.hi, d.lo)) + } + } +} + #[cfg(test)] mod tests { use arrow::datatypes::DataType as ArrowDataType; use common_time::timezone::set_default_timezone; + use greptime_proto::v1::{Decimal128 as ProtoDecimal128, IntervalMonthDayNano}; use num_traits::Float; use super::*; use crate::vectors::ListVectorBuilder; + #[test] + fn test_column_data_to_json() { + assert_eq!( + column_data_to_json(ValueData::BinaryValue(b"hello".to_vec())), + JsonValue::String("aGVsbG8=".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::BoolValue(true)), + JsonValue::Bool(true) + ); + assert_eq!( + column_data_to_json(ValueData::U8Value(1)), + JsonValue::Number(1.into()) + ); + assert_eq!( + column_data_to_json(ValueData::U16Value(2)), + JsonValue::Number(2.into()) + ); + assert_eq!( + column_data_to_json(ValueData::U32Value(3)), + JsonValue::Number(3.into()) + ); + assert_eq!( + column_data_to_json(ValueData::U64Value(4)), + JsonValue::Number(4.into()) + ); + assert_eq!( + column_data_to_json(ValueData::I8Value(5)), + JsonValue::Number(5.into()) + ); + assert_eq!( + column_data_to_json(ValueData::I16Value(6)), + JsonValue::Number(6.into()) + ); + assert_eq!( + column_data_to_json(ValueData::I32Value(7)), + JsonValue::Number(7.into()) + ); + assert_eq!( + column_data_to_json(ValueData::I64Value(8)), + JsonValue::Number(8.into()) + ); + assert_eq!( + column_data_to_json(ValueData::F32Value(9.0)), + JsonValue::Number(Number::from_f64(9.0_f64).unwrap()) + ); + assert_eq!( + column_data_to_json(ValueData::F64Value(10.0)), + JsonValue::Number(Number::from_f64(10.0_f64).unwrap()) + ); + assert_eq!( + column_data_to_json(ValueData::StringValue("hello".to_string())), + JsonValue::String("hello".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::DateValue(123)), + JsonValue::String("1970-05-04".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::DatetimeValue(456)), + JsonValue::String("1970-01-01 00:00:00.456+0000".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::TimeSecondValue(789)), + JsonValue::String("00:13:09+0000".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::TimeMillisecondValue(789)), + JsonValue::String("00:00:00.789+0000".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::TimeMicrosecondValue(789)), + JsonValue::String("00:00:00.000789+0000".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::TimestampMillisecondValue(1234567890)), + JsonValue::String("1970-01-15 06:56:07.890+0000".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::TimestampNanosecondValue(1234567890123456789)), + JsonValue::String("2009-02-13 23:31:30.123456789+0000".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::TimestampSecondValue(1234567890)), + JsonValue::String("2009-02-13 
23:31:30+0000".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::IntervalYearMonthValue(12)), + JsonValue::String("interval year [12]".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::IntervalMonthDayNanoValue(IntervalMonthDayNano { + months: 1, + days: 2, + nanoseconds: 3, + })), + JsonValue::String("interval month [1][2][3]".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::IntervalDayTimeValue(4)), + JsonValue::String("interval day [4]".to_string()) + ); + assert_eq!( + column_data_to_json(ValueData::Decimal128Value(ProtoDecimal128 { hi: 5, lo: 6 })), + JsonValue::String("decimal128 [5][6]".to_string()) + ); + } + #[test] fn test_try_from_scalar_value() { assert_eq!( diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index ad4ff52225..5ac52157ea 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -753,6 +753,7 @@ impl HttpServer { "/pipelines/:pipeline_name", routing::delete(event::delete_pipeline), ) + .route("/pipelines/dryrun", routing::post(event::pipeline_dryrun)) .layer( ServiceBuilder::new() .layer(HandleErrorLayer::new(handle_error)) diff --git a/src/servers/src/http/event.rs b/src/servers/src/http/event.rs index fb436142fc..dbd7f1232a 100644 --- a/src/servers/src/http/event.rs +++ b/src/servers/src/http/event.rs @@ -23,15 +23,16 @@ use axum::headers::ContentType; use axum::http::header::CONTENT_TYPE; use axum::http::{Request, StatusCode}; use axum::response::{IntoResponse, Response}; -use axum::{async_trait, BoxError, Extension, TypedHeader}; +use axum::{async_trait, BoxError, Extension, Json, TypedHeader}; use common_query::{Output, OutputData}; use common_telemetry::{error, warn}; +use datatypes::value::column_data_to_json; use pipeline::error::PipelineTransformSnafu; use pipeline::util::to_pipeline_version; use pipeline::PipelineVersion; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use serde_json::{Deserializer, Value}; +use serde_json::{Deserializer, Map, Value}; use session::context::{Channel, QueryContext, QueryContextRef}; use snafu::{ensure, OptionExt, ResultExt}; @@ -230,6 +231,117 @@ fn transform_ndjson_array_factory( }) } +#[axum_macros::debug_handler] +pub async fn pipeline_dryrun( + State(log_state): State, + Query(query_params): Query, + Extension(mut query_ctx): Extension, + TypedHeader(content_type): TypedHeader, + payload: String, +) -> Result { + let handler = log_state.log_handler; + let pipeline_name = query_params.pipeline_name.context(InvalidParameterSnafu { + reason: "pipeline_name is required", + })?; + + let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?; + + let ignore_errors = query_params.ignore_errors.unwrap_or(false); + + let value = extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?; + + if value.len() > 10 { + return Err(InvalidParameterSnafu { + reason: "too many rows for dryrun", + } + .build()); + } + + query_ctx.set_channel(Channel::Http); + let query_ctx = Arc::new(query_ctx); + + let pipeline = handler + .get_pipeline(&pipeline_name, version, query_ctx.clone()) + .await?; + + let mut intermediate_state = pipeline.init_intermediate_state(); + + let mut results = Vec::with_capacity(value.len()); + for v in value { + pipeline + .prepare(v, &mut intermediate_state) + .map_err(|reason| PipelineTransformSnafu { reason }.build()) + .context(PipelineSnafu)?; + let r = pipeline + .exec_mut(&mut intermediate_state) + .map_err(|reason| PipelineTransformSnafu { reason }.build()) + 
.context(PipelineSnafu)?; + results.push(r); + pipeline.reset_intermediate_state(&mut intermediate_state); + } + + let colume_type_key = "colume_type"; + let data_type_key = "data_type"; + let name_key = "name"; + + let schema = pipeline + .schemas() + .iter() + .map(|cs| { + let mut map = Map::new(); + map.insert(name_key.to_string(), Value::String(cs.column_name.clone())); + map.insert( + data_type_key.to_string(), + Value::String(cs.datatype().as_str_name().to_string()), + ); + map.insert( + colume_type_key.to_string(), + Value::String(cs.semantic_type().as_str_name().to_string()), + ); + map.insert( + "fulltext".to_string(), + Value::Bool( + cs.options + .clone() + .is_some_and(|x| x.options.contains_key("fulltext")), + ), + ); + Value::Object(map) + }) + .collect::>(); + let rows = results + .into_iter() + .map(|row| { + let row = row + .values + .into_iter() + .enumerate() + .map(|(idx, v)| { + v.value_data + .map(|d| { + let mut map = Map::new(); + map.insert("value".to_string(), column_data_to_json(d)); + map.insert("key".to_string(), schema[idx][name_key].clone()); + map.insert( + "semantic_type".to_string(), + schema[idx][colume_type_key].clone(), + ); + map.insert("data_type".to_string(), schema[idx][data_type_key].clone()); + Value::Object(map) + }) + .unwrap_or(Value::Null) + }) + .collect(); + Value::Array(row) + }) + .collect::>(); + let mut result = Map::new(); + result.insert("schema".to_string(), Value::Array(schema)); + result.insert("rows".to_string(), Value::Array(rows)); + let result = Value::Object(result); + Ok(Json(result).into_response()) +} + #[axum_macros::debug_handler] pub async fn log_ingester( State(log_state): State, diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 497ea4969c..56307e0427 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -78,6 +78,7 @@ macro_rules! http_tests { test_vm_proto_remote_write, test_pipeline_api, + test_test_pipeline_api, test_plain_text_ingestion, ); )* @@ -1146,6 +1147,171 @@ transform: guard.remove_all().await; } +pub async fn test_test_pipeline_api(store_type: StorageType) { + common_telemetry::init_default_ut_logging(); + let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "test_pipeline_api").await; + + // handshake + let client = TestClient::new(app); + + let body = r#" +processors: + - date: + field: time + formats: + - "%Y-%m-%d %H:%M:%S%.3f" + ignore_missing: true + +transform: + - fields: + - id1 + - id2 + type: int32 + - fields: + - type + - log + - logger + type: string + - field: time + type: time + index: timestamp +"#; + + // 1. create pipeline + let res = client + .post("/v1/events/pipelines/test") + .header("Content-Type", "application/x-yaml") + .body(body) + .send() + .await; + + assert_eq!(res.status(), StatusCode::OK); + + let content = res.text().await; + + let content = serde_json::from_str(&content); + assert!(content.is_ok()); + // {"execution_time_ms":13,"pipelines":[{"name":"test","version":"2024-07-04 08:31:00.987136"}]} + let content: Value = content.unwrap(); + + let execution_time = content.get("execution_time_ms"); + assert!(execution_time.unwrap().is_number()); + let pipelines = content.get("pipelines"); + let pipelines = pipelines.unwrap().as_array().unwrap(); + assert_eq!(pipelines.len(), 1); + let pipeline = pipelines.first().unwrap(); + assert_eq!(pipeline.get("name").unwrap(), "test"); + + // 2. 
write data + let data_body = r#" + [ + { + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" + } + ] + "#; + let res = client + .post("/v1/events/pipelines/dryrun?pipeline_name=test") + .header("Content-Type", "application/json") + .body(data_body) + .send() + .await; + assert_eq!(res.status(), StatusCode::OK); + let body: serde_json::Value = res.json().await; + let schema = &body["schema"]; + let rows = &body["rows"]; + assert_eq!( + schema, + &json!([ + { + "colume_type": "FIELD", + "data_type": "INT32", + "fulltext": false, + "name": "id1" + }, + { + "colume_type": "FIELD", + "data_type": "INT32", + "fulltext": false, + "name": "id2" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "type" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "log" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "logger" + }, + { + "colume_type": "TIMESTAMP", + "data_type": "TIMESTAMP_NANOSECOND", + "fulltext": false, + "name": "time" + } + ]) + ); + assert_eq!( + rows, + &json!([ + [ + { + "data_type": "INT32", + "key": "id1", + "semantic_type": "FIELD", + "value": 2436 + }, + { + "data_type": "INT32", + "key": "id2", + "semantic_type": "FIELD", + "value": 2528 + }, + { + "data_type": "STRING", + "key": "type", + "semantic_type": "FIELD", + "value": "I" + }, + { + "data_type": "STRING", + "key": "log", + "semantic_type": "FIELD", + "value": "ClusterAdapter:enter sendTextDataToCluster\\n" + }, + { + "data_type": "STRING", + "key": "logger", + "semantic_type": "FIELD", + "value": "INTERACT.MANAGER" + }, + { + "data_type": "TIMESTAMP_NANOSECOND", + "key": "time", + "semantic_type": "TIMESTAMP", + "value": "2024-05-25 20:16:37.217+0000" + } + ] + ]) + ); + guard.remove_all().await; +} + pub async fn test_plain_text_ingestion(store_type: StorageType) { common_telemetry::init_default_ut_logging(); let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "test_pipeline_api").await; From d2d62e0c6f42e75f500a85847494faa97032b6e3 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Sat, 7 Sep 2024 12:28:11 +0800 Subject: [PATCH 7/8] fix: unconditional statistics (#4694) * fix: unconditional statistics Signed-off-by: Ruihang Xia * add more sqlness case Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- src/mito2/src/read/scan_region.rs | 4 ++ src/mito2/src/read/seq_scan.rs | 5 ++ src/mito2/src/read/unordered_scan.rs | 5 ++ src/store-api/src/region_engine.rs | 7 +++ src/table/src/table/scan.rs | 2 +- .../standalone/common/aggregate/count.result | 47 +++++++++++++++++++ .../standalone/common/aggregate/count.sql | 24 ++++++++++ 7 files changed, 93 insertions(+), 1 deletion(-) diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index dcf5b4395c..ec45c9b934 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -709,6 +709,10 @@ impl ScanInput { rows_in_files + rows_in_memtables } + pub(crate) fn predicate(&self) -> Option { + self.predicate.clone() + } + /// Retrieves [`PartitionRange`] from memtable and files pub(crate) fn partition_ranges(&self) -> Vec { let mut id = 0; diff --git a/src/mito2/src/read/seq_scan.rs b/src/mito2/src/read/seq_scan.rs index ec5fcf53d3..ca232df834 100644 --- a/src/mito2/src/read/seq_scan.rs +++ b/src/mito2/src/read/seq_scan.rs @@ -515,6 +515,11 @@ impl 
RegionScanner for SeqScan { self.properties.partitions = ranges; Ok(()) } + + fn has_predicate(&self) -> bool { + let predicate = self.stream_ctx.input.predicate(); + predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false) + } } impl DisplayAs for SeqScan { diff --git a/src/mito2/src/read/unordered_scan.rs b/src/mito2/src/read/unordered_scan.rs index ec43654e09..5dfcc519d6 100644 --- a/src/mito2/src/read/unordered_scan.rs +++ b/src/mito2/src/read/unordered_scan.rs @@ -228,6 +228,11 @@ impl RegionScanner for UnorderedScan { Ok(stream) } + + fn has_predicate(&self) -> bool { + let predicate = self.stream_ctx.input.predicate(); + predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false) + } } impl DisplayAs for UnorderedScan { diff --git a/src/store-api/src/region_engine.rs b/src/store-api/src/region_engine.rs index cf37fe82f9..84555a595b 100644 --- a/src/store-api/src/region_engine.rs +++ b/src/store-api/src/region_engine.rs @@ -233,6 +233,9 @@ pub trait RegionScanner: Debug + DisplayAs + Send { /// # Panics /// Panics if the `partition` is out of bound. fn scan_partition(&self, partition: usize) -> Result; + + /// Check if there is any predicate that may be executed in this scanner. + fn has_predicate(&self) -> bool; } pub type RegionScannerRef = Box; @@ -367,6 +370,10 @@ impl RegionScanner for SinglePartitionScanner { )) }) } + + fn has_predicate(&self) -> bool { + false + } } impl DisplayAs for SinglePartitionScanner { diff --git a/src/table/src/table/scan.rs b/src/table/src/table/scan.rs index 19283058c6..e67c6dc032 100644 --- a/src/table/src/table/scan.rs +++ b/src/table/src/table/scan.rs @@ -180,7 +180,7 @@ impl ExecutionPlan for RegionScanExec { } fn statistics(&self) -> DfResult { - let statistics = if self.append_mode { + let statistics = if self.append_mode && !self.scanner.lock().unwrap().has_predicate() { let column_statistics = self .arrow_schema .fields diff --git a/tests/cases/standalone/common/aggregate/count.result b/tests/cases/standalone/common/aggregate/count.result index 4523118d18..f93189d985 100644 --- a/tests/cases/standalone/common/aggregate/count.result +++ b/tests/cases/standalone/common/aggregate/count.result @@ -54,3 +54,50 @@ drop table test; Affected Rows: 0 +-- Append table +create table count_where_bug ( + tag String, + ts TimestampMillisecond time index, + num Int64, + primary key (tag), +) engine=mito with('append_mode'='true'); + +Affected Rows: 0 + +insert into count_where_bug (tag, ts, num) +values ('a', '2024-09-06T06:00:01Z', 1), + ('a', '2024-09-06T06:00:02Z', 2), + ('a', '2024-09-06T06:00:03Z', 3), + ('b', '2024-09-06T06:00:04Z', 4), + ('b', '2024-09-06T06:00:05Z', 5); + +Affected Rows: 5 + +select count(1) from count_where_bug where tag = 'b'; + ++-----------------+ +| COUNT(Int64(1)) | ++-----------------+ +| 2 | ++-----------------+ + +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + ++-----------------+ +| COUNT(Int64(1)) | ++-----------------+ +| 1 | ++-----------------+ + +select count(1) from count_where_bug where num != 3; + ++-----------------+ +| COUNT(Int64(1)) | ++-----------------+ +| 4 | ++-----------------+ + +drop table count_where_bug; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/aggregate/count.sql b/tests/cases/standalone/common/aggregate/count.sql index 80100c96ae..22745b723c 100644 --- a/tests/cases/standalone/common/aggregate/count.sql +++ b/tests/cases/standalone/common/aggregate/count.sql @@ -17,3 +17,27 @@ select count(*) from (select * from test cross join "HelloWorld"); drop 
table "HelloWorld"; drop table test; + +-- Append table + +create table count_where_bug ( + tag String, + ts TimestampMillisecond time index, + num Int64, + primary key (tag), +) engine=mito with('append_mode'='true'); + +insert into count_where_bug (tag, ts, num) +values ('a', '2024-09-06T06:00:01Z', 1), + ('a', '2024-09-06T06:00:02Z', 2), + ('a', '2024-09-06T06:00:03Z', 3), + ('b', '2024-09-06T06:00:04Z', 4), + ('b', '2024-09-06T06:00:05Z', 5); + +select count(1) from count_where_bug where tag = 'b'; + +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + +select count(1) from count_where_bug where num != 3; + +drop table count_where_bug; From b950e705f5e6d681084c0313cc8e458ff94943db Mon Sep 17 00:00:00 2001 From: Yiran Date: Sat, 7 Sep 2024 23:27:32 +0800 Subject: [PATCH 8/8] chore: update the document link in README.md (#4690) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1eb0db19ff..cb0519f321 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Our core developers have been building time-series data platforms for years. Bas * **Compatible with InfluxDB, Prometheus and more protocols** - Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/clients/overview). + Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/protocols/overview). ## Try GreptimeDB